1 /* $NetBSD: makewhatis.c,v 1.17 2001/11/23 13:18:54 tron Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Matthias Scheler. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #ifndef lint 41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\ 42 All rights reserved.\n"); 43 #endif /* not lint */ 44 45 #ifndef lint 46 __RCSID("$NetBSD: makewhatis.c,v 1.17 2001/11/23 13:18:54 tron Exp $"); 47 #endif /* not lint */ 48 49 #include <sys/types.h> 50 #include <sys/param.h> 51 #include <sys/stat.h> 52 #include <sys/wait.h> 53 54 #include <ctype.h> 55 #include <err.h> 56 #include <errno.h> 57 #include <fcntl.h> 58 #include <fts.h> 59 #include <locale.h> 60 #include <paths.h> 61 #include <signal.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <string.h> 65 #include <unistd.h> 66 #include <zlib.h> 67 68 typedef struct manpagestruct manpage; 69 struct manpagestruct { 70 manpage *mp_left,*mp_right; 71 ino_t mp_inode; 72 char mp_name[1]; 73 }; 74 75 typedef struct whatisstruct whatis; 76 struct whatisstruct { 77 whatis *wi_left,*wi_right; 78 char *wi_data; 79 }; 80 81 int main (int, char **); 82 char *findwhitespace (char *); 83 char *strmove (char *,char *); 84 char *GetS (gzFile, char *, int); 85 int manpagesection (char *); 86 char *createsectionstring(char *); 87 int addmanpage (manpage **, ino_t, char *); 88 int addwhatis (whatis **, char *); 89 char *replacestring (char *, char *, char *); 90 void catpreprocess (char *); 91 char *parsecatpage (gzFile *); 92 int manpreprocess (char *); 93 char *nroff (gzFile *); 94 char *parsemanpage (gzFile *, int); 95 char *getwhatisdata (char *); 96 void processmanpages (manpage **,whatis **); 97 int dumpwhatis (FILE *, whatis *); 98 99 char *default_manpath[] = { 100 "/usr/share/man", 101 NULL 102 }; 103 104 char sectionext[] = "0123456789ln"; 105 char whatisdb[] = "whatis.db"; 106 107 int 108 main(int argc,char **argv) 109 { 110 char **manpath; 111 FTS *fts; 112 FTSENT *fe; 113 manpage *source; 114 whatis *dest; 115 FILE *out; 116 117 (void)setlocale(LC_ALL, ""); 118 119 manpath = (argc < 2) ? default_manpath : &argv[1]; 120 121 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) { 122 perror(getprogname()); 123 return EXIT_FAILURE; 124 } 125 126 source = NULL; 127 while ((fe = fts_read(fts)) != NULL) { 128 switch (fe->fts_info) { 129 case FTS_F: 130 if (manpagesection(fe->fts_path) >= 0) 131 if (!addmanpage(&source, 132 fe->fts_statp->st_ino, 133 fe->fts_path)) 134 err(EXIT_FAILURE, NULL); 135 case FTS_D: 136 case FTS_DC: 137 case FTS_DEFAULT: 138 case FTS_DP: 139 case FTS_SLNONE: 140 break; 141 default: 142 errx(EXIT_FAILURE, "%s: %s", fe->fts_path, 143 strerror(fe->fts_errno)); 144 145 } 146 } 147 148 (void)fts_close(fts); 149 150 dest = NULL; 151 processmanpages(&source, &dest); 152 153 if (chdir(manpath[0]) < 0) 154 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno)); 155 156 if ((out = fopen(whatisdb, "w")) == NULL) 157 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno)); 158 159 if (!(dumpwhatis(out, dest) || 160 (fclose(out) < 0)) || 161 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0)) 162 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno)); 163 164 return EXIT_SUCCESS; 165 } 166 167 char 168 *findwhitespace(char *str) 169 170 { 171 while (!isspace(*str)) 172 if (*str++ == '\0') { 173 str = NULL; 174 break; 175 } 176 177 return str; 178 } 179 180 char 181 *strmove(char *dest,char *src) 182 183 { 184 return memmove(dest, src, strlen(src) + 1); 185 } 186 187 char 188 *GetS(gzFile in, char *buffer, int length) 189 190 { 191 char *ptr; 192 193 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0')) 194 ptr = NULL; 195 196 return ptr; 197 } 198 199 int 200 manpagesection(char *name) 201 { 202 char *ptr; 203 204 if ((ptr = strrchr(name, '/')) != NULL) 205 ptr++; 206 else 207 ptr = name; 208 209 while ((ptr = strchr(ptr, '.')) != NULL) { 210 int section; 211 212 ptr++; 213 section=0; 214 while (sectionext[section] != '\0') 215 if (sectionext[section] == *ptr) 216 return section; 217 else 218 section++; 219 } 220 221 return -1; 222 } 223 224 char 225 *createsectionstring(char *section_id) 226 { 227 char *section; 228 229 if ((section = malloc(strlen(section_id) + 7)) != NULL) { 230 section[0] = ' '; 231 section[1] = '('; 232 (void) strcat(strcpy(§ion[2], section_id), ") - "); 233 } 234 return section; 235 } 236 237 int 238 addmanpage(manpage **tree,ino_t inode,char *name) 239 { 240 manpage *mp; 241 242 while ((mp = *tree) != NULL) { 243 if (mp->mp_inode == inode) 244 return 1; 245 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right); 246 } 247 248 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL) 249 return 0; 250 251 mp->mp_left = NULL; 252 mp->mp_right = NULL; 253 mp->mp_inode = inode; 254 (void) strcpy(mp->mp_name, name); 255 *tree = mp; 256 257 return 1; 258 } 259 260 int 261 addwhatis(whatis **tree, char *data) 262 { 263 whatis *wi; 264 int result; 265 266 while (isspace(*data)) 267 data++; 268 269 if (*data == '/') { 270 char *ptr; 271 272 ptr = ++data; 273 while ((*ptr != '\0') && !isspace(*ptr)) 274 if (*ptr++ == '/') 275 data = ptr; 276 } 277 278 while ((wi = *tree) != NULL) { 279 result=strcmp(data, wi->wi_data); 280 if (result == 0) return 1; 281 tree = &((result < 0) ? wi->wi_left : wi->wi_right); 282 } 283 284 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL) 285 return 0; 286 287 wi->wi_left = NULL; 288 wi->wi_right = NULL; 289 wi->wi_data = data; 290 *tree = wi; 291 292 return 1; 293 } 294 295 void 296 catpreprocess(char *from) 297 { 298 char *to; 299 300 to = from; 301 while (isspace(*from)) from++; 302 303 while (*from != '\0') 304 if (isspace(*from)) { 305 while (isspace(*++from)); 306 if (*from != '\0') 307 *to++ = ' '; 308 } 309 else if (*(from + 1) == '\10') 310 from += 2; 311 else 312 *to++ = *from++; 313 314 *to = '\0'; 315 } 316 317 char * 318 replacestring(char *string, char *old, char *new) 319 320 { 321 char *ptr, *result; 322 int slength, olength, nlength, pos; 323 324 if (new == NULL) 325 return strdup(string); 326 327 ptr = strstr(string, old); 328 if (ptr == NULL) 329 return strdup(string); 330 331 slength = strlen(string); 332 olength = strlen(old); 333 nlength = strlen(new); 334 if ((result = malloc(slength - olength + nlength + 1)) == NULL) 335 return NULL; 336 337 pos = ptr - string; 338 (void) memcpy(result, string, pos); 339 (void) memcpy(&result[pos], new, nlength); 340 (void) strcpy(&result[pos + nlength], &string[pos + olength]); 341 342 return result; 343 } 344 345 char * 346 parsecatpage(gzFile *in) 347 { 348 char buffer[8192]; 349 char *section, *ptr, *last; 350 int size; 351 352 do { 353 if (GetS(in, buffer, sizeof(buffer)) == NULL) 354 return NULL; 355 } 356 while (buffer[0] == '\n'); 357 358 section = NULL; 359 if ((ptr = strchr(buffer, '(')) != NULL) { 360 if ((last = strchr(ptr + 1, ')')) !=NULL) { 361 int length; 362 363 length = last - ptr + 1; 364 if ((section = malloc(length + 5)) == NULL) 365 return NULL; 366 367 *section = ' '; 368 (void) memcpy(section + 1, ptr, length); 369 (void) strcpy(section + 1 + length, " - "); 370 } 371 } 372 373 for (;;) { 374 if (GetS(in, buffer, sizeof(buffer)) == NULL) { 375 free(section); 376 return NULL; 377 } 378 catpreprocess(buffer); 379 if (strncmp(buffer, "NAME", 4) == 0) 380 break; 381 } 382 383 ptr = last = buffer; 384 size = sizeof(buffer) - 1; 385 while ((size > 0) && (GetS(in, ptr, size) != NULL)) { 386 int length; 387 388 catpreprocess(ptr); 389 390 length = strlen(ptr); 391 if (length == 0) { 392 *last = '\0'; 393 394 ptr = replacestring(buffer, " - ", section); 395 free(section); 396 return ptr; 397 } 398 if ((length > 1) && (ptr[length - 1] == '-') && 399 isalpha(ptr[length - 2])) 400 last = &ptr[--length]; 401 else { 402 last = &ptr[length++]; 403 *last = ' '; 404 } 405 406 ptr += length; 407 size -= length; 408 } 409 410 free(section); 411 412 return NULL; 413 } 414 415 int 416 manpreprocess(char *line) 417 { 418 char *from, *to; 419 420 to = from = line; 421 while (isspace(*from)) from++; 422 if (strncmp(from, ".\\\"", 3) == 0) 423 return 1; 424 425 while (*from != '\0') 426 if (isspace(*from)) { 427 while (isspace(*++from)); 428 if ((*from != '\0') && (*from != ',')) 429 *to++ = ' '; 430 } 431 else if (*from == '\\') 432 switch (*++from) { 433 case '\0': 434 case '-': 435 break; 436 case 'f': 437 case 's': 438 from++; 439 if ((*from=='+') || (*from=='-')) 440 from++; 441 while (isdigit(*from)) 442 from++; 443 break; 444 default: 445 from++; 446 } 447 else 448 if (*from == '"') 449 from++; 450 else 451 *to++ = *from++; 452 453 *to = '\0'; 454 455 if (strncasecmp(line, ".Xr", 3) == 0) { 456 char *sect; 457 458 from = line + 3; 459 if (isspace(*from)) 460 from++; 461 462 if ((sect = findwhitespace(from)) != NULL) { 463 int length; 464 465 *sect++ = '\0'; 466 length = strlen(from); 467 (void) memmove(line, from, length); 468 line[length++] = '('; 469 to = &line[length]; 470 length = strlen(sect); 471 (void) memmove(to, sect, length); 472 (void) strcpy(&to[length], ")"); 473 } 474 } 475 476 return 0; 477 } 478 479 char * 480 nroff(gzFile *in) 481 { 482 char tempname[MAXPATHLEN], buffer[65536], *data; 483 int tempfd, bytes, pipefd[2], status; 484 static int devnull = -1; 485 pid_t child; 486 487 if (gzrewind(in) < 0) { 488 perror(getprogname()); 489 return NULL; 490 } 491 492 if ((devnull < 0) && 493 ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0)) { 494 perror(getprogname()); 495 return NULL; 496 } 497 498 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX"); 499 if ((tempfd = mkstemp(tempname)) < 0) { 500 perror(getprogname()); 501 return NULL; 502 } 503 504 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0) 505 if (write(tempfd, buffer, bytes) != bytes) { 506 bytes = -1; 507 break; 508 } 509 510 if ((bytes < 0) || 511 (lseek(tempfd, 0, SEEK_SET) < 0) || 512 (pipe(pipefd) < 0)) { 513 (void)close(tempfd); 514 (void)unlink(tempname); 515 return NULL; 516 } 517 518 switch (child = vfork()) { 519 case -1: 520 perror(getprogname()); 521 (void)close(pipefd[1]); 522 (void)close(pipefd[0]); 523 (void)close(tempfd); 524 (void)unlink(tempname); 525 return NULL; 526 /* NOTREACHED */ 527 case 0: 528 (void)close(pipefd[0]); 529 if (tempfd != STDIN_FILENO) { 530 (void)dup2(tempfd, STDIN_FILENO); 531 (void)close(tempfd); 532 } 533 if (pipefd[1] != STDOUT_FILENO) { 534 (void)dup2(pipefd[1], STDOUT_FILENO); 535 (void)close(pipefd[1]); 536 } 537 if (devnull != STDERR_FILENO) { 538 (void)dup2(devnull, STDERR_FILENO); 539 (void)close(devnull); 540 } 541 (void)execlp("nroff", "nroff", "-S", "-man", NULL); 542 _exit(EXIT_FAILURE); 543 default: 544 (void)close(pipefd[1]); 545 (void)close(tempfd); 546 /* NOTREACHED */ 547 } 548 549 if ((in = gzdopen(pipefd[0], "r")) == NULL) { 550 if (errno == 0) 551 errno = ENOMEM; 552 perror(getprogname()); 553 (void)close(pipefd[0]); 554 (void)kill(child, SIGTERM); 555 while (waitpid(child, NULL, 0) != child); 556 (void)unlink(tempname); 557 return NULL; 558 } 559 560 data = parsecatpage(in); 561 while (gzread(in, buffer, sizeof(buffer)) > 0); 562 (void)gzclose(in); 563 564 while (waitpid(child, &status, 0) != child); 565 if ((data != NULL) && 566 !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) { 567 free(data); 568 data = NULL; 569 } 570 571 (void)unlink(tempname); 572 573 return data; 574 } 575 576 char * 577 parsemanpage(gzFile *in, int defaultsection) 578 { 579 char *section, buffer[8192], *ptr; 580 581 section = NULL; 582 do { 583 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) { 584 free(section); 585 return NULL; 586 } 587 if (manpreprocess(buffer)) 588 continue; 589 if (strncasecmp(buffer, ".Dt", 3) == 0) { 590 char *end; 591 592 ptr = &buffer[3]; 593 if (isspace(*ptr)) 594 ptr++; 595 if ((ptr = findwhitespace(ptr)) == NULL) 596 continue; 597 598 if ((end = findwhitespace(++ptr)) != NULL) 599 *end = '\0'; 600 601 free(section); 602 section = createsectionstring(ptr); 603 } 604 else if (strncasecmp(buffer, ".TH", 3) == 0) { 605 ptr = &buffer[3]; 606 while (isspace(*ptr)) 607 ptr++; 608 if ((ptr = findwhitespace(ptr)) != NULL) { 609 char *next; 610 611 while (isspace(*ptr)) 612 ptr++; 613 if ((next = findwhitespace(ptr)) != NULL) 614 *next = '\0'; 615 free(section); 616 section = createsectionstring(ptr); 617 } 618 } 619 else if (strncasecmp(buffer, ".Ds", 3) == 0) { 620 free(section); 621 return NULL; 622 } 623 } while (strncasecmp(buffer, ".Sh NAME", 8) != 0); 624 625 do { 626 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) { 627 free(section); 628 return NULL; 629 } 630 } while (manpreprocess(buffer)); 631 632 if (strncasecmp(buffer, ".Nm", 3) == 0) { 633 int length, offset; 634 635 ptr = &buffer[3]; 636 while (isspace(*ptr)) 637 ptr++; 638 639 length = strlen(ptr); 640 if ((length > 1) && (ptr[length - 1] == ',') && 641 isspace(ptr[length - 2])) { 642 ptr[--length] = '\0'; 643 ptr[length - 1] = ','; 644 } 645 (void) memmove(buffer, ptr, length + 1); 646 647 offset = length + 3; 648 ptr = &buffer[offset]; 649 for (;;) { 650 int more; 651 652 if ((sizeof(buffer) == offset) || 653 (GetS(in, ptr, sizeof(buffer) - offset) 654 == NULL)) { 655 free(section); 656 return NULL; 657 } 658 if (manpreprocess(ptr)) 659 continue; 660 661 if (strncasecmp(ptr, ".Nm", 3) != 0) break; 662 663 ptr += 3; 664 if (isspace(*ptr)) 665 ptr++; 666 667 buffer[length++] = ' '; 668 more = strlen(ptr); 669 if ((more > 1) && (ptr[more - 1] == ',') && 670 isspace(ptr[more - 2])) { 671 ptr[--more] = '\0'; 672 ptr[more - 1] = ','; 673 } 674 675 (void) memmove(&buffer[length], ptr, more + 1); 676 length += more; 677 offset = length + 3; 678 679 ptr = &buffer[offset]; 680 } 681 682 if (strncasecmp(ptr, ".Nd", 3) == 0) { 683 (void) strcpy(&buffer[length], " -"); 684 685 while (strncasecmp(ptr, ".Sh", 3) != 0) { 686 int more; 687 688 if (*ptr == '.') { 689 char *space; 690 691 if (strncasecmp(ptr, ".Nd", 3) != 0) { 692 free(section); 693 return NULL; 694 } 695 space = findwhitespace(ptr); 696 if (space == NULL) 697 ptr = ""; 698 else { 699 space++; 700 (void) strmove(ptr, space); 701 } 702 } 703 704 if (*ptr != '\0') { 705 buffer[offset - 1] = ' '; 706 more = strlen(ptr) + 1; 707 offset += more; 708 } 709 ptr = &buffer[offset]; 710 if ((sizeof(buffer) == offset) || 711 (GetS(in, ptr, sizeof(buffer) - offset) 712 == NULL)) { 713 free(section); 714 return NULL; 715 } 716 if (manpreprocess(ptr)) 717 *ptr = '\0'; 718 } 719 } 720 } 721 else { 722 int offset; 723 724 if (*buffer == '.') { 725 char *space; 726 727 if ((space = findwhitespace(&buffer[1])) == NULL) { 728 free(section); 729 return NULL; 730 } 731 space++; 732 (void) strmove(buffer, space); 733 } 734 735 offset = strlen(buffer) + 1; 736 for (;;) { 737 int more; 738 739 ptr = &buffer[offset]; 740 if ((sizeof(buffer) == offset) || 741 (GetS(in, ptr, sizeof(buffer) - offset) 742 == NULL)) { 743 free(section); 744 return NULL; 745 } 746 if (manpreprocess(ptr) || (*ptr == '\0')) 747 continue; 748 749 if ((strncasecmp(ptr, ".Sh", 3) == 0) || 750 (strncasecmp(ptr, ".Ss", 3) == 0)) 751 break; 752 753 if (*ptr == '.') { 754 char *space; 755 756 if ((space = findwhitespace(ptr)) == NULL) { 757 continue; 758 } 759 760 space++; 761 (void) memmove(ptr, space, strlen(space) + 1); 762 } 763 764 buffer[offset - 1] = ' '; 765 more = strlen(ptr); 766 if ((more > 1) && (ptr[more - 1] == ',') && 767 isspace(ptr[more - 2])) { 768 ptr[more - 1] = '\0'; 769 ptr[more - 2] = ','; 770 } 771 else more++; 772 offset += more; 773 } 774 } 775 776 if (section == NULL) { 777 char sectionbuffer[24]; 778 779 (void) sprintf(sectionbuffer, " (%c) - ", 780 sectionext[defaultsection]); 781 ptr = replacestring(buffer, " - ", sectionbuffer); 782 } 783 else { 784 ptr = replacestring(buffer, " - ", section); 785 free(section); 786 } 787 return ptr; 788 } 789 790 char * 791 getwhatisdata(char *name) 792 { 793 gzFile *in; 794 char *data; 795 int section; 796 797 if ((in = gzopen(name, "r")) == NULL) { 798 errx(EXIT_FAILURE, "%s: %s", 799 name, 800 strerror((errno == 0) ? ENOMEM : errno)); 801 /* NOTREACHED */ 802 } 803 804 section = manpagesection(name); 805 if (section == 0) 806 data = parsecatpage(in); 807 else { 808 data = parsemanpage(in, section); 809 if (data == NULL) 810 data = nroff(in); 811 } 812 813 (void) gzclose(in); 814 return data; 815 } 816 817 void 818 processmanpages(manpage **source, whatis **dest) 819 { 820 manpage *mp; 821 822 mp = *source; 823 *source = NULL; 824 825 while (mp != NULL) { 826 manpage *obsolete; 827 char *data; 828 829 if (mp->mp_left != NULL) 830 processmanpages(&mp->mp_left,dest); 831 832 if ((data = getwhatisdata(mp->mp_name)) != NULL) { 833 if (!addwhatis(dest,data)) 834 err(EXIT_FAILURE, NULL); 835 } 836 837 obsolete = mp; 838 mp = mp->mp_right; 839 free(obsolete); 840 } 841 } 842 843 int 844 dumpwhatis (FILE *out, whatis *tree) 845 { 846 while (tree != NULL) { 847 if (tree->wi_left) 848 if (!dumpwhatis(out, tree->wi_left)) return 0; 849 850 if ((fputs(tree->wi_data, out) == EOF) || 851 (fputc('\n', out) == EOF)) 852 return 0; 853 854 tree = tree->wi_right; 855 } 856 857 return 1; 858 } 859