1 /* $NetBSD: makewhatis.c,v 1.12 2000/07/13 06:45:22 tron Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Matthias Scheler. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #ifndef lint 41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\ 42 All rights reserved.\n"); 43 #endif /* not lint */ 44 45 #ifndef lint 46 __RCSID("$NetBSD: makewhatis.c,v 1.12 2000/07/13 06:45:22 tron Exp $"); 47 #endif /* not lint */ 48 49 #include <sys/types.h> 50 #include <sys/param.h> 51 #include <sys/stat.h> 52 #include <sys/wait.h> 53 54 #include <ctype.h> 55 #include <err.h> 56 #include <errno.h> 57 #include <fcntl.h> 58 #include <fts.h> 59 #include <locale.h> 60 #include <paths.h> 61 #include <signal.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <string.h> 65 #include <unistd.h> 66 #include <zlib.h> 67 68 typedef struct manpagestruct manpage; 69 struct manpagestruct { 70 manpage *mp_left,*mp_right; 71 ino_t mp_inode; 72 char mp_name[1]; 73 }; 74 75 typedef struct whatisstruct whatis; 76 struct whatisstruct { 77 whatis *wi_left,*wi_right; 78 char *wi_data; 79 }; 80 81 int main (int, char **); 82 char *findwhitespace(char *); 83 char *GetS(gzFile, char *, int); 84 int manpagesection (char *); 85 int addmanpage (manpage **, ino_t, char *); 86 int addwhatis (whatis **, char *); 87 char *replacestring (char *, char *, char *); 88 void catpreprocess (char *); 89 char *parsecatpage (gzFile *); 90 int manpreprocess (char *); 91 char *nroff (gzFile *); 92 char *parsemanpage (gzFile *, int); 93 char *getwhatisdata (char *); 94 void processmanpages (manpage **,whatis **); 95 int dumpwhatis (FILE *, whatis *); 96 97 char *default_manpath[] = { 98 "/usr/share/man", 99 NULL 100 }; 101 102 char sectionext[] = "0123456789ln"; 103 char whatisdb[] = "whatis.db"; 104 105 extern char *__progname; 106 107 int 108 main(int argc,char **argv) 109 { 110 char **manpath; 111 FTS *fts; 112 FTSENT *fe; 113 manpage *source; 114 whatis *dest; 115 FILE *out; 116 117 (void)setlocale(LC_ALL, ""); 118 119 manpath = (argc < 2) ? default_manpath : &argv[1]; 120 121 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) { 122 perror(__progname); 123 return EXIT_FAILURE; 124 } 125 126 source = NULL; 127 while ((fe = fts_read(fts)) != NULL) { 128 switch (fe->fts_info) { 129 case FTS_F: 130 if (manpagesection(fe->fts_path) >= 0) 131 if (!addmanpage(&source, 132 fe->fts_statp->st_ino, 133 fe->fts_path)) 134 err(EXIT_FAILURE, NULL); 135 case FTS_D: 136 case FTS_DC: 137 case FTS_DEFAULT: 138 case FTS_DP: 139 case FTS_SLNONE: 140 break; 141 default: 142 errx(EXIT_FAILURE, "%s: %s", fe->fts_path, 143 strerror(fe->fts_errno)); 144 145 } 146 } 147 148 (void)fts_close(fts); 149 150 dest = NULL; 151 processmanpages(&source, &dest); 152 153 if (chdir(manpath[0]) < 0) 154 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno)); 155 156 if ((out = fopen(whatisdb, "w")) == NULL) 157 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno)); 158 159 if (!(dumpwhatis(out, dest) || 160 (fclose(out) < 0)) || 161 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0)) 162 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno)); 163 164 return EXIT_SUCCESS; 165 } 166 167 char 168 *findwhitespace(char *str) 169 170 { 171 while (!isspace(*str)) 172 if (*str++ == '\0') { 173 str = NULL; 174 break; 175 } 176 177 return str; 178 } 179 180 char 181 *GetS(gzFile in, char *buffer, int length) 182 183 { 184 char *ptr; 185 186 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0')) 187 ptr = NULL; 188 189 return ptr; 190 } 191 192 int 193 manpagesection(char *name) 194 { 195 char *ptr; 196 197 if ((ptr = strrchr(name, '/')) != NULL) 198 ptr++; 199 else 200 ptr = name; 201 202 while ((ptr = strchr(ptr, '.')) != NULL) { 203 int section; 204 205 ptr++; 206 section=0; 207 while (sectionext[section] != '\0') 208 if (sectionext[section] == *ptr) 209 return section; 210 else 211 section++; 212 } 213 214 return -1; 215 } 216 217 int 218 addmanpage(manpage **tree,ino_t inode,char *name) 219 { 220 manpage *mp; 221 222 while ((mp = *tree) != NULL) { 223 if (mp->mp_inode == inode) 224 return 1; 225 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right); 226 } 227 228 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL) 229 return 0; 230 231 mp->mp_left = NULL; 232 mp->mp_right = NULL; 233 mp->mp_inode = inode; 234 (void) strcpy(mp->mp_name, name); 235 *tree = mp; 236 237 return 1; 238 } 239 240 int 241 addwhatis(whatis **tree, char *data) 242 { 243 whatis *wi; 244 int result; 245 246 while (isspace(*data)) 247 data++; 248 249 if (*data == '/') { 250 char *ptr; 251 252 ptr = ++data; 253 while ((*ptr != '\0') && !isspace(*ptr)) 254 if (*ptr++ == '/') 255 data = ptr; 256 } 257 258 while ((wi = *tree) != NULL) { 259 result=strcmp(data, wi->wi_data); 260 if (result == 0) return 1; 261 tree = &((result < 0) ? wi->wi_left : wi->wi_right); 262 } 263 264 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL) 265 return 0; 266 267 wi->wi_left = NULL; 268 wi->wi_right = NULL; 269 wi->wi_data = data; 270 *tree = wi; 271 272 return 1; 273 } 274 275 void 276 catpreprocess(char *from) 277 { 278 char *to; 279 280 to = from; 281 while (isspace(*from)) from++; 282 283 while (*from != '\0') 284 if (isspace(*from)) { 285 while (isspace(*++from)); 286 if (*from != '\0') 287 *to++ = ' '; 288 } 289 else if (*(from + 1) == '\10') 290 from += 2; 291 else 292 *to++ = *from++; 293 294 *to = '\0'; 295 } 296 297 char * 298 replacestring(char *string, char *old, char *new) 299 300 { 301 char *ptr, *result; 302 int slength, olength, nlength, pos; 303 304 if (new == NULL) 305 return strdup(string); 306 307 ptr = strstr(string, old); 308 if (ptr == NULL) 309 return strdup(string); 310 311 slength = strlen(string); 312 olength = strlen(old); 313 nlength = strlen(new); 314 if ((result = malloc(slength - olength + nlength + 1)) == NULL) 315 return NULL; 316 317 pos = ptr - string; 318 (void) memcpy(result, string, pos); 319 (void) memcpy(&result[pos], new, nlength); 320 (void) strcpy(&result[pos + nlength], &string[pos + olength]); 321 322 return result; 323 } 324 325 char * 326 parsecatpage(gzFile *in) 327 { 328 char buffer[8192]; 329 char *section, *ptr, *last; 330 int size; 331 332 do { 333 if (GetS(in, buffer, sizeof(buffer)) == NULL) 334 return NULL; 335 } 336 while (buffer[0] == '\n'); 337 338 section = NULL; 339 if ((ptr = strchr(buffer, '(')) != NULL) { 340 if ((last = strchr(ptr + 1, ')')) !=NULL) { 341 int length; 342 343 length = last - ptr + 1; 344 if ((section = malloc(length + 5)) == NULL) 345 return NULL; 346 347 *section = ' '; 348 (void) memcpy(section + 1, ptr, length); 349 (void) strcpy(section + 1 + length, " - "); 350 } 351 } 352 353 for (;;) { 354 if (GetS(in, buffer, sizeof(buffer)) == NULL) { 355 free(section); 356 return NULL; 357 } 358 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0) 359 break; 360 } 361 362 ptr = last = buffer; 363 size = sizeof(buffer) - 1; 364 while ((size > 0) && (GetS(in, ptr, size) != NULL)) { 365 int length; 366 367 catpreprocess(ptr); 368 369 length = strlen(ptr); 370 if (length == 0) { 371 *last = '\0'; 372 373 ptr = replacestring(buffer, " - ", section); 374 free(section); 375 return ptr; 376 } 377 if ((length > 1) && (ptr[length - 1] == '-') && 378 isalpha(ptr[length - 2])) 379 last = &ptr[--length]; 380 else { 381 last = &ptr[length++]; 382 *last = ' '; 383 } 384 385 ptr += length; 386 size -= length; 387 } 388 389 free(section); 390 391 return NULL; 392 } 393 394 int 395 manpreprocess(char *line) 396 { 397 char *from, *to; 398 399 to = from = line; 400 while (isspace(*from)) from++; 401 if (strncmp(from, ".\\\"", 3) == 0) 402 return 1; 403 404 while (*from != '\0') 405 if (isspace(*from)) { 406 while (isspace(*++from)); 407 if ((*from != '\0') && (*from != ',')) 408 *to++ = ' '; 409 } 410 else if (*from == '\\') 411 switch (*++from) { 412 case '\0': 413 case '-': 414 break; 415 case 's': 416 if ((*from=='+') || (*from=='-')) 417 from++; 418 while (isdigit(*from)) 419 from++; 420 break; 421 default: 422 from++; 423 } 424 else 425 if (*from == '"') 426 from++; 427 else 428 *to++ = *from++; 429 430 *to = '\0'; 431 432 if (strncasecmp(line, ".Xr", 3) == 0) { 433 char *sect; 434 435 from = line + 3; 436 if (isspace(*from)) 437 from++; 438 439 if ((sect = findwhitespace(from)) != NULL) { 440 int length; 441 442 *sect++ = '\0'; 443 length = strlen(from); 444 (void) memmove(line, from, length); 445 line[length++] = '('; 446 to = &line[length]; 447 length = strlen(sect); 448 (void) memmove(to, sect, length); 449 (void) strcpy(&to[length], ")"); 450 } 451 } 452 453 return 0; 454 } 455 456 char * 457 nroff(gzFile *in) 458 { 459 char tempname[MAXPATHLEN], buffer[65536], *data; 460 int tempfd, bytes, pipefd[2], status; 461 static int devnull = -1; 462 pid_t child; 463 464 if (gzrewind(in) < 0) { 465 perror(__progname); 466 return NULL; 467 } 468 469 if ((devnull < 0) && 470 ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0)) { 471 perror(__progname); 472 return NULL; 473 } 474 475 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX"); 476 if ((tempfd = mkstemp(tempname)) < 0) { 477 perror(__progname); 478 return NULL; 479 } 480 481 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0) 482 if (write(tempfd, buffer, bytes) != bytes) { 483 bytes = -1; 484 break; 485 } 486 487 if ((bytes < 0) || 488 (lseek(tempfd, 0, SEEK_SET) < 0) || 489 (pipe(pipefd) < 0)) { 490 perror(__progname); 491 (void)close(tempfd); 492 (void)unlink(tempname); 493 return NULL; 494 } 495 496 switch (child = vfork()) { 497 case -1: 498 perror(__progname); 499 (void)close(pipefd[1]); 500 (void)close(pipefd[0]); 501 (void)close(tempfd); 502 (void)unlink(tempname); 503 return NULL; 504 /* NOTREACHED */ 505 case 0: 506 (void)close(pipefd[0]); 507 if (tempfd != STDIN_FILENO) { 508 (void)dup2(tempfd, STDIN_FILENO); 509 (void)close(tempfd); 510 } 511 if (pipefd[1] != STDOUT_FILENO) { 512 (void)dup2(pipefd[1], STDOUT_FILENO); 513 (void)close(pipefd[1]); 514 } 515 if (devnull != STDERR_FILENO) { 516 (void)dup2(devnull, STDERR_FILENO); 517 (void)close(devnull); 518 } 519 (void)execlp("nroff", "nroff", "-S", "-man", NULL); 520 _exit(EXIT_FAILURE); 521 default: 522 (void)close(pipefd[1]); 523 (void)close(tempfd); 524 /* NOTREACHED */ 525 } 526 527 if ((in = gzdopen(pipefd[0], "r")) == NULL) { 528 if (errno == 0) 529 errno = ENOMEM; 530 perror(__progname); 531 (void)close(pipefd[0]); 532 (void)kill(child, SIGTERM); 533 while (waitpid(child, NULL, 0) != child); 534 (void)unlink(tempname); 535 return NULL; 536 } 537 538 data = parsecatpage(in); 539 while (gzread(in, buffer, sizeof(buffer)) > 0); 540 (void)gzclose(in); 541 542 while (waitpid(child, &status, 0) != child); 543 if ((data != NULL) && 544 !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) { 545 free(data); 546 data = NULL; 547 } 548 549 (void)unlink(tempname); 550 551 return data; 552 } 553 554 char * 555 parsemanpage(gzFile *in, int defaultsection) 556 { 557 char *section, buffer[8192], *ptr; 558 559 section = NULL; 560 do { 561 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) { 562 free(section); 563 return NULL; 564 } 565 if (manpreprocess(buffer)) 566 continue; 567 if (strncasecmp(buffer, ".Dt", 3) == 0) { 568 char *end; 569 570 ptr = &buffer[3]; 571 if (isspace(*ptr)) 572 ptr++; 573 if ((ptr = findwhitespace(ptr)) == NULL) 574 continue; 575 576 if ((end = findwhitespace(++ptr)) != NULL) 577 *end = '\0'; 578 579 free(section); 580 if ((section = malloc(strlen(ptr) + 7)) != NULL) { 581 section[0] = ' '; 582 section[1] = '('; 583 (void) strcpy(§ion[2], ptr); 584 (void) strcat(§ion[2], ") - "); 585 } 586 } 587 else if (strncasecmp(buffer, ".Ds", 3) == 0) 588 return nroff(in); 589 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0)); 590 591 do { 592 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) { 593 free(section); 594 return NULL; 595 } 596 } while (manpreprocess(buffer)); 597 598 if (strncasecmp(buffer, ".Nm", 3) == 0) { 599 int length, offset; 600 601 ptr = &buffer[3]; 602 while (isspace(*ptr)) 603 ptr++; 604 605 length = strlen(ptr); 606 if ((length > 1) && (ptr[length - 1] == ',') && 607 isspace(ptr[length - 2])) { 608 ptr[--length] = '\0'; 609 ptr[length - 1] = ','; 610 } 611 (void) memmove(buffer, ptr, length + 1); 612 613 offset = length + 3; 614 ptr = &buffer[offset]; 615 for (;;) { 616 int more; 617 618 if ((sizeof(buffer) == offset) || 619 (GetS(in, ptr, sizeof(buffer) - offset) 620 == NULL)) { 621 free(section); 622 return NULL; 623 } 624 if (manpreprocess(ptr)) 625 continue; 626 627 if (strncasecmp(ptr, ".Nm", 3) != 0) break; 628 629 ptr += 3; 630 if (isspace(*ptr)) 631 ptr++; 632 633 buffer[length++] = ' '; 634 more = strlen(ptr); 635 if ((more > 1) && (ptr[more - 1] == ',') && 636 isspace(ptr[more - 2])) { 637 ptr[--more] = '\0'; 638 ptr[more - 1] = ','; 639 } 640 641 (void) memmove(&buffer[length], ptr, more + 1); 642 length += more; 643 offset = length + 3; 644 645 ptr = &buffer[offset]; 646 } 647 648 if (strncasecmp(ptr, ".Nd", 3) == 0) { 649 (void) strcpy(&buffer[length], " -"); 650 651 while (strncasecmp(ptr, ".Sh", 3) != 0) { 652 int more; 653 654 if (*ptr == '.') { 655 char *space; 656 657 if ((space = findwhitespace(ptr)) == NULL) 658 ptr = ""; 659 else { 660 space++; 661 (void) memmove(ptr, space, 662 strlen(space) + 1); 663 } 664 } 665 666 if (*ptr != '\0') { 667 buffer[offset - 1] = ' '; 668 more = strlen(ptr) + 1; 669 offset += more; 670 } 671 ptr = &buffer[offset]; 672 if ((sizeof(buffer) == offset) || 673 (GetS(in, ptr, sizeof(buffer) - offset) 674 == NULL)) { 675 free(section); 676 return NULL; 677 } 678 if (manpreprocess(ptr)) 679 *ptr = '\0'; 680 } 681 } 682 } 683 else { 684 int offset; 685 686 if (*buffer == '.') { 687 char *space; 688 689 if ((space = findwhitespace(buffer)) == NULL) { 690 free(section); 691 return NULL; 692 } 693 space++; 694 (void) memmove(buffer, space, strlen(space) + 1); 695 } 696 697 offset = strlen(buffer) + 1; 698 for (;;) { 699 int more; 700 701 ptr = &buffer[offset]; 702 if ((sizeof(buffer) == offset) || 703 (GetS(in, ptr, sizeof(buffer) - offset) 704 == NULL)) { 705 free(section); 706 return NULL; 707 } 708 if (manpreprocess(ptr) || (*ptr == '\0')) 709 continue; 710 711 if ((strncasecmp(ptr, ".Sh", 3) == 0) || 712 (strncasecmp(ptr, ".Ss", 3) == 0)) 713 break; 714 715 if (*ptr == '.') { 716 char *space; 717 718 if ((space = findwhitespace(ptr)) == NULL) { 719 continue; 720 } 721 722 space++; 723 (void) memmove(ptr, space, strlen(space) + 1); 724 } 725 726 buffer[offset - 1] = ' '; 727 more = strlen(ptr); 728 if ((more > 1) && (ptr[more - 1] == ',') && 729 isspace(ptr[more - 2])) { 730 ptr[more - 1] = '\0'; 731 ptr[more - 2] = ','; 732 } 733 else more++; 734 offset += more; 735 } 736 } 737 738 if (section == NULL) { 739 char sectionbuffer[24]; 740 741 (void) sprintf(sectionbuffer, " (%c) - ", 742 sectionext[defaultsection]); 743 ptr = replacestring(buffer, " - ", sectionbuffer); 744 } 745 else { 746 ptr = replacestring(buffer, " - ", section); 747 free(section); 748 } 749 return ptr; 750 } 751 752 char * 753 getwhatisdata(char *name) 754 { 755 gzFile *in; 756 char *data; 757 int section; 758 759 if ((in = gzopen(name, "r")) == NULL) { 760 errx(EXIT_FAILURE, "%s: %s", 761 name, 762 strerror((errno == 0) ? ENOMEM : errno)); 763 /* NOTREACHED */ 764 } 765 766 section = manpagesection(name); 767 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section); 768 769 (void) gzclose(in); 770 return data; 771 } 772 773 void 774 processmanpages(manpage **source, whatis **dest) 775 { 776 manpage *mp; 777 778 mp = *source; 779 *source = NULL; 780 781 while (mp != NULL) { 782 manpage *obsolete; 783 char *data; 784 785 if (mp->mp_left != NULL) 786 processmanpages(&mp->mp_left,dest); 787 788 if ((data = getwhatisdata(mp->mp_name)) != NULL) { 789 if (!addwhatis(dest,data)) 790 err(EXIT_FAILURE, NULL); 791 } 792 793 obsolete = mp; 794 mp = mp->mp_right; 795 free(obsolete); 796 } 797 } 798 799 int 800 dumpwhatis (FILE *out, whatis *tree) 801 { 802 while (tree != NULL) { 803 if (tree->wi_left) 804 if (!dumpwhatis(out, tree->wi_left)) return 0; 805 806 if ((fputs(tree->wi_data, out) == EOF) || 807 (fputc('\n', out) == EOF)) 808 return 0; 809 810 tree = tree->wi_right; 811 } 812 813 return 1; 814 } 815