1 /* $NetBSD: makewhatis.c,v 1.13 2001/02/19 22:46:14 cgd Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Matthias Scheler. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #ifndef lint 41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\ 42 All rights reserved.\n"); 43 #endif /* not lint */ 44 45 #ifndef lint 46 __RCSID("$NetBSD: makewhatis.c,v 1.13 2001/02/19 22:46:14 cgd Exp $"); 47 #endif /* not lint */ 48 49 #include <sys/types.h> 50 #include <sys/param.h> 51 #include <sys/stat.h> 52 #include <sys/wait.h> 53 54 #include <ctype.h> 55 #include <err.h> 56 #include <errno.h> 57 #include <fcntl.h> 58 #include <fts.h> 59 #include <locale.h> 60 #include <paths.h> 61 #include <signal.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <string.h> 65 #include <unistd.h> 66 #include <zlib.h> 67 68 typedef struct manpagestruct manpage; 69 struct manpagestruct { 70 manpage *mp_left,*mp_right; 71 ino_t mp_inode; 72 char mp_name[1]; 73 }; 74 75 typedef struct whatisstruct whatis; 76 struct whatisstruct { 77 whatis *wi_left,*wi_right; 78 char *wi_data; 79 }; 80 81 int main (int, char **); 82 char *findwhitespace(char *); 83 char *GetS(gzFile, char *, int); 84 int manpagesection (char *); 85 int addmanpage (manpage **, ino_t, char *); 86 int addwhatis (whatis **, char *); 87 char *replacestring (char *, char *, char *); 88 void catpreprocess (char *); 89 char *parsecatpage (gzFile *); 90 int manpreprocess (char *); 91 char *nroff (gzFile *); 92 char *parsemanpage (gzFile *, int); 93 char *getwhatisdata (char *); 94 void processmanpages (manpage **,whatis **); 95 int dumpwhatis (FILE *, whatis *); 96 97 char *default_manpath[] = { 98 "/usr/share/man", 99 NULL 100 }; 101 102 char sectionext[] = "0123456789ln"; 103 char whatisdb[] = "whatis.db"; 104 105 int 106 main(int argc,char **argv) 107 { 108 char **manpath; 109 FTS *fts; 110 FTSENT *fe; 111 manpage *source; 112 whatis *dest; 113 FILE *out; 114 115 (void)setlocale(LC_ALL, ""); 116 117 manpath = (argc < 2) ? default_manpath : &argv[1]; 118 119 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) { 120 perror(getprogname()); 121 return EXIT_FAILURE; 122 } 123 124 source = NULL; 125 while ((fe = fts_read(fts)) != NULL) { 126 switch (fe->fts_info) { 127 case FTS_F: 128 if (manpagesection(fe->fts_path) >= 0) 129 if (!addmanpage(&source, 130 fe->fts_statp->st_ino, 131 fe->fts_path)) 132 err(EXIT_FAILURE, NULL); 133 case FTS_D: 134 case FTS_DC: 135 case FTS_DEFAULT: 136 case FTS_DP: 137 case FTS_SLNONE: 138 break; 139 default: 140 errx(EXIT_FAILURE, "%s: %s", fe->fts_path, 141 strerror(fe->fts_errno)); 142 143 } 144 } 145 146 (void)fts_close(fts); 147 148 dest = NULL; 149 processmanpages(&source, &dest); 150 151 if (chdir(manpath[0]) < 0) 152 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno)); 153 154 if ((out = fopen(whatisdb, "w")) == NULL) 155 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno)); 156 157 if (!(dumpwhatis(out, dest) || 158 (fclose(out) < 0)) || 159 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0)) 160 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno)); 161 162 return EXIT_SUCCESS; 163 } 164 165 char 166 *findwhitespace(char *str) 167 168 { 169 while (!isspace(*str)) 170 if (*str++ == '\0') { 171 str = NULL; 172 break; 173 } 174 175 return str; 176 } 177 178 char 179 *GetS(gzFile in, char *buffer, int length) 180 181 { 182 char *ptr; 183 184 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0')) 185 ptr = NULL; 186 187 return ptr; 188 } 189 190 int 191 manpagesection(char *name) 192 { 193 char *ptr; 194 195 if ((ptr = strrchr(name, '/')) != NULL) 196 ptr++; 197 else 198 ptr = name; 199 200 while ((ptr = strchr(ptr, '.')) != NULL) { 201 int section; 202 203 ptr++; 204 section=0; 205 while (sectionext[section] != '\0') 206 if (sectionext[section] == *ptr) 207 return section; 208 else 209 section++; 210 } 211 212 return -1; 213 } 214 215 int 216 addmanpage(manpage **tree,ino_t inode,char *name) 217 { 218 manpage *mp; 219 220 while ((mp = *tree) != NULL) { 221 if (mp->mp_inode == inode) 222 return 1; 223 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right); 224 } 225 226 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL) 227 return 0; 228 229 mp->mp_left = NULL; 230 mp->mp_right = NULL; 231 mp->mp_inode = inode; 232 (void) strcpy(mp->mp_name, name); 233 *tree = mp; 234 235 return 1; 236 } 237 238 int 239 addwhatis(whatis **tree, char *data) 240 { 241 whatis *wi; 242 int result; 243 244 while (isspace(*data)) 245 data++; 246 247 if (*data == '/') { 248 char *ptr; 249 250 ptr = ++data; 251 while ((*ptr != '\0') && !isspace(*ptr)) 252 if (*ptr++ == '/') 253 data = ptr; 254 } 255 256 while ((wi = *tree) != NULL) { 257 result=strcmp(data, wi->wi_data); 258 if (result == 0) return 1; 259 tree = &((result < 0) ? wi->wi_left : wi->wi_right); 260 } 261 262 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL) 263 return 0; 264 265 wi->wi_left = NULL; 266 wi->wi_right = NULL; 267 wi->wi_data = data; 268 *tree = wi; 269 270 return 1; 271 } 272 273 void 274 catpreprocess(char *from) 275 { 276 char *to; 277 278 to = from; 279 while (isspace(*from)) from++; 280 281 while (*from != '\0') 282 if (isspace(*from)) { 283 while (isspace(*++from)); 284 if (*from != '\0') 285 *to++ = ' '; 286 } 287 else if (*(from + 1) == '\10') 288 from += 2; 289 else 290 *to++ = *from++; 291 292 *to = '\0'; 293 } 294 295 char * 296 replacestring(char *string, char *old, char *new) 297 298 { 299 char *ptr, *result; 300 int slength, olength, nlength, pos; 301 302 if (new == NULL) 303 return strdup(string); 304 305 ptr = strstr(string, old); 306 if (ptr == NULL) 307 return strdup(string); 308 309 slength = strlen(string); 310 olength = strlen(old); 311 nlength = strlen(new); 312 if ((result = malloc(slength - olength + nlength + 1)) == NULL) 313 return NULL; 314 315 pos = ptr - string; 316 (void) memcpy(result, string, pos); 317 (void) memcpy(&result[pos], new, nlength); 318 (void) strcpy(&result[pos + nlength], &string[pos + olength]); 319 320 return result; 321 } 322 323 char * 324 parsecatpage(gzFile *in) 325 { 326 char buffer[8192]; 327 char *section, *ptr, *last; 328 int size; 329 330 do { 331 if (GetS(in, buffer, sizeof(buffer)) == NULL) 332 return NULL; 333 } 334 while (buffer[0] == '\n'); 335 336 section = NULL; 337 if ((ptr = strchr(buffer, '(')) != NULL) { 338 if ((last = strchr(ptr + 1, ')')) !=NULL) { 339 int length; 340 341 length = last - ptr + 1; 342 if ((section = malloc(length + 5)) == NULL) 343 return NULL; 344 345 *section = ' '; 346 (void) memcpy(section + 1, ptr, length); 347 (void) strcpy(section + 1 + length, " - "); 348 } 349 } 350 351 for (;;) { 352 if (GetS(in, buffer, sizeof(buffer)) == NULL) { 353 free(section); 354 return NULL; 355 } 356 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0) 357 break; 358 } 359 360 ptr = last = buffer; 361 size = sizeof(buffer) - 1; 362 while ((size > 0) && (GetS(in, ptr, size) != NULL)) { 363 int length; 364 365 catpreprocess(ptr); 366 367 length = strlen(ptr); 368 if (length == 0) { 369 *last = '\0'; 370 371 ptr = replacestring(buffer, " - ", section); 372 free(section); 373 return ptr; 374 } 375 if ((length > 1) && (ptr[length - 1] == '-') && 376 isalpha(ptr[length - 2])) 377 last = &ptr[--length]; 378 else { 379 last = &ptr[length++]; 380 *last = ' '; 381 } 382 383 ptr += length; 384 size -= length; 385 } 386 387 free(section); 388 389 return NULL; 390 } 391 392 int 393 manpreprocess(char *line) 394 { 395 char *from, *to; 396 397 to = from = line; 398 while (isspace(*from)) from++; 399 if (strncmp(from, ".\\\"", 3) == 0) 400 return 1; 401 402 while (*from != '\0') 403 if (isspace(*from)) { 404 while (isspace(*++from)); 405 if ((*from != '\0') && (*from != ',')) 406 *to++ = ' '; 407 } 408 else if (*from == '\\') 409 switch (*++from) { 410 case '\0': 411 case '-': 412 break; 413 case 's': 414 if ((*from=='+') || (*from=='-')) 415 from++; 416 while (isdigit(*from)) 417 from++; 418 break; 419 default: 420 from++; 421 } 422 else 423 if (*from == '"') 424 from++; 425 else 426 *to++ = *from++; 427 428 *to = '\0'; 429 430 if (strncasecmp(line, ".Xr", 3) == 0) { 431 char *sect; 432 433 from = line + 3; 434 if (isspace(*from)) 435 from++; 436 437 if ((sect = findwhitespace(from)) != NULL) { 438 int length; 439 440 *sect++ = '\0'; 441 length = strlen(from); 442 (void) memmove(line, from, length); 443 line[length++] = '('; 444 to = &line[length]; 445 length = strlen(sect); 446 (void) memmove(to, sect, length); 447 (void) strcpy(&to[length], ")"); 448 } 449 } 450 451 return 0; 452 } 453 454 char * 455 nroff(gzFile *in) 456 { 457 char tempname[MAXPATHLEN], buffer[65536], *data; 458 int tempfd, bytes, pipefd[2], status; 459 static int devnull = -1; 460 pid_t child; 461 462 if (gzrewind(in) < 0) { 463 perror(getprogname()); 464 return NULL; 465 } 466 467 if ((devnull < 0) && 468 ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0)) { 469 perror(getprogname()); 470 return NULL; 471 } 472 473 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX"); 474 if ((tempfd = mkstemp(tempname)) < 0) { 475 perror(getprogname()); 476 return NULL; 477 } 478 479 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0) 480 if (write(tempfd, buffer, bytes) != bytes) { 481 bytes = -1; 482 break; 483 } 484 485 if ((bytes < 0) || 486 (lseek(tempfd, 0, SEEK_SET) < 0) || 487 (pipe(pipefd) < 0)) { 488 perror(getprogname()); 489 (void)close(tempfd); 490 (void)unlink(tempname); 491 return NULL; 492 } 493 494 switch (child = vfork()) { 495 case -1: 496 perror(getprogname()); 497 (void)close(pipefd[1]); 498 (void)close(pipefd[0]); 499 (void)close(tempfd); 500 (void)unlink(tempname); 501 return NULL; 502 /* NOTREACHED */ 503 case 0: 504 (void)close(pipefd[0]); 505 if (tempfd != STDIN_FILENO) { 506 (void)dup2(tempfd, STDIN_FILENO); 507 (void)close(tempfd); 508 } 509 if (pipefd[1] != STDOUT_FILENO) { 510 (void)dup2(pipefd[1], STDOUT_FILENO); 511 (void)close(pipefd[1]); 512 } 513 if (devnull != STDERR_FILENO) { 514 (void)dup2(devnull, STDERR_FILENO); 515 (void)close(devnull); 516 } 517 (void)execlp("nroff", "nroff", "-S", "-man", NULL); 518 _exit(EXIT_FAILURE); 519 default: 520 (void)close(pipefd[1]); 521 (void)close(tempfd); 522 /* NOTREACHED */ 523 } 524 525 if ((in = gzdopen(pipefd[0], "r")) == NULL) { 526 if (errno == 0) 527 errno = ENOMEM; 528 perror(getprogname()); 529 (void)close(pipefd[0]); 530 (void)kill(child, SIGTERM); 531 while (waitpid(child, NULL, 0) != child); 532 (void)unlink(tempname); 533 return NULL; 534 } 535 536 data = parsecatpage(in); 537 while (gzread(in, buffer, sizeof(buffer)) > 0); 538 (void)gzclose(in); 539 540 while (waitpid(child, &status, 0) != child); 541 if ((data != NULL) && 542 !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) { 543 free(data); 544 data = NULL; 545 } 546 547 (void)unlink(tempname); 548 549 return data; 550 } 551 552 char * 553 parsemanpage(gzFile *in, int defaultsection) 554 { 555 char *section, buffer[8192], *ptr; 556 557 section = NULL; 558 do { 559 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) { 560 free(section); 561 return NULL; 562 } 563 if (manpreprocess(buffer)) 564 continue; 565 if (strncasecmp(buffer, ".Dt", 3) == 0) { 566 char *end; 567 568 ptr = &buffer[3]; 569 if (isspace(*ptr)) 570 ptr++; 571 if ((ptr = findwhitespace(ptr)) == NULL) 572 continue; 573 574 if ((end = findwhitespace(++ptr)) != NULL) 575 *end = '\0'; 576 577 free(section); 578 if ((section = malloc(strlen(ptr) + 7)) != NULL) { 579 section[0] = ' '; 580 section[1] = '('; 581 (void) strcpy(§ion[2], ptr); 582 (void) strcat(§ion[2], ") - "); 583 } 584 } 585 else if (strncasecmp(buffer, ".Ds", 3) == 0) 586 return nroff(in); 587 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0)); 588 589 do { 590 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) { 591 free(section); 592 return NULL; 593 } 594 } while (manpreprocess(buffer)); 595 596 if (strncasecmp(buffer, ".Nm", 3) == 0) { 597 int length, offset; 598 599 ptr = &buffer[3]; 600 while (isspace(*ptr)) 601 ptr++; 602 603 length = strlen(ptr); 604 if ((length > 1) && (ptr[length - 1] == ',') && 605 isspace(ptr[length - 2])) { 606 ptr[--length] = '\0'; 607 ptr[length - 1] = ','; 608 } 609 (void) memmove(buffer, ptr, length + 1); 610 611 offset = length + 3; 612 ptr = &buffer[offset]; 613 for (;;) { 614 int more; 615 616 if ((sizeof(buffer) == offset) || 617 (GetS(in, ptr, sizeof(buffer) - offset) 618 == NULL)) { 619 free(section); 620 return NULL; 621 } 622 if (manpreprocess(ptr)) 623 continue; 624 625 if (strncasecmp(ptr, ".Nm", 3) != 0) break; 626 627 ptr += 3; 628 if (isspace(*ptr)) 629 ptr++; 630 631 buffer[length++] = ' '; 632 more = strlen(ptr); 633 if ((more > 1) && (ptr[more - 1] == ',') && 634 isspace(ptr[more - 2])) { 635 ptr[--more] = '\0'; 636 ptr[more - 1] = ','; 637 } 638 639 (void) memmove(&buffer[length], ptr, more + 1); 640 length += more; 641 offset = length + 3; 642 643 ptr = &buffer[offset]; 644 } 645 646 if (strncasecmp(ptr, ".Nd", 3) == 0) { 647 (void) strcpy(&buffer[length], " -"); 648 649 while (strncasecmp(ptr, ".Sh", 3) != 0) { 650 int more; 651 652 if (*ptr == '.') { 653 char *space; 654 655 if ((space = findwhitespace(ptr)) == NULL) 656 ptr = ""; 657 else { 658 space++; 659 (void) memmove(ptr, space, 660 strlen(space) + 1); 661 } 662 } 663 664 if (*ptr != '\0') { 665 buffer[offset - 1] = ' '; 666 more = strlen(ptr) + 1; 667 offset += more; 668 } 669 ptr = &buffer[offset]; 670 if ((sizeof(buffer) == offset) || 671 (GetS(in, ptr, sizeof(buffer) - offset) 672 == NULL)) { 673 free(section); 674 return NULL; 675 } 676 if (manpreprocess(ptr)) 677 *ptr = '\0'; 678 } 679 } 680 } 681 else { 682 int offset; 683 684 if (*buffer == '.') { 685 char *space; 686 687 if ((space = findwhitespace(buffer)) == NULL) { 688 free(section); 689 return NULL; 690 } 691 space++; 692 (void) memmove(buffer, space, strlen(space) + 1); 693 } 694 695 offset = strlen(buffer) + 1; 696 for (;;) { 697 int more; 698 699 ptr = &buffer[offset]; 700 if ((sizeof(buffer) == offset) || 701 (GetS(in, ptr, sizeof(buffer) - offset) 702 == NULL)) { 703 free(section); 704 return NULL; 705 } 706 if (manpreprocess(ptr) || (*ptr == '\0')) 707 continue; 708 709 if ((strncasecmp(ptr, ".Sh", 3) == 0) || 710 (strncasecmp(ptr, ".Ss", 3) == 0)) 711 break; 712 713 if (*ptr == '.') { 714 char *space; 715 716 if ((space = findwhitespace(ptr)) == NULL) { 717 continue; 718 } 719 720 space++; 721 (void) memmove(ptr, space, strlen(space) + 1); 722 } 723 724 buffer[offset - 1] = ' '; 725 more = strlen(ptr); 726 if ((more > 1) && (ptr[more - 1] == ',') && 727 isspace(ptr[more - 2])) { 728 ptr[more - 1] = '\0'; 729 ptr[more - 2] = ','; 730 } 731 else more++; 732 offset += more; 733 } 734 } 735 736 if (section == NULL) { 737 char sectionbuffer[24]; 738 739 (void) sprintf(sectionbuffer, " (%c) - ", 740 sectionext[defaultsection]); 741 ptr = replacestring(buffer, " - ", sectionbuffer); 742 } 743 else { 744 ptr = replacestring(buffer, " - ", section); 745 free(section); 746 } 747 return ptr; 748 } 749 750 char * 751 getwhatisdata(char *name) 752 { 753 gzFile *in; 754 char *data; 755 int section; 756 757 if ((in = gzopen(name, "r")) == NULL) { 758 errx(EXIT_FAILURE, "%s: %s", 759 name, 760 strerror((errno == 0) ? ENOMEM : errno)); 761 /* NOTREACHED */ 762 } 763 764 section = manpagesection(name); 765 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section); 766 767 (void) gzclose(in); 768 return data; 769 } 770 771 void 772 processmanpages(manpage **source, whatis **dest) 773 { 774 manpage *mp; 775 776 mp = *source; 777 *source = NULL; 778 779 while (mp != NULL) { 780 manpage *obsolete; 781 char *data; 782 783 if (mp->mp_left != NULL) 784 processmanpages(&mp->mp_left,dest); 785 786 if ((data = getwhatisdata(mp->mp_name)) != NULL) { 787 if (!addwhatis(dest,data)) 788 err(EXIT_FAILURE, NULL); 789 } 790 791 obsolete = mp; 792 mp = mp->mp_right; 793 free(obsolete); 794 } 795 } 796 797 int 798 dumpwhatis (FILE *out, whatis *tree) 799 { 800 while (tree != NULL) { 801 if (tree->wi_left) 802 if (!dumpwhatis(out, tree->wi_left)) return 0; 803 804 if ((fputs(tree->wi_data, out) == EOF) || 805 (fputc('\n', out) == EOF)) 806 return 0; 807 808 tree = tree->wi_right; 809 } 810 811 return 1; 812 } 813