1 /* $NetBSD: makewhatis.c,v 1.7 2000/01/24 23:03:54 tron Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Matthias Scheler. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #ifndef lint 41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\ 42 All rights reserved.\n"); 43 #endif /* not lint */ 44 45 #ifndef lint 46 __RCSID("$NetBSD: makewhatis.c,v 1.7 2000/01/24 23:03:54 tron Exp $"); 47 #endif /* not lint */ 48 49 #include <sys/types.h> 50 #include <sys/stat.h> 51 52 #include <ctype.h> 53 #include <err.h> 54 #include <errno.h> 55 #include <fts.h> 56 #include <locale.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <unistd.h> 61 #include <zlib.h> 62 63 typedef struct manpagestruct manpage; 64 struct manpagestruct { 65 manpage *mp_left,*mp_right; 66 ino_t mp_inode; 67 char mp_name[1]; 68 }; 69 70 typedef struct whatisstruct whatis; 71 struct whatisstruct { 72 whatis *wi_left,*wi_right; 73 char *wi_data; 74 }; 75 76 int main (int, char **); 77 char *findwhitespace(char *); 78 char *GetS(gzFile, char *, int); 79 int manpagesection (char *); 80 int addmanpage (manpage **, ino_t, char *); 81 int addwhatis (whatis **, char *); 82 char *replacestring (char *, char *, char *); 83 void catpreprocess (char *); 84 char *parsecatpage (gzFile *); 85 int manpreprocess (char *); 86 char *parsemanpage (gzFile *, int); 87 char *getwhatisdata (char *); 88 void processmanpages (manpage **,whatis **); 89 int dumpwhatis (FILE *, whatis *); 90 91 char *default_manpath[] = { 92 "/usr/share/man", 93 NULL 94 }; 95 96 char sectionext[] = "0123456789ln"; 97 char whatisdb[] = "whatis.db"; 98 99 extern char *__progname; 100 101 int 102 main(int argc,char **argv) 103 { 104 char **manpath; 105 FTS *fts; 106 FTSENT *fe; 107 manpage *source; 108 whatis *dest; 109 FILE *out; 110 111 (void)setlocale(LC_ALL, ""); 112 113 manpath = (argc < 2) ? default_manpath : &argv[1]; 114 115 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) { 116 perror(__progname); 117 return EXIT_FAILURE; 118 } 119 120 source = NULL; 121 while ((fe = fts_read(fts)) != NULL) { 122 switch (fe->fts_info) { 123 case FTS_F: 124 if (manpagesection(fe->fts_path) >= 0) 125 if (!addmanpage(&source, 126 fe->fts_statp->st_ino, 127 fe->fts_path)) 128 err(EXIT_FAILURE, NULL); 129 case FTS_D: 130 case FTS_DC: 131 case FTS_DEFAULT: 132 case FTS_DP: 133 case FTS_SLNONE: 134 break; 135 default: 136 errx(EXIT_FAILURE, "%s: %s", fe->fts_path, 137 strerror(fe->fts_errno)); 138 /* NOTREACHED */ 139 } 140 } 141 142 (void)fts_close(fts); 143 144 dest = NULL; 145 processmanpages(&source, &dest); 146 147 if (chdir(manpath[0]) < 0) 148 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno)); 149 150 if ((out = fopen(whatisdb, "w")) == NULL) 151 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno)); 152 153 if (!(dumpwhatis(out, dest) || 154 (fclose(out) < 0)) || 155 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0)) 156 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno)); 157 158 return EXIT_SUCCESS; 159 } 160 161 char 162 *findwhitespace(char *str) 163 164 { 165 while (!isspace(*str)) 166 if (*str++ == '\0') { 167 str = NULL; 168 break; 169 } 170 171 return str; 172 } 173 174 char 175 *GetS(gzFile in, char *buffer, int length) 176 177 { 178 char *ptr; 179 180 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0')) 181 ptr = NULL; 182 183 return ptr; 184 } 185 186 int 187 manpagesection(char *name) 188 { 189 char *ptr; 190 191 if ((ptr = strrchr(name, '/')) != NULL) 192 ptr++; 193 else 194 ptr = name; 195 196 while ((ptr = strchr(ptr, '.')) != NULL) { 197 int section; 198 199 ptr++; 200 section=0; 201 while (sectionext[section] != '\0') 202 if (sectionext[section] == *ptr) 203 return section; 204 else 205 section++; 206 } 207 208 return -1; 209 } 210 211 int 212 addmanpage(manpage **tree,ino_t inode,char *name) 213 { 214 manpage *mp; 215 216 while ((mp = *tree) != NULL) { 217 if (mp->mp_inode == inode) 218 return 1; 219 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right); 220 } 221 222 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL) 223 return 0; 224 225 mp->mp_left = NULL; 226 mp->mp_right = NULL; 227 mp->mp_inode = inode; 228 (void) strcpy(mp->mp_name, name); 229 *tree = mp; 230 231 return 1; 232 } 233 234 int 235 addwhatis(whatis **tree, char *data) 236 { 237 whatis *wi; 238 int result; 239 240 while (isspace(*data)) 241 data++; 242 243 if (*data == '/') { 244 char *ptr; 245 246 ptr = ++data; 247 while ((*ptr != '\0') && !isspace(*ptr)) 248 if (*ptr++ == '/') 249 data = ptr; 250 } 251 252 while ((wi = *tree) != NULL) { 253 result=strcmp(data, wi->wi_data); 254 if (result == 0) return 1; 255 tree = &((result < 0) ? wi->wi_left : wi->wi_right); 256 } 257 258 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL) 259 return 0; 260 261 wi->wi_left = NULL; 262 wi->wi_right = NULL; 263 wi->wi_data = data; 264 *tree = wi; 265 266 return 1; 267 } 268 269 void 270 catpreprocess(char *from) 271 { 272 char *to; 273 274 to = from; 275 while (isspace(*from)) from++; 276 277 while (*from != '\0') 278 if (isspace(*from)) { 279 while (isspace(*++from)); 280 if (*from != '\0') 281 *to++ = ' '; 282 } 283 else if (*(from + 1) == '\10') 284 from += 2; 285 else 286 *to++ = *from++; 287 288 *to = '\0'; 289 } 290 291 char * 292 replacestring(char *string, char *old, char *new) 293 294 { 295 char *ptr, *result; 296 int slength, olength, nlength, pos; 297 298 if (new == NULL) 299 return strdup(string); 300 301 ptr = strstr(string, old); 302 if (ptr == NULL) 303 return strdup(string); 304 305 slength = strlen(string); 306 olength = strlen(old); 307 nlength = strlen(new); 308 if ((result = malloc(slength - olength + nlength + 1)) == NULL) 309 return NULL; 310 311 pos = ptr - string; 312 (void) memcpy(result, string, pos); 313 (void) memcpy(&result[pos], new, nlength); 314 (void) strcpy(&result[pos + nlength], &string[pos + olength]); 315 316 return result; 317 } 318 319 char * 320 parsecatpage(gzFile *in) 321 { 322 char buffer[8192]; 323 char *section, *ptr, *last; 324 int size; 325 326 do { 327 if (GetS(in, buffer, sizeof(buffer)) == NULL) 328 return NULL; 329 } 330 while (buffer[0] == '\n'); 331 332 section = NULL; 333 if ((ptr = strchr(buffer, '(')) != NULL) { 334 if ((last = strchr(ptr + 1, ')')) !=NULL) { 335 int length; 336 337 length = last - ptr + 1; 338 if ((section = malloc(length + 5)) == NULL) 339 return NULL; 340 341 *section = ' '; 342 (void) memcpy(section + 1, ptr, length); 343 (void) strcpy(section + 1 + length, " - "); 344 } 345 } 346 347 for (;;) { 348 if (GetS(in, buffer, sizeof(buffer)) == NULL) { 349 free(section); 350 return NULL; 351 } 352 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0) 353 break; 354 } 355 356 ptr = last = buffer; 357 size = sizeof(buffer) - 1; 358 while ((size > 0) && (GetS(in, ptr, size) != NULL)) { 359 int length; 360 361 catpreprocess(ptr); 362 363 length = strlen(ptr); 364 if (length == 0) { 365 *last = '\0'; 366 367 ptr = replacestring(buffer, " - ", section); 368 free(section); 369 return ptr; 370 } 371 if ((length > 1) && (ptr[length - 1] == '-') && 372 isalpha(ptr[length - 2])) 373 last = &ptr[--length]; 374 else { 375 last = &ptr[length++]; 376 *last = ' '; 377 } 378 379 ptr += length; 380 size -= length; 381 } 382 383 free(section); 384 385 return NULL; 386 } 387 388 int 389 manpreprocess(char *line) 390 { 391 char *from, *to; 392 393 to = from = line; 394 while (isspace(*from)) from++; 395 if (strncmp(from, ".\\\"", 3) == 0) 396 return 1; 397 398 while (*from != '\0') 399 if (isspace(*from)) { 400 while (isspace(*++from)); 401 if ((*from != '\0') && (*from != ',')) 402 *to++ = ' '; 403 } 404 else if (*from == '\\') 405 switch (*++from) { 406 case '\0': 407 case '-': 408 break; 409 case 's': 410 if ((*from=='+') || (*from=='-')) 411 from++; 412 while (isdigit(*from)) 413 from++; 414 break; 415 default: 416 from++; 417 } 418 else 419 if (*from == '"') 420 from++; 421 else 422 *to++ = *from++; 423 424 *to = '\0'; 425 426 if (strncasecmp(line, ".Xr", 3) == 0) { 427 char *sect; 428 429 from = line + 3; 430 if (isspace(*from)) 431 from++; 432 433 if ((sect = findwhitespace(from)) != NULL) { 434 int length; 435 436 *sect++ = '\0'; 437 length = strlen(from); 438 (void) memmove(line, from, length); 439 line[length++] = '('; 440 to = &line[length]; 441 length = strlen(sect); 442 (void) memmove(to, sect, length); 443 (void) strcpy(&to[length], ")"); 444 } 445 } 446 447 return 0; 448 } 449 450 char * 451 parsemanpage(gzFile *in, int defaultsection) 452 { 453 char *section, buffer[8192], *ptr; 454 455 section = NULL; 456 do { 457 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) { 458 free(section); 459 return NULL; 460 } 461 if (manpreprocess(buffer)) 462 continue; 463 if (strncasecmp(buffer, ".Dt", 3) == 0) { 464 char *end; 465 466 ptr = &buffer[3]; 467 if (isspace(*ptr)) 468 ptr++; 469 if ((ptr = findwhitespace(ptr)) == NULL) 470 continue; 471 472 if ((end = findwhitespace(++ptr)) != NULL) 473 *end = '\0'; 474 475 free(section); 476 if ((section = malloc(strlen(ptr) + 7)) != NULL) { 477 section[0] = ' '; 478 section[1] = '('; 479 (void) strcpy(§ion[2], ptr); 480 (void) strcat(§ion[2], ") - "); 481 } 482 } 483 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0)); 484 485 do { 486 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) { 487 free(section); 488 return NULL; 489 } 490 } while (manpreprocess(buffer)); 491 492 if (strncasecmp(buffer, ".Nm", 3) == 0) { 493 int length, offset; 494 495 ptr = &buffer[3]; 496 while (isspace(*ptr)) 497 ptr++; 498 499 length = strlen(ptr); 500 if ((length > 1) && (ptr[length - 1] == ',') && 501 isspace(ptr[length - 2])) { 502 ptr[--length] = '\0'; 503 ptr[length - 1] = ','; 504 } 505 (void) memmove(buffer, ptr, length + 1); 506 507 offset = length + 3; 508 ptr = &buffer[offset]; 509 for (;;) { 510 int more; 511 512 if ((sizeof(buffer) == offset) || 513 (GetS(in, ptr, sizeof(buffer) - offset) 514 == NULL)) { 515 free(section); 516 return NULL; 517 } 518 if (manpreprocess(ptr)) 519 continue; 520 521 if (strncasecmp(ptr, ".Nm", 3) != 0) break; 522 523 ptr += 3; 524 if (isspace(*ptr)) 525 ptr++; 526 527 buffer[length++] = ' '; 528 more = strlen(ptr); 529 if ((more > 1) && (ptr[more - 1] == ',') && 530 isspace(ptr[more - 2])) { 531 ptr[--more] = '\0'; 532 ptr[more - 1] = ','; 533 } 534 535 (void) memmove(&buffer[length], ptr, more + 1); 536 length += more; 537 offset = length + 3; 538 539 ptr = &buffer[offset]; 540 } 541 542 if (strncasecmp(ptr, ".Nd", 3) == 0) { 543 (void) strcpy(&buffer[length], " -"); 544 545 while (strncasecmp(ptr, ".Sh", 3) != 0) { 546 int more; 547 548 if (*ptr == '.') { 549 char *space; 550 551 if ((space = findwhitespace(ptr)) == NULL) 552 ptr = ""; 553 else { 554 space++; 555 (void) memmove(ptr, space, 556 strlen(space) + 1); 557 } 558 } 559 560 if (*ptr != '\0') { 561 buffer[offset - 1] = ' '; 562 more = strlen(ptr) + 1; 563 offset += more; 564 } 565 ptr = &buffer[offset]; 566 if ((sizeof(buffer) == offset) || 567 (GetS(in, ptr, sizeof(buffer) - offset) 568 == NULL)) { 569 free(section); 570 return NULL; 571 } 572 if (manpreprocess(ptr)) 573 *ptr = '\0'; 574 } 575 } 576 } 577 else { 578 int offset; 579 580 if (*buffer == '.') { 581 char *space; 582 583 if ((space = findwhitespace(buffer)) == NULL) { 584 free(section); 585 return NULL; 586 } 587 space++; 588 (void) memmove(buffer, space, strlen(space) + 1); 589 } 590 591 offset = strlen(buffer) + 1; 592 for (;;) { 593 int more; 594 595 ptr = &buffer[offset]; 596 if ((sizeof(buffer) == offset) || 597 (GetS(in, ptr, sizeof(buffer) - offset) 598 == NULL)) { 599 free(section); 600 return NULL; 601 } 602 if (manpreprocess(ptr) || (*ptr == '\0')) 603 continue; 604 605 if ((strncasecmp(ptr, ".Sh", 3) == 0) || 606 (strncasecmp(ptr, ".Ss", 3) == 0)) 607 break; 608 609 if (*ptr == '.') { 610 char *space; 611 612 if ((space = findwhitespace(ptr)) == NULL) { 613 continue; 614 } 615 616 space++; 617 (void) memmove(ptr, space, strlen(space) + 1); 618 } 619 620 buffer[offset - 1] = ' '; 621 more = strlen(ptr); 622 if ((more > 1) && (ptr[more - 1] == ',') && 623 isspace(ptr[more - 2])) { 624 ptr[more - 1] = '\0'; 625 ptr[more - 2] = ','; 626 } 627 else more++; 628 offset += more; 629 } 630 } 631 632 if (section == NULL) { 633 char sectionbuffer[24]; 634 635 (void) sprintf(sectionbuffer, " (%c) - ", 636 sectionext[defaultsection]); 637 ptr = replacestring(buffer, " - ", sectionbuffer); 638 } 639 else { 640 ptr = replacestring(buffer, " - ", section); 641 free(section); 642 } 643 return ptr; 644 } 645 646 char * 647 getwhatisdata(char *name) 648 { 649 gzFile *in; 650 char *data; 651 int section; 652 653 if ((in = gzopen(name, "r")) == NULL) { 654 errx(EXIT_FAILURE, "%s: %s", 655 name, 656 strerror((errno == 0) ? ENOMEM : errno)); 657 /* NOTREACHED */ 658 } 659 660 section = manpagesection(name); 661 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section); 662 663 (void) gzclose(in); 664 return data; 665 } 666 667 void 668 processmanpages(manpage **source, whatis **dest) 669 { 670 manpage *mp; 671 672 mp = *source; 673 *source = NULL; 674 675 while (mp != NULL) { 676 manpage *obsolete; 677 char *data; 678 679 if (mp->mp_left != NULL) 680 processmanpages(&mp->mp_left,dest); 681 682 if ((data = getwhatisdata(mp->mp_name)) != NULL) { 683 if (!addwhatis(dest,data)) 684 err(EXIT_FAILURE, NULL); 685 } 686 687 obsolete = mp; 688 mp = mp->mp_right; 689 free(obsolete); 690 } 691 } 692 693 int 694 dumpwhatis (FILE *out, whatis *tree) 695 { 696 while (tree != NULL) { 697 if (tree->wi_left) 698 if (!dumpwhatis(out, tree->wi_left)) return 0; 699 700 if ((fputs(tree->wi_data, out) == EOF) || 701 (fputc('\n', out) == EOF)) 702 return 0; 703 704 tree = tree->wi_right; 705 } 706 707 return 1; 708 } 709