1 /* $Id: mandocdb.c,v 1.111 2014/06/21 16:17:56 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/stat.h> 19 #include <sys/wait.h> 20 21 #include <assert.h> 22 #include <ctype.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <fts.h> 26 #include <getopt.h> 27 #include <limits.h> 28 #include <stddef.h> 29 #include <stdio.h> 30 #include <stdint.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <unistd.h> 34 35 #include <ohash.h> 36 #include <sqlite3.h> 37 38 #include "mdoc.h" 39 #include "man.h" 40 #include "mandoc.h" 41 #include "mandoc_aux.h" 42 #include "manpath.h" 43 #include "mansearch.h" 44 45 extern int mansearch_keymax; 46 extern const char *const mansearch_keynames[]; 47 48 #define SQL_EXEC(_v) \ 49 if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \ 50 say("", "%s: %s", (_v), sqlite3_errmsg(db)) 51 #define SQL_BIND_TEXT(_s, _i, _v) \ 52 if (SQLITE_OK != sqlite3_bind_text \ 53 ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \ 54 say(mlink->file, "%s", sqlite3_errmsg(db)) 55 #define SQL_BIND_INT(_s, _i, _v) \ 56 if (SQLITE_OK != sqlite3_bind_int \ 57 ((_s), (_i)++, (_v))) \ 58 say(mlink->file, "%s", sqlite3_errmsg(db)) 59 #define SQL_BIND_INT64(_s, _i, _v) \ 60 if (SQLITE_OK != sqlite3_bind_int64 \ 61 ((_s), (_i)++, (_v))) \ 62 say(mlink->file, "%s", sqlite3_errmsg(db)) 63 #define SQL_STEP(_s) \ 64 if (SQLITE_DONE != sqlite3_step((_s))) \ 65 say(mlink->file, "%s", sqlite3_errmsg(db)) 66 67 enum op { 68 OP_DEFAULT = 0, /* new dbs from dir list or default config */ 69 OP_CONFFILE, /* new databases from custom config file */ 70 OP_UPDATE, /* delete/add entries in existing database */ 71 OP_DELETE, /* delete entries from existing database */ 72 OP_TEST /* change no databases, report potential problems */ 73 }; 74 75 enum form { 76 FORM_NONE, /* format is unknown */ 77 FORM_SRC, /* format is -man or -mdoc */ 78 FORM_CAT /* format is cat */ 79 }; 80 81 struct str { 82 char *rendered; /* key in UTF-8 or ASCII form */ 83 const struct mpage *mpage; /* if set, the owning parse */ 84 uint64_t mask; /* bitmask in sequence */ 85 char key[]; /* may contain escape sequences */ 86 }; 87 88 struct inodev { 89 ino_t st_ino; 90 dev_t st_dev; 91 }; 92 93 struct mpage { 94 struct inodev inodev; /* used for hashing routine */ 95 int64_t pageid; /* pageid in mpages SQL table */ 96 enum form form; /* format from file content */ 97 char *sec; /* section from file content */ 98 char *arch; /* architecture from file content */ 99 char *title; /* title from file content */ 100 char *desc; /* description from file content */ 101 struct mlink *mlinks; /* singly linked list */ 102 }; 103 104 struct mlink { 105 char file[PATH_MAX]; /* filename rel. to manpath */ 106 enum form dform; /* format from directory */ 107 enum form fform; /* format from file name suffix */ 108 char *dsec; /* section from directory */ 109 char *arch; /* architecture from directory */ 110 char *name; /* name from file name (not empty) */ 111 char *fsec; /* section from file name suffix */ 112 struct mlink *next; /* singly linked list */ 113 struct mpage *mpage; /* parent */ 114 int gzip; /* filename has a .gz suffix */ 115 }; 116 117 enum stmt { 118 STMT_DELETE_PAGE = 0, /* delete mpage */ 119 STMT_INSERT_PAGE, /* insert mpage */ 120 STMT_INSERT_LINK, /* insert mlink */ 121 STMT_INSERT_NAME, /* insert name */ 122 STMT_INSERT_KEY, /* insert parsed key */ 123 STMT__MAX 124 }; 125 126 typedef int (*mdoc_fp)(struct mpage *, const struct mdoc_node *); 127 128 struct mdoc_handler { 129 mdoc_fp fp; /* optional handler */ 130 uint64_t mask; /* set unless handler returns 0 */ 131 }; 132 133 static void dbclose(int); 134 static void dbadd(struct mpage *, struct mchars *); 135 static void dbadd_mlink(const struct mlink *mlink); 136 static int dbopen(int); 137 static void dbprune(void); 138 static void filescan(const char *); 139 static void *hash_alloc(size_t, void *); 140 static void hash_free(void *, void *); 141 static void *hash_calloc(size_t, size_t, void *); 142 static void mlink_add(struct mlink *, const struct stat *); 143 static void mlink_check(struct mpage *, struct mlink *); 144 static void mlink_free(struct mlink *); 145 static void mlinks_undupe(struct mpage *); 146 static void mpages_free(void); 147 static void mpages_merge(struct mchars *, struct mparse *); 148 static void names_check(void); 149 static void parse_cat(struct mpage *, int); 150 static void parse_man(struct mpage *, const struct man_node *); 151 static void parse_mdoc(struct mpage *, const struct mdoc_node *); 152 static int parse_mdoc_body(struct mpage *, const struct mdoc_node *); 153 static int parse_mdoc_head(struct mpage *, const struct mdoc_node *); 154 static int parse_mdoc_Fd(struct mpage *, const struct mdoc_node *); 155 static int parse_mdoc_Fn(struct mpage *, const struct mdoc_node *); 156 static int parse_mdoc_Nd(struct mpage *, const struct mdoc_node *); 157 static int parse_mdoc_Nm(struct mpage *, const struct mdoc_node *); 158 static int parse_mdoc_Sh(struct mpage *, const struct mdoc_node *); 159 static int parse_mdoc_Xr(struct mpage *, const struct mdoc_node *); 160 static void putkey(const struct mpage *, char *, uint64_t); 161 static void putkeys(const struct mpage *, 162 const char *, size_t, uint64_t); 163 static void putmdockey(const struct mpage *, 164 const struct mdoc_node *, uint64_t); 165 static void render_key(struct mchars *, struct str *); 166 static void say(const char *, const char *, ...); 167 static int set_basedir(const char *); 168 static int treescan(void); 169 static size_t utf8(unsigned int, char [7]); 170 171 static char tempfilename[32]; 172 static char *progname; 173 static int nodb; /* no database changes */ 174 static int mparse_options; /* abort the parse early */ 175 static int use_all; /* use all found files */ 176 static int debug; /* print what we're doing */ 177 static int warnings; /* warn about crap */ 178 static int write_utf8; /* write UTF-8 output; else ASCII */ 179 static int exitcode; /* to be returned by main */ 180 static enum op op; /* operational mode */ 181 static char basedir[PATH_MAX]; /* current base directory */ 182 static struct ohash mpages; /* table of distinct manual pages */ 183 static struct ohash mlinks; /* table of directory entries */ 184 static struct ohash names; /* table of all names */ 185 static struct ohash strings; /* table of all strings */ 186 static sqlite3 *db = NULL; /* current database */ 187 static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */ 188 static uint64_t name_mask; 189 190 static const struct mdoc_handler mdocs[MDOC_MAX] = { 191 { NULL, 0 }, /* Ap */ 192 { NULL, 0 }, /* Dd */ 193 { NULL, 0 }, /* Dt */ 194 { NULL, 0 }, /* Os */ 195 { parse_mdoc_Sh, TYPE_Sh }, /* Sh */ 196 { parse_mdoc_head, TYPE_Ss }, /* Ss */ 197 { NULL, 0 }, /* Pp */ 198 { NULL, 0 }, /* D1 */ 199 { NULL, 0 }, /* Dl */ 200 { NULL, 0 }, /* Bd */ 201 { NULL, 0 }, /* Ed */ 202 { NULL, 0 }, /* Bl */ 203 { NULL, 0 }, /* El */ 204 { NULL, 0 }, /* It */ 205 { NULL, 0 }, /* Ad */ 206 { NULL, TYPE_An }, /* An */ 207 { NULL, TYPE_Ar }, /* Ar */ 208 { NULL, TYPE_Cd }, /* Cd */ 209 { NULL, TYPE_Cm }, /* Cm */ 210 { NULL, TYPE_Dv }, /* Dv */ 211 { NULL, TYPE_Er }, /* Er */ 212 { NULL, TYPE_Ev }, /* Ev */ 213 { NULL, 0 }, /* Ex */ 214 { NULL, TYPE_Fa }, /* Fa */ 215 { parse_mdoc_Fd, 0 }, /* Fd */ 216 { NULL, TYPE_Fl }, /* Fl */ 217 { parse_mdoc_Fn, 0 }, /* Fn */ 218 { NULL, TYPE_Ft }, /* Ft */ 219 { NULL, TYPE_Ic }, /* Ic */ 220 { NULL, TYPE_In }, /* In */ 221 { NULL, TYPE_Li }, /* Li */ 222 { parse_mdoc_Nd, 0 }, /* Nd */ 223 { parse_mdoc_Nm, 0 }, /* Nm */ 224 { NULL, 0 }, /* Op */ 225 { NULL, 0 }, /* Ot */ 226 { NULL, TYPE_Pa }, /* Pa */ 227 { NULL, 0 }, /* Rv */ 228 { NULL, TYPE_St }, /* St */ 229 { NULL, TYPE_Va }, /* Va */ 230 { parse_mdoc_body, TYPE_Va }, /* Vt */ 231 { parse_mdoc_Xr, 0 }, /* Xr */ 232 { NULL, 0 }, /* %A */ 233 { NULL, 0 }, /* %B */ 234 { NULL, 0 }, /* %D */ 235 { NULL, 0 }, /* %I */ 236 { NULL, 0 }, /* %J */ 237 { NULL, 0 }, /* %N */ 238 { NULL, 0 }, /* %O */ 239 { NULL, 0 }, /* %P */ 240 { NULL, 0 }, /* %R */ 241 { NULL, 0 }, /* %T */ 242 { NULL, 0 }, /* %V */ 243 { NULL, 0 }, /* Ac */ 244 { NULL, 0 }, /* Ao */ 245 { NULL, 0 }, /* Aq */ 246 { NULL, TYPE_At }, /* At */ 247 { NULL, 0 }, /* Bc */ 248 { NULL, 0 }, /* Bf */ 249 { NULL, 0 }, /* Bo */ 250 { NULL, 0 }, /* Bq */ 251 { NULL, TYPE_Bsx }, /* Bsx */ 252 { NULL, TYPE_Bx }, /* Bx */ 253 { NULL, 0 }, /* Db */ 254 { NULL, 0 }, /* Dc */ 255 { NULL, 0 }, /* Do */ 256 { NULL, 0 }, /* Dq */ 257 { NULL, 0 }, /* Ec */ 258 { NULL, 0 }, /* Ef */ 259 { NULL, TYPE_Em }, /* Em */ 260 { NULL, 0 }, /* Eo */ 261 { NULL, TYPE_Fx }, /* Fx */ 262 { NULL, TYPE_Ms }, /* Ms */ 263 { NULL, 0 }, /* No */ 264 { NULL, 0 }, /* Ns */ 265 { NULL, TYPE_Nx }, /* Nx */ 266 { NULL, TYPE_Ox }, /* Ox */ 267 { NULL, 0 }, /* Pc */ 268 { NULL, 0 }, /* Pf */ 269 { NULL, 0 }, /* Po */ 270 { NULL, 0 }, /* Pq */ 271 { NULL, 0 }, /* Qc */ 272 { NULL, 0 }, /* Ql */ 273 { NULL, 0 }, /* Qo */ 274 { NULL, 0 }, /* Qq */ 275 { NULL, 0 }, /* Re */ 276 { NULL, 0 }, /* Rs */ 277 { NULL, 0 }, /* Sc */ 278 { NULL, 0 }, /* So */ 279 { NULL, 0 }, /* Sq */ 280 { NULL, 0 }, /* Sm */ 281 { NULL, 0 }, /* Sx */ 282 { NULL, TYPE_Sy }, /* Sy */ 283 { NULL, TYPE_Tn }, /* Tn */ 284 { NULL, 0 }, /* Ux */ 285 { NULL, 0 }, /* Xc */ 286 { NULL, 0 }, /* Xo */ 287 { parse_mdoc_head, 0 }, /* Fo */ 288 { NULL, 0 }, /* Fc */ 289 { NULL, 0 }, /* Oo */ 290 { NULL, 0 }, /* Oc */ 291 { NULL, 0 }, /* Bk */ 292 { NULL, 0 }, /* Ek */ 293 { NULL, 0 }, /* Bt */ 294 { NULL, 0 }, /* Hf */ 295 { NULL, 0 }, /* Fr */ 296 { NULL, 0 }, /* Ud */ 297 { NULL, TYPE_Lb }, /* Lb */ 298 { NULL, 0 }, /* Lp */ 299 { NULL, TYPE_Lk }, /* Lk */ 300 { NULL, TYPE_Mt }, /* Mt */ 301 { NULL, 0 }, /* Brq */ 302 { NULL, 0 }, /* Bro */ 303 { NULL, 0 }, /* Brc */ 304 { NULL, 0 }, /* %C */ 305 { NULL, 0 }, /* Es */ 306 { NULL, 0 }, /* En */ 307 { NULL, TYPE_Dx }, /* Dx */ 308 { NULL, 0 }, /* %Q */ 309 { NULL, 0 }, /* br */ 310 { NULL, 0 }, /* sp */ 311 { NULL, 0 }, /* %U */ 312 { NULL, 0 }, /* Ta */ 313 }; 314 315 316 int 317 mandocdb(int argc, char *argv[]) 318 { 319 int ch, i; 320 size_t j, sz; 321 const char *path_arg; 322 struct mchars *mc; 323 struct manpaths dirs; 324 struct mparse *mp; 325 struct ohash_info mpages_info, mlinks_info; 326 327 memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *)); 328 memset(&dirs, 0, sizeof(struct manpaths)); 329 330 mpages_info.alloc = mlinks_info.alloc = hash_alloc; 331 mpages_info.calloc = mlinks_info.calloc = hash_calloc; 332 mpages_info.free = mlinks_info.free = hash_free; 333 334 mpages_info.key_offset = offsetof(struct mpage, inodev); 335 mlinks_info.key_offset = offsetof(struct mlink, file); 336 337 progname = strrchr(argv[0], '/'); 338 if (progname == NULL) 339 progname = argv[0]; 340 else 341 ++progname; 342 343 /* 344 * We accept a few different invocations. 345 * The CHECKOP macro makes sure that invocation styles don't 346 * clobber each other. 347 */ 348 #define CHECKOP(_op, _ch) do \ 349 if (OP_DEFAULT != (_op)) { \ 350 fprintf(stderr, "%s: -%c: Conflicting option\n", \ 351 progname, (_ch)); \ 352 goto usage; \ 353 } while (/*CONSTCOND*/0) 354 355 path_arg = NULL; 356 op = OP_DEFAULT; 357 358 while (-1 != (ch = getopt(argc, argv, "aC:Dd:npQT:tu:v"))) 359 switch (ch) { 360 case 'a': 361 use_all = 1; 362 break; 363 case 'C': 364 CHECKOP(op, ch); 365 path_arg = optarg; 366 op = OP_CONFFILE; 367 break; 368 case 'D': 369 debug++; 370 break; 371 case 'd': 372 CHECKOP(op, ch); 373 path_arg = optarg; 374 op = OP_UPDATE; 375 break; 376 case 'n': 377 nodb = 1; 378 break; 379 case 'p': 380 warnings = 1; 381 break; 382 case 'Q': 383 mparse_options |= MPARSE_QUICK; 384 break; 385 case 'T': 386 if (strcmp(optarg, "utf8")) { 387 fprintf(stderr, "%s: -T%s: " 388 "Unsupported output format\n", 389 progname, optarg); 390 goto usage; 391 } 392 write_utf8 = 1; 393 break; 394 case 't': 395 CHECKOP(op, ch); 396 dup2(STDOUT_FILENO, STDERR_FILENO); 397 op = OP_TEST; 398 nodb = warnings = 1; 399 break; 400 case 'u': 401 CHECKOP(op, ch); 402 path_arg = optarg; 403 op = OP_DELETE; 404 break; 405 case 'v': 406 /* Compatibility with espie@'s makewhatis. */ 407 break; 408 default: 409 goto usage; 410 } 411 412 argc -= optind; 413 argv += optind; 414 415 if (OP_CONFFILE == op && argc > 0) { 416 fprintf(stderr, "%s: -C: Too many arguments\n", 417 progname); 418 goto usage; 419 } 420 421 exitcode = (int)MANDOCLEVEL_OK; 422 mp = mparse_alloc(mparse_options, MANDOCLEVEL_FATAL, NULL, NULL); 423 mc = mchars_alloc(); 424 425 ohash_init(&mpages, 6, &mpages_info); 426 ohash_init(&mlinks, 6, &mlinks_info); 427 428 if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) { 429 430 /* 431 * Most of these deal with a specific directory. 432 * Jump into that directory first. 433 */ 434 if (OP_TEST != op && 0 == set_basedir(path_arg)) 435 goto out; 436 437 if (dbopen(1)) { 438 /* 439 * The existing database is usable. Process 440 * all files specified on the command-line. 441 */ 442 use_all = 1; 443 for (i = 0; i < argc; i++) 444 filescan(argv[i]); 445 if (OP_TEST != op) 446 dbprune(); 447 } else { 448 /* 449 * Database missing or corrupt. 450 * Recreate from scratch. 451 */ 452 exitcode = (int)MANDOCLEVEL_OK; 453 op = OP_DEFAULT; 454 if (0 == treescan()) 455 goto out; 456 if (0 == dbopen(0)) 457 goto out; 458 } 459 if (OP_DELETE != op) 460 mpages_merge(mc, mp); 461 dbclose(OP_DEFAULT == op ? 0 : 1); 462 } else { 463 /* 464 * If we have arguments, use them as our manpaths. 465 * If we don't, grok from manpath(1) or however else 466 * manpath_parse() wants to do it. 467 */ 468 if (argc > 0) { 469 dirs.paths = mandoc_reallocarray(NULL, 470 argc, sizeof(char *)); 471 dirs.sz = (size_t)argc; 472 for (i = 0; i < argc; i++) 473 dirs.paths[i] = mandoc_strdup(argv[i]); 474 } else 475 manpath_parse(&dirs, path_arg, NULL, NULL); 476 477 if (0 == dirs.sz) { 478 exitcode = (int)MANDOCLEVEL_BADARG; 479 say("", "Empty manpath"); 480 } 481 482 /* 483 * First scan the tree rooted at a base directory, then 484 * build a new database and finally move it into place. 485 * Ignore zero-length directories and strip trailing 486 * slashes. 487 */ 488 for (j = 0; j < dirs.sz; j++) { 489 sz = strlen(dirs.paths[j]); 490 if (sz && '/' == dirs.paths[j][sz - 1]) 491 dirs.paths[j][--sz] = '\0'; 492 if (0 == sz) 493 continue; 494 495 if (j) { 496 ohash_init(&mpages, 6, &mpages_info); 497 ohash_init(&mlinks, 6, &mlinks_info); 498 } 499 500 if (0 == set_basedir(dirs.paths[j])) 501 goto out; 502 if (0 == treescan()) 503 goto out; 504 if (0 == dbopen(0)) 505 goto out; 506 507 mpages_merge(mc, mp); 508 if (warnings && !nodb && 509 ! (MPARSE_QUICK & mparse_options)) 510 names_check(); 511 dbclose(0); 512 513 if (j + 1 < dirs.sz) { 514 mpages_free(); 515 ohash_delete(&mpages); 516 ohash_delete(&mlinks); 517 } 518 } 519 } 520 out: 521 manpath_free(&dirs); 522 mchars_free(mc); 523 mparse_free(mp); 524 mpages_free(); 525 ohash_delete(&mpages); 526 ohash_delete(&mlinks); 527 return(exitcode); 528 usage: 529 fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n" 530 " %s [-aDnpQ] [-Tutf8] dir ...\n" 531 " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n" 532 " %s [-Dnp] -u dir [file ...]\n" 533 " %s [-Q] -t file ...\n", 534 progname, progname, progname, 535 progname, progname); 536 537 return((int)MANDOCLEVEL_BADARG); 538 } 539 540 /* 541 * Scan a directory tree rooted at "basedir" for manpages. 542 * We use fts(), scanning directory parts along the way for clues to our 543 * section and architecture. 544 * 545 * If use_all has been specified, grok all files. 546 * If not, sanitise paths to the following: 547 * 548 * [./]man*[/<arch>]/<name>.<section> 549 * or 550 * [./]cat<section>[/<arch>]/<name>.0 551 * 552 * TODO: accomodate for multi-language directories. 553 */ 554 static int 555 treescan(void) 556 { 557 char buf[PATH_MAX]; 558 FTS *f; 559 FTSENT *ff; 560 struct mlink *mlink; 561 int dform, gzip; 562 char *dsec, *arch, *fsec, *cp; 563 const char *path; 564 const char *argv[2]; 565 566 argv[0] = "."; 567 argv[1] = (char *)NULL; 568 569 f = fts_open((char * const *)argv, 570 FTS_PHYSICAL | FTS_NOCHDIR, NULL); 571 if (NULL == f) { 572 exitcode = (int)MANDOCLEVEL_SYSERR; 573 say("", "&fts_open"); 574 return(0); 575 } 576 577 dsec = arch = NULL; 578 dform = FORM_NONE; 579 580 while (NULL != (ff = fts_read(f))) { 581 path = ff->fts_path + 2; 582 switch (ff->fts_info) { 583 584 /* 585 * Symbolic links require various sanity checks, 586 * then get handled just like regular files. 587 */ 588 case FTS_SL: 589 if (NULL == realpath(path, buf)) { 590 if (warnings) 591 say(path, "&realpath"); 592 continue; 593 } 594 if (strstr(buf, basedir) != buf) { 595 if (warnings) say("", 596 "%s: outside base directory", buf); 597 continue; 598 } 599 /* Use logical inode to avoid mpages dupe. */ 600 if (-1 == stat(path, ff->fts_statp)) { 601 if (warnings) 602 say(path, "&stat"); 603 continue; 604 } 605 /* FALLTHROUGH */ 606 607 /* 608 * If we're a regular file, add an mlink by using the 609 * stored directory data and handling the filename. 610 */ 611 case FTS_F: 612 if (0 == strcmp(path, MANDOC_DB)) 613 continue; 614 if ( ! use_all && ff->fts_level < 2) { 615 if (warnings) 616 say(path, "Extraneous file"); 617 continue; 618 } 619 gzip = 0; 620 fsec = NULL; 621 while (NULL == fsec) { 622 fsec = strrchr(ff->fts_name, '.'); 623 if (NULL == fsec || strcmp(fsec+1, "gz")) 624 break; 625 gzip = 1; 626 *fsec = '\0'; 627 fsec = NULL; 628 } 629 if (NULL == fsec) { 630 if ( ! use_all) { 631 if (warnings) 632 say(path, 633 "No filename suffix"); 634 continue; 635 } 636 } else if (0 == strcmp(++fsec, "html")) { 637 if (warnings) 638 say(path, "Skip html"); 639 continue; 640 } else if (0 == strcmp(fsec, "ps")) { 641 if (warnings) 642 say(path, "Skip ps"); 643 continue; 644 } else if (0 == strcmp(fsec, "pdf")) { 645 if (warnings) 646 say(path, "Skip pdf"); 647 continue; 648 } else if ( ! use_all && 649 ((FORM_SRC == dform && strcmp(fsec, dsec)) || 650 (FORM_CAT == dform && strcmp(fsec, "0")))) { 651 if (warnings) 652 say(path, "Wrong filename suffix"); 653 continue; 654 } else 655 fsec[-1] = '\0'; 656 657 mlink = mandoc_calloc(1, sizeof(struct mlink)); 658 if (strlcpy(mlink->file, path, 659 sizeof(mlink->file)) >= 660 sizeof(mlink->file)) { 661 say(path, "Filename too long"); 662 free(mlink); 663 continue; 664 } 665 mlink->dform = dform; 666 mlink->dsec = dsec; 667 mlink->arch = arch; 668 mlink->name = ff->fts_name; 669 mlink->fsec = fsec; 670 mlink->gzip = gzip; 671 mlink_add(mlink, ff->fts_statp); 672 continue; 673 674 case FTS_D: 675 /* FALLTHROUGH */ 676 case FTS_DP: 677 break; 678 679 default: 680 if (warnings) 681 say(path, "Not a regular file"); 682 continue; 683 } 684 685 switch (ff->fts_level) { 686 case 0: 687 /* Ignore the root directory. */ 688 break; 689 case 1: 690 /* 691 * This might contain manX/ or catX/. 692 * Try to infer this from the name. 693 * If we're not in use_all, enforce it. 694 */ 695 cp = ff->fts_name; 696 if (FTS_DP == ff->fts_info) 697 break; 698 699 if (0 == strncmp(cp, "man", 3)) { 700 dform = FORM_SRC; 701 dsec = cp + 3; 702 } else if (0 == strncmp(cp, "cat", 3)) { 703 dform = FORM_CAT; 704 dsec = cp + 3; 705 } else { 706 dform = FORM_NONE; 707 dsec = NULL; 708 } 709 710 if (NULL != dsec || use_all) 711 break; 712 713 if (warnings) 714 say(path, "Unknown directory part"); 715 fts_set(f, ff, FTS_SKIP); 716 break; 717 case 2: 718 /* 719 * Possibly our architecture. 720 * If we're descending, keep tabs on it. 721 */ 722 if (FTS_DP != ff->fts_info && NULL != dsec) 723 arch = ff->fts_name; 724 else 725 arch = NULL; 726 break; 727 default: 728 if (FTS_DP == ff->fts_info || use_all) 729 break; 730 if (warnings) 731 say(path, "Extraneous directory part"); 732 fts_set(f, ff, FTS_SKIP); 733 break; 734 } 735 } 736 737 fts_close(f); 738 return(1); 739 } 740 741 /* 742 * Add a file to the mlinks table. 743 * Do not verify that it's a "valid" looking manpage (we'll do that 744 * later). 745 * 746 * Try to infer the manual section, architecture, and page name from the 747 * path, assuming it looks like 748 * 749 * [./]man*[/<arch>]/<name>.<section> 750 * or 751 * [./]cat<section>[/<arch>]/<name>.0 752 * 753 * See treescan() for the fts(3) version of this. 754 */ 755 static void 756 filescan(const char *file) 757 { 758 char buf[PATH_MAX]; 759 struct stat st; 760 struct mlink *mlink; 761 char *p, *start; 762 763 assert(use_all); 764 765 if (0 == strncmp(file, "./", 2)) 766 file += 2; 767 768 /* 769 * We have to do lstat(2) before realpath(3) loses 770 * the information whether this is a symbolic link. 771 * We need to know that because for symbolic links, 772 * we want to use the orginal file name, while for 773 * regular files, we want to use the real path. 774 */ 775 if (-1 == lstat(file, &st)) { 776 exitcode = (int)MANDOCLEVEL_BADARG; 777 say(file, "&lstat"); 778 return; 779 } else if (0 == ((S_IFREG | S_IFLNK) & st.st_mode)) { 780 exitcode = (int)MANDOCLEVEL_BADARG; 781 say(file, "Not a regular file"); 782 return; 783 } 784 785 /* 786 * We have to resolve the file name to the real path 787 * in any case for the base directory check. 788 */ 789 if (NULL == realpath(file, buf)) { 790 exitcode = (int)MANDOCLEVEL_BADARG; 791 say(file, "&realpath"); 792 return; 793 } 794 795 if (OP_TEST == op) 796 start = buf; 797 else if (strstr(buf, basedir) == buf) 798 start = buf + strlen(basedir); 799 else { 800 exitcode = (int)MANDOCLEVEL_BADARG; 801 say("", "%s: outside base directory", buf); 802 return; 803 } 804 805 /* 806 * Now we are sure the file is inside our tree. 807 * If it is a symbolic link, ignore the real path 808 * and use the original name. 809 * This implies passing stuff like "cat1/../man1/foo.1" 810 * on the command line won't work. So don't do that. 811 * Note the stat(2) can still fail if the link target 812 * doesn't exist. 813 */ 814 if (S_IFLNK & st.st_mode) { 815 if (-1 == stat(buf, &st)) { 816 exitcode = (int)MANDOCLEVEL_BADARG; 817 say(file, "&stat"); 818 return; 819 } 820 if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) { 821 say(file, "Filename too long"); 822 return; 823 } 824 start = buf; 825 if (OP_TEST != op && strstr(buf, basedir) == buf) 826 start += strlen(basedir); 827 } 828 829 mlink = mandoc_calloc(1, sizeof(struct mlink)); 830 if (strlcpy(mlink->file, start, sizeof(mlink->file)) >= 831 sizeof(mlink->file)) { 832 say(start, "Filename too long"); 833 return; 834 } 835 836 /* 837 * First try to guess our directory structure. 838 * If we find a separator, try to look for man* or cat*. 839 * If we find one of these and what's underneath is a directory, 840 * assume it's an architecture. 841 */ 842 if (NULL != (p = strchr(start, '/'))) { 843 *p++ = '\0'; 844 if (0 == strncmp(start, "man", 3)) { 845 mlink->dform = FORM_SRC; 846 mlink->dsec = start + 3; 847 } else if (0 == strncmp(start, "cat", 3)) { 848 mlink->dform = FORM_CAT; 849 mlink->dsec = start + 3; 850 } 851 852 start = p; 853 if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) { 854 *p++ = '\0'; 855 mlink->arch = start; 856 start = p; 857 } 858 } 859 860 /* 861 * Now check the file suffix. 862 * Suffix of `.0' indicates a catpage, `.1-9' is a manpage. 863 */ 864 p = strrchr(start, '\0'); 865 while (p-- > start && '/' != *p && '.' != *p) 866 /* Loop. */ ; 867 868 if ('.' == *p) { 869 *p++ = '\0'; 870 mlink->fsec = p; 871 } 872 873 /* 874 * Now try to parse the name. 875 * Use the filename portion of the path. 876 */ 877 mlink->name = start; 878 if (NULL != (p = strrchr(start, '/'))) { 879 mlink->name = p + 1; 880 *p = '\0'; 881 } 882 mlink_add(mlink, &st); 883 } 884 885 static void 886 mlink_add(struct mlink *mlink, const struct stat *st) 887 { 888 struct inodev inodev; 889 struct mpage *mpage; 890 unsigned int slot; 891 892 assert(NULL != mlink->file); 893 894 mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : ""); 895 mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : ""); 896 mlink->name = mandoc_strdup(mlink->name ? mlink->name : ""); 897 mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : ""); 898 899 if ('0' == *mlink->fsec) { 900 free(mlink->fsec); 901 mlink->fsec = mandoc_strdup(mlink->dsec); 902 mlink->fform = FORM_CAT; 903 } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec) 904 mlink->fform = FORM_SRC; 905 else 906 mlink->fform = FORM_NONE; 907 908 slot = ohash_qlookup(&mlinks, mlink->file); 909 assert(NULL == ohash_find(&mlinks, slot)); 910 ohash_insert(&mlinks, slot, mlink); 911 912 inodev.st_ino = st->st_ino; 913 inodev.st_dev = st->st_dev; 914 slot = ohash_lookup_memory(&mpages, (char *)&inodev, 915 sizeof(struct inodev), inodev.st_ino); 916 mpage = ohash_find(&mpages, slot); 917 if (NULL == mpage) { 918 mpage = mandoc_calloc(1, sizeof(struct mpage)); 919 mpage->inodev.st_ino = inodev.st_ino; 920 mpage->inodev.st_dev = inodev.st_dev; 921 ohash_insert(&mpages, slot, mpage); 922 } else 923 mlink->next = mpage->mlinks; 924 mpage->mlinks = mlink; 925 mlink->mpage = mpage; 926 } 927 928 static void 929 mlink_free(struct mlink *mlink) 930 { 931 932 free(mlink->dsec); 933 free(mlink->arch); 934 free(mlink->name); 935 free(mlink->fsec); 936 free(mlink); 937 } 938 939 static void 940 mpages_free(void) 941 { 942 struct mpage *mpage; 943 struct mlink *mlink; 944 unsigned int slot; 945 946 mpage = ohash_first(&mpages, &slot); 947 while (NULL != mpage) { 948 while (NULL != (mlink = mpage->mlinks)) { 949 mpage->mlinks = mlink->next; 950 mlink_free(mlink); 951 } 952 free(mpage->sec); 953 free(mpage->arch); 954 free(mpage->title); 955 free(mpage->desc); 956 free(mpage); 957 mpage = ohash_next(&mpages, &slot); 958 } 959 } 960 961 /* 962 * For each mlink to the mpage, check whether the path looks like 963 * it is formatted, and if it does, check whether a source manual 964 * exists by the same name, ignoring the suffix. 965 * If both conditions hold, drop the mlink. 966 */ 967 static void 968 mlinks_undupe(struct mpage *mpage) 969 { 970 char buf[PATH_MAX]; 971 struct mlink **prev; 972 struct mlink *mlink; 973 char *bufp; 974 975 mpage->form = FORM_CAT; 976 prev = &mpage->mlinks; 977 while (NULL != (mlink = *prev)) { 978 if (FORM_CAT != mlink->dform) { 979 mpage->form = FORM_NONE; 980 goto nextlink; 981 } 982 (void)strlcpy(buf, mlink->file, sizeof(buf)); 983 bufp = strstr(buf, "cat"); 984 assert(NULL != bufp); 985 memcpy(bufp, "man", 3); 986 if (NULL != (bufp = strrchr(buf, '.'))) 987 *++bufp = '\0'; 988 (void)strlcat(buf, mlink->dsec, sizeof(buf)); 989 if (NULL == ohash_find(&mlinks, 990 ohash_qlookup(&mlinks, buf))) 991 goto nextlink; 992 if (warnings) 993 say(mlink->file, "Man source exists: %s", buf); 994 if (use_all) 995 goto nextlink; 996 *prev = mlink->next; 997 mlink_free(mlink); 998 continue; 999 nextlink: 1000 prev = &(*prev)->next; 1001 } 1002 } 1003 1004 static void 1005 mlink_check(struct mpage *mpage, struct mlink *mlink) 1006 { 1007 struct str *str; 1008 unsigned int slot; 1009 1010 /* 1011 * Check whether the manual section given in a file 1012 * agrees with the directory where the file is located. 1013 * Some manuals have suffixes like (3p) on their 1014 * section number either inside the file or in the 1015 * directory name, some are linked into more than one 1016 * section, like encrypt(1) = makekey(8). 1017 */ 1018 1019 if (FORM_SRC == mpage->form && 1020 strcasecmp(mpage->sec, mlink->dsec)) 1021 say(mlink->file, "Section \"%s\" manual in %s directory", 1022 mpage->sec, mlink->dsec); 1023 1024 /* 1025 * Manual page directories exist for each kernel 1026 * architecture as returned by machine(1). 1027 * However, many manuals only depend on the 1028 * application architecture as returned by arch(1). 1029 * For example, some (2/ARM) manuals are shared 1030 * across the "armish" and "zaurus" kernel 1031 * architectures. 1032 * A few manuals are even shared across completely 1033 * different architectures, for example fdformat(1) 1034 * on amd64, i386, sparc, and sparc64. 1035 */ 1036 1037 if (strcasecmp(mpage->arch, mlink->arch)) 1038 say(mlink->file, "Architecture \"%s\" manual in " 1039 "\"%s\" directory", mpage->arch, mlink->arch); 1040 1041 /* 1042 * XXX 1043 * parse_cat() doesn't set NAME_TITLE yet. 1044 */ 1045 1046 if (FORM_CAT == mpage->form) 1047 return; 1048 1049 /* 1050 * Check whether this mlink 1051 * appears as a name in the NAME section. 1052 */ 1053 1054 slot = ohash_qlookup(&names, mlink->name); 1055 str = ohash_find(&names, slot); 1056 assert(NULL != str); 1057 if ( ! (NAME_TITLE & str->mask)) 1058 say(mlink->file, "Name missing in NAME section"); 1059 } 1060 1061 /* 1062 * Run through the files in the global vector "mpages" 1063 * and add them to the database specified in "basedir". 1064 * 1065 * This handles the parsing scheme itself, using the cues of directory 1066 * and filename to determine whether the file is parsable or not. 1067 */ 1068 static void 1069 mpages_merge(struct mchars *mc, struct mparse *mp) 1070 { 1071 char any[] = "any"; 1072 struct ohash_info str_info; 1073 int fd[2]; 1074 struct mpage *mpage, *mpage_dest; 1075 struct mlink *mlink, *mlink_dest; 1076 struct mdoc *mdoc; 1077 struct man *man; 1078 char *sodest; 1079 char *cp; 1080 pid_t child_pid; 1081 int status; 1082 unsigned int pslot; 1083 enum mandoclevel lvl; 1084 1085 str_info.alloc = hash_alloc; 1086 str_info.calloc = hash_calloc; 1087 str_info.free = hash_free; 1088 str_info.key_offset = offsetof(struct str, key); 1089 1090 if (0 == nodb) 1091 SQL_EXEC("BEGIN TRANSACTION"); 1092 1093 mpage = ohash_first(&mpages, &pslot); 1094 while (NULL != mpage) { 1095 mlinks_undupe(mpage); 1096 if (NULL == mpage->mlinks) { 1097 mpage = ohash_next(&mpages, &pslot); 1098 continue; 1099 } 1100 1101 name_mask = NAME_MASK; 1102 ohash_init(&names, 4, &str_info); 1103 ohash_init(&strings, 6, &str_info); 1104 mparse_reset(mp); 1105 mdoc = NULL; 1106 man = NULL; 1107 sodest = NULL; 1108 child_pid = 0; 1109 fd[0] = -1; 1110 fd[1] = -1; 1111 1112 if (mpage->mlinks->gzip) { 1113 if (-1 == pipe(fd)) { 1114 exitcode = (int)MANDOCLEVEL_SYSERR; 1115 say(mpage->mlinks->file, "&pipe gunzip"); 1116 goto nextpage; 1117 } 1118 switch (child_pid = fork()) { 1119 case -1: 1120 exitcode = (int)MANDOCLEVEL_SYSERR; 1121 say(mpage->mlinks->file, "&fork gunzip"); 1122 child_pid = 0; 1123 close(fd[1]); 1124 close(fd[0]); 1125 goto nextpage; 1126 case 0: 1127 close(fd[0]); 1128 if (-1 == dup2(fd[1], STDOUT_FILENO)) { 1129 say(mpage->mlinks->file, 1130 "&dup gunzip"); 1131 exit(1); 1132 } 1133 execlp("gunzip", "gunzip", "-c", 1134 mpage->mlinks->file, NULL); 1135 say(mpage->mlinks->file, "&exec gunzip"); 1136 exit(1); 1137 default: 1138 close(fd[1]); 1139 break; 1140 } 1141 } 1142 1143 /* 1144 * Try interpreting the file as mdoc(7) or man(7) 1145 * source code, unless it is already known to be 1146 * formatted. Fall back to formatted mode. 1147 */ 1148 if (FORM_CAT != mpage->mlinks->dform || 1149 FORM_CAT != mpage->mlinks->fform) { 1150 lvl = mparse_readfd(mp, fd[0], mpage->mlinks->file); 1151 if (lvl < MANDOCLEVEL_FATAL) 1152 mparse_result(mp, &mdoc, &man, &sodest); 1153 } 1154 1155 if (NULL != sodest) { 1156 mlink_dest = ohash_find(&mlinks, 1157 ohash_qlookup(&mlinks, sodest)); 1158 if (NULL != mlink_dest) { 1159 1160 /* The .so target exists. */ 1161 1162 mpage_dest = mlink_dest->mpage; 1163 mlink = mpage->mlinks; 1164 while (1) { 1165 mlink->mpage = mpage_dest; 1166 1167 /* 1168 * If the target was already 1169 * processed, add the links 1170 * to the database now. 1171 * Otherwise, this will 1172 * happen when we come 1173 * to the target. 1174 */ 1175 1176 if (mpage_dest->pageid) 1177 dbadd_mlink(mlink); 1178 1179 if (NULL == mlink->next) 1180 break; 1181 mlink = mlink->next; 1182 } 1183 1184 /* Move all links to the target. */ 1185 1186 mlink->next = mlink_dest->next; 1187 mlink_dest->next = mpage->mlinks; 1188 mpage->mlinks = NULL; 1189 } 1190 goto nextpage; 1191 } else if (NULL != mdoc) { 1192 mpage->form = FORM_SRC; 1193 mpage->sec = 1194 mandoc_strdup(mdoc_meta(mdoc)->msec); 1195 mpage->arch = mdoc_meta(mdoc)->arch; 1196 mpage->arch = mandoc_strdup( 1197 NULL == mpage->arch ? "" : mpage->arch); 1198 mpage->title = 1199 mandoc_strdup(mdoc_meta(mdoc)->title); 1200 } else if (NULL != man) { 1201 mpage->form = FORM_SRC; 1202 mpage->sec = 1203 mandoc_strdup(man_meta(man)->msec); 1204 mpage->arch = 1205 mandoc_strdup(mpage->mlinks->arch); 1206 mpage->title = 1207 mandoc_strdup(man_meta(man)->title); 1208 } else { 1209 mpage->form = FORM_CAT; 1210 mpage->sec = 1211 mandoc_strdup(mpage->mlinks->dsec); 1212 mpage->arch = 1213 mandoc_strdup(mpage->mlinks->arch); 1214 mpage->title = 1215 mandoc_strdup(mpage->mlinks->name); 1216 } 1217 putkey(mpage, mpage->sec, TYPE_sec); 1218 putkey(mpage, '\0' == *mpage->arch ? 1219 any : mpage->arch, TYPE_arch); 1220 1221 for (mlink = mpage->mlinks; mlink; mlink = mlink->next) { 1222 if ('\0' != *mlink->dsec) 1223 putkey(mpage, mlink->dsec, TYPE_sec); 1224 if ('\0' != *mlink->fsec) 1225 putkey(mpage, mlink->fsec, TYPE_sec); 1226 putkey(mpage, '\0' == *mlink->arch ? 1227 any : mlink->arch, TYPE_arch); 1228 putkey(mpage, mlink->name, NAME_FILE); 1229 } 1230 1231 assert(NULL == mpage->desc); 1232 if (NULL != mdoc) { 1233 if (NULL != (cp = mdoc_meta(mdoc)->name)) 1234 putkey(mpage, cp, NAME_HEAD); 1235 parse_mdoc(mpage, mdoc_node(mdoc)); 1236 } else if (NULL != man) 1237 parse_man(mpage, man_node(man)); 1238 else 1239 parse_cat(mpage, fd[0]); 1240 if (NULL == mpage->desc) 1241 mpage->desc = mandoc_strdup(mpage->mlinks->name); 1242 1243 if (warnings && !use_all) 1244 for (mlink = mpage->mlinks; mlink; 1245 mlink = mlink->next) 1246 mlink_check(mpage, mlink); 1247 1248 dbadd(mpage, mc); 1249 1250 nextpage: 1251 if (child_pid) { 1252 if (-1 == waitpid(child_pid, &status, 0)) { 1253 exitcode = (int)MANDOCLEVEL_SYSERR; 1254 say(mpage->mlinks->file, "&wait gunzip"); 1255 } else if (WIFSIGNALED(status)) { 1256 exitcode = (int)MANDOCLEVEL_SYSERR; 1257 say(mpage->mlinks->file, 1258 "gunzip died from signal %d", 1259 WTERMSIG(status)); 1260 } else if (WEXITSTATUS(status)) { 1261 exitcode = (int)MANDOCLEVEL_SYSERR; 1262 say(mpage->mlinks->file, 1263 "gunzip failed with code %d", 1264 WEXITSTATUS(status)); 1265 } 1266 } 1267 ohash_delete(&strings); 1268 ohash_delete(&names); 1269 mpage = ohash_next(&mpages, &pslot); 1270 } 1271 1272 if (0 == nodb) 1273 SQL_EXEC("END TRANSACTION"); 1274 } 1275 1276 static void 1277 names_check(void) 1278 { 1279 sqlite3_stmt *stmt; 1280 const char *name, *sec, *arch, *key; 1281 int irc; 1282 1283 sqlite3_prepare_v2(db, 1284 "SELECT name, sec, arch, key FROM (" 1285 "SELECT name AS key, pageid FROM names " 1286 "WHERE bits & ? AND NOT EXISTS (" 1287 "SELECT pageid FROM mlinks " 1288 "WHERE mlinks.pageid == names.pageid " 1289 "AND mlinks.name == names.name" 1290 ")" 1291 ") JOIN (" 1292 "SELECT sec, arch, name, pageid FROM mlinks " 1293 "GROUP BY pageid" 1294 ") USING (pageid);", 1295 -1, &stmt, NULL); 1296 1297 if (SQLITE_OK != sqlite3_bind_int64(stmt, 1, NAME_TITLE)) 1298 say("", "%s", sqlite3_errmsg(db)); 1299 1300 while (SQLITE_ROW == (irc = sqlite3_step(stmt))) { 1301 name = sqlite3_column_text(stmt, 0); 1302 sec = sqlite3_column_text(stmt, 1); 1303 arch = sqlite3_column_text(stmt, 2); 1304 key = sqlite3_column_text(stmt, 3); 1305 say("", "%s(%s%s%s) lacks mlink \"%s\"", name, sec, 1306 '\0' == *arch ? "" : "/", 1307 '\0' == *arch ? "" : arch, key); 1308 } 1309 sqlite3_finalize(stmt); 1310 } 1311 1312 static void 1313 parse_cat(struct mpage *mpage, int fd) 1314 { 1315 FILE *stream; 1316 char *line, *p, *title; 1317 size_t len, plen, titlesz; 1318 1319 stream = (-1 == fd) ? 1320 fopen(mpage->mlinks->file, "r") : 1321 fdopen(fd, "r"); 1322 if (NULL == stream) { 1323 if (warnings) 1324 say(mpage->mlinks->file, "&fopen"); 1325 return; 1326 } 1327 1328 /* Skip to first blank line. */ 1329 1330 while (NULL != (line = fgetln(stream, &len))) 1331 if ('\n' == *line) 1332 break; 1333 1334 /* 1335 * Assume the first line that is not indented 1336 * is the first section header. Skip to it. 1337 */ 1338 1339 while (NULL != (line = fgetln(stream, &len))) 1340 if ('\n' != *line && ' ' != *line) 1341 break; 1342 1343 /* 1344 * Read up until the next section into a buffer. 1345 * Strip the leading and trailing newline from each read line, 1346 * appending a trailing space. 1347 * Ignore empty (whitespace-only) lines. 1348 */ 1349 1350 titlesz = 0; 1351 title = NULL; 1352 1353 while (NULL != (line = fgetln(stream, &len))) { 1354 if (' ' != *line || '\n' != line[len - 1]) 1355 break; 1356 while (len > 0 && isspace((unsigned char)*line)) { 1357 line++; 1358 len--; 1359 } 1360 if (1 == len) 1361 continue; 1362 title = mandoc_realloc(title, titlesz + len); 1363 memcpy(title + titlesz, line, len); 1364 titlesz += len; 1365 title[titlesz - 1] = ' '; 1366 } 1367 1368 /* 1369 * If no page content can be found, or the input line 1370 * is already the next section header, or there is no 1371 * trailing newline, reuse the page title as the page 1372 * description. 1373 */ 1374 1375 if (NULL == title || '\0' == *title) { 1376 if (warnings) 1377 say(mpage->mlinks->file, 1378 "Cannot find NAME section"); 1379 fclose(stream); 1380 free(title); 1381 return; 1382 } 1383 1384 title = mandoc_realloc(title, titlesz + 1); 1385 title[titlesz] = '\0'; 1386 1387 /* 1388 * Skip to the first dash. 1389 * Use the remaining line as the description (no more than 70 1390 * bytes). 1391 */ 1392 1393 if (NULL != (p = strstr(title, "- "))) { 1394 for (p += 2; ' ' == *p || '\b' == *p; p++) 1395 /* Skip to next word. */ ; 1396 } else { 1397 if (warnings) 1398 say(mpage->mlinks->file, 1399 "No dash in title line"); 1400 p = title; 1401 } 1402 1403 plen = strlen(p); 1404 1405 /* Strip backspace-encoding from line. */ 1406 1407 while (NULL != (line = memchr(p, '\b', plen))) { 1408 len = line - p; 1409 if (0 == len) { 1410 memmove(line, line + 1, plen--); 1411 continue; 1412 } 1413 memmove(line - 1, line + 1, plen - len); 1414 plen -= 2; 1415 } 1416 1417 mpage->desc = mandoc_strdup(p); 1418 fclose(stream); 1419 free(title); 1420 } 1421 1422 /* 1423 * Put a type/word pair into the word database for this particular file. 1424 */ 1425 static void 1426 putkey(const struct mpage *mpage, char *value, uint64_t type) 1427 { 1428 char *cp; 1429 1430 assert(NULL != value); 1431 if (TYPE_arch == type) 1432 for (cp = value; *cp; cp++) 1433 if (isupper((unsigned char)*cp)) 1434 *cp = _tolower((unsigned char)*cp); 1435 putkeys(mpage, value, strlen(value), type); 1436 } 1437 1438 /* 1439 * Grok all nodes at or below a certain mdoc node into putkey(). 1440 */ 1441 static void 1442 putmdockey(const struct mpage *mpage, 1443 const struct mdoc_node *n, uint64_t m) 1444 { 1445 1446 for ( ; NULL != n; n = n->next) { 1447 if (NULL != n->child) 1448 putmdockey(mpage, n->child, m); 1449 if (MDOC_TEXT == n->type) 1450 putkey(mpage, n->string, m); 1451 } 1452 } 1453 1454 static void 1455 parse_man(struct mpage *mpage, const struct man_node *n) 1456 { 1457 const struct man_node *head, *body; 1458 char *start, *title; 1459 char byte; 1460 size_t sz; 1461 1462 if (NULL == n) 1463 return; 1464 1465 /* 1466 * We're only searching for one thing: the first text child in 1467 * the BODY of a NAME section. Since we don't keep track of 1468 * sections in -man, run some hoops to find out whether we're in 1469 * the correct section or not. 1470 */ 1471 1472 if (MAN_BODY == n->type && MAN_SH == n->tok) { 1473 body = n; 1474 assert(body->parent); 1475 if (NULL != (head = body->parent->head) && 1476 1 == head->nchild && 1477 NULL != (head = (head->child)) && 1478 MAN_TEXT == head->type && 1479 0 == strcmp(head->string, "NAME") && 1480 NULL != body->child) { 1481 1482 /* 1483 * Suck the entire NAME section into memory. 1484 * Yes, we might run away. 1485 * But too many manuals have big, spread-out 1486 * NAME sections over many lines. 1487 */ 1488 1489 title = NULL; 1490 man_deroff(&title, body); 1491 if (NULL == title) 1492 return; 1493 1494 /* 1495 * Go through a special heuristic dance here. 1496 * Conventionally, one or more manual names are 1497 * comma-specified prior to a whitespace, then a 1498 * dash, then a description. Try to puzzle out 1499 * the name parts here. 1500 */ 1501 1502 start = title; 1503 for ( ;; ) { 1504 sz = strcspn(start, " ,"); 1505 if ('\0' == start[sz]) 1506 break; 1507 1508 byte = start[sz]; 1509 start[sz] = '\0'; 1510 1511 /* 1512 * Assume a stray trailing comma in the 1513 * name list if a name begins with a dash. 1514 */ 1515 1516 if ('-' == start[0] || 1517 ('\\' == start[0] && '-' == start[1])) 1518 break; 1519 1520 putkey(mpage, start, NAME_TITLE); 1521 1522 if (' ' == byte) { 1523 start += sz + 1; 1524 break; 1525 } 1526 1527 assert(',' == byte); 1528 start += sz + 1; 1529 while (' ' == *start) 1530 start++; 1531 } 1532 1533 if (start == title) { 1534 putkey(mpage, start, NAME_TITLE); 1535 free(title); 1536 return; 1537 } 1538 1539 while (isspace((unsigned char)*start)) 1540 start++; 1541 1542 if (0 == strncmp(start, "-", 1)) 1543 start += 1; 1544 else if (0 == strncmp(start, "\\-\\-", 4)) 1545 start += 4; 1546 else if (0 == strncmp(start, "\\-", 2)) 1547 start += 2; 1548 else if (0 == strncmp(start, "\\(en", 4)) 1549 start += 4; 1550 else if (0 == strncmp(start, "\\(em", 4)) 1551 start += 4; 1552 1553 while (' ' == *start) 1554 start++; 1555 1556 mpage->desc = mandoc_strdup(start); 1557 free(title); 1558 return; 1559 } 1560 } 1561 1562 for (n = n->child; n; n = n->next) { 1563 if (NULL != mpage->desc) 1564 break; 1565 parse_man(mpage, n); 1566 } 1567 } 1568 1569 static void 1570 parse_mdoc(struct mpage *mpage, const struct mdoc_node *n) 1571 { 1572 1573 assert(NULL != n); 1574 for (n = n->child; NULL != n; n = n->next) { 1575 switch (n->type) { 1576 case MDOC_ELEM: 1577 /* FALLTHROUGH */ 1578 case MDOC_BLOCK: 1579 /* FALLTHROUGH */ 1580 case MDOC_HEAD: 1581 /* FALLTHROUGH */ 1582 case MDOC_BODY: 1583 /* FALLTHROUGH */ 1584 case MDOC_TAIL: 1585 if (NULL != mdocs[n->tok].fp) 1586 if (0 == (*mdocs[n->tok].fp)(mpage, n)) 1587 break; 1588 if (mdocs[n->tok].mask) 1589 putmdockey(mpage, n->child, 1590 mdocs[n->tok].mask); 1591 break; 1592 default: 1593 assert(MDOC_ROOT != n->type); 1594 continue; 1595 } 1596 if (NULL != n->child) 1597 parse_mdoc(mpage, n); 1598 } 1599 } 1600 1601 static int 1602 parse_mdoc_Fd(struct mpage *mpage, const struct mdoc_node *n) 1603 { 1604 const char *start, *end; 1605 size_t sz; 1606 1607 if (SEC_SYNOPSIS != n->sec || 1608 NULL == (n = n->child) || 1609 MDOC_TEXT != n->type) 1610 return(0); 1611 1612 /* 1613 * Only consider those `Fd' macro fields that begin with an 1614 * "inclusion" token (versus, e.g., #define). 1615 */ 1616 1617 if (strcmp("#include", n->string)) 1618 return(0); 1619 1620 if (NULL == (n = n->next) || MDOC_TEXT != n->type) 1621 return(0); 1622 1623 /* 1624 * Strip away the enclosing angle brackets and make sure we're 1625 * not zero-length. 1626 */ 1627 1628 start = n->string; 1629 if ('<' == *start || '"' == *start) 1630 start++; 1631 1632 if (0 == (sz = strlen(start))) 1633 return(0); 1634 1635 end = &start[(int)sz - 1]; 1636 if ('>' == *end || '"' == *end) 1637 end--; 1638 1639 if (end > start) 1640 putkeys(mpage, start, end - start + 1, TYPE_In); 1641 return(0); 1642 } 1643 1644 static int 1645 parse_mdoc_Fn(struct mpage *mpage, const struct mdoc_node *n) 1646 { 1647 char *cp; 1648 1649 if (NULL == (n = n->child) || MDOC_TEXT != n->type) 1650 return(0); 1651 1652 /* 1653 * Parse: .Fn "struct type *name" "char *arg". 1654 * First strip away pointer symbol. 1655 * Then store the function name, then type. 1656 * Finally, store the arguments. 1657 */ 1658 1659 if (NULL == (cp = strrchr(n->string, ' '))) 1660 cp = n->string; 1661 1662 while ('*' == *cp) 1663 cp++; 1664 1665 putkey(mpage, cp, TYPE_Fn); 1666 1667 if (n->string < cp) 1668 putkeys(mpage, n->string, cp - n->string, TYPE_Ft); 1669 1670 for (n = n->next; NULL != n; n = n->next) 1671 if (MDOC_TEXT == n->type) 1672 putkey(mpage, n->string, TYPE_Fa); 1673 1674 return(0); 1675 } 1676 1677 static int 1678 parse_mdoc_Xr(struct mpage *mpage, const struct mdoc_node *n) 1679 { 1680 char *cp; 1681 1682 if (NULL == (n = n->child)) 1683 return(0); 1684 1685 if (NULL == n->next) { 1686 putkey(mpage, n->string, TYPE_Xr); 1687 return(0); 1688 } 1689 1690 mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string); 1691 putkey(mpage, cp, TYPE_Xr); 1692 free(cp); 1693 return(0); 1694 } 1695 1696 static int 1697 parse_mdoc_Nd(struct mpage *mpage, const struct mdoc_node *n) 1698 { 1699 1700 if (MDOC_BODY == n->type) 1701 mdoc_deroff(&mpage->desc, n); 1702 return(0); 1703 } 1704 1705 static int 1706 parse_mdoc_Nm(struct mpage *mpage, const struct mdoc_node *n) 1707 { 1708 1709 if (SEC_NAME == n->sec) 1710 putmdockey(mpage, n->child, NAME_TITLE); 1711 else if (SEC_SYNOPSIS == n->sec && MDOC_HEAD == n->type) 1712 putmdockey(mpage, n->child, NAME_SYN); 1713 return(0); 1714 } 1715 1716 static int 1717 parse_mdoc_Sh(struct mpage *mpage, const struct mdoc_node *n) 1718 { 1719 1720 return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); 1721 } 1722 1723 static int 1724 parse_mdoc_head(struct mpage *mpage, const struct mdoc_node *n) 1725 { 1726 1727 return(MDOC_HEAD == n->type); 1728 } 1729 1730 static int 1731 parse_mdoc_body(struct mpage *mpage, const struct mdoc_node *n) 1732 { 1733 1734 return(MDOC_BODY == n->type); 1735 } 1736 1737 /* 1738 * Add a string to the hash table for the current manual. 1739 * Each string has a bitmask telling which macros it belongs to. 1740 * When we finish the manual, we'll dump the table. 1741 */ 1742 static void 1743 putkeys(const struct mpage *mpage, 1744 const char *cp, size_t sz, uint64_t v) 1745 { 1746 struct ohash *htab; 1747 struct str *s; 1748 const char *end; 1749 unsigned int slot; 1750 int i; 1751 1752 if (0 == sz) 1753 return; 1754 1755 if (TYPE_Nm & v) { 1756 htab = &names; 1757 v &= name_mask; 1758 name_mask &= ~NAME_FIRST; 1759 if (debug > 1) 1760 say(mpage->mlinks->file, 1761 "Adding name %*s", sz, cp); 1762 } else { 1763 htab = &strings; 1764 if (debug > 1) 1765 for (i = 0; i < mansearch_keymax; i++) 1766 if (1 << i & v) 1767 say(mpage->mlinks->file, 1768 "Adding key %s=%*s", 1769 mansearch_keynames[i], sz, cp); 1770 } 1771 1772 end = cp + sz; 1773 slot = ohash_qlookupi(htab, cp, &end); 1774 s = ohash_find(htab, slot); 1775 1776 if (NULL != s && mpage == s->mpage) { 1777 s->mask |= v; 1778 return; 1779 } else if (NULL == s) { 1780 s = mandoc_calloc(1, sizeof(struct str) + sz + 1); 1781 memcpy(s->key, cp, sz); 1782 ohash_insert(htab, slot, s); 1783 } 1784 s->mpage = mpage; 1785 s->mask = v; 1786 } 1787 1788 /* 1789 * Take a Unicode codepoint and produce its UTF-8 encoding. 1790 * This isn't the best way to do this, but it works. 1791 * The magic numbers are from the UTF-8 packaging. 1792 * They're not as scary as they seem: read the UTF-8 spec for details. 1793 */ 1794 static size_t 1795 utf8(unsigned int cp, char out[7]) 1796 { 1797 size_t rc; 1798 1799 rc = 0; 1800 if (cp <= 0x0000007F) { 1801 rc = 1; 1802 out[0] = (char)cp; 1803 } else if (cp <= 0x000007FF) { 1804 rc = 2; 1805 out[0] = (cp >> 6 & 31) | 192; 1806 out[1] = (cp & 63) | 128; 1807 } else if (cp <= 0x0000FFFF) { 1808 rc = 3; 1809 out[0] = (cp >> 12 & 15) | 224; 1810 out[1] = (cp >> 6 & 63) | 128; 1811 out[2] = (cp & 63) | 128; 1812 } else if (cp <= 0x001FFFFF) { 1813 rc = 4; 1814 out[0] = (cp >> 18 & 7) | 240; 1815 out[1] = (cp >> 12 & 63) | 128; 1816 out[2] = (cp >> 6 & 63) | 128; 1817 out[3] = (cp & 63) | 128; 1818 } else if (cp <= 0x03FFFFFF) { 1819 rc = 5; 1820 out[0] = (cp >> 24 & 3) | 248; 1821 out[1] = (cp >> 18 & 63) | 128; 1822 out[2] = (cp >> 12 & 63) | 128; 1823 out[3] = (cp >> 6 & 63) | 128; 1824 out[4] = (cp & 63) | 128; 1825 } else if (cp <= 0x7FFFFFFF) { 1826 rc = 6; 1827 out[0] = (cp >> 30 & 1) | 252; 1828 out[1] = (cp >> 24 & 63) | 128; 1829 out[2] = (cp >> 18 & 63) | 128; 1830 out[3] = (cp >> 12 & 63) | 128; 1831 out[4] = (cp >> 6 & 63) | 128; 1832 out[5] = (cp & 63) | 128; 1833 } else 1834 return(0); 1835 1836 out[rc] = '\0'; 1837 return(rc); 1838 } 1839 1840 /* 1841 * Store the rendered version of a key, or alias the pointer 1842 * if the key contains no escape sequences. 1843 */ 1844 static void 1845 render_key(struct mchars *mc, struct str *key) 1846 { 1847 size_t sz, bsz, pos; 1848 char utfbuf[7], res[6]; 1849 char *buf; 1850 const char *seq, *cpp, *val; 1851 int len, u; 1852 enum mandoc_esc esc; 1853 1854 assert(NULL == key->rendered); 1855 1856 res[0] = '\\'; 1857 res[1] = '\t'; 1858 res[2] = ASCII_NBRSP; 1859 res[3] = ASCII_HYPH; 1860 res[4] = ASCII_BREAK; 1861 res[5] = '\0'; 1862 1863 val = key->key; 1864 bsz = strlen(val); 1865 1866 /* 1867 * Pre-check: if we have no stop-characters, then set the 1868 * pointer as ourselvse and get out of here. 1869 */ 1870 if (strcspn(val, res) == bsz) { 1871 key->rendered = key->key; 1872 return; 1873 } 1874 1875 /* Pre-allocate by the length of the input */ 1876 1877 buf = mandoc_malloc(++bsz); 1878 pos = 0; 1879 1880 while ('\0' != *val) { 1881 /* 1882 * Halt on the first escape sequence. 1883 * This also halts on the end of string, in which case 1884 * we just copy, fallthrough, and exit the loop. 1885 */ 1886 if ((sz = strcspn(val, res)) > 0) { 1887 memcpy(&buf[pos], val, sz); 1888 pos += sz; 1889 val += sz; 1890 } 1891 1892 switch (*val) { 1893 case ASCII_HYPH: 1894 buf[pos++] = '-'; 1895 val++; 1896 continue; 1897 case '\t': 1898 /* FALLTHROUGH */ 1899 case ASCII_NBRSP: 1900 buf[pos++] = ' '; 1901 val++; 1902 /* FALLTHROUGH */ 1903 case ASCII_BREAK: 1904 continue; 1905 default: 1906 break; 1907 } 1908 if ('\\' != *val) 1909 break; 1910 1911 /* Read past the slash. */ 1912 1913 val++; 1914 1915 /* 1916 * Parse the escape sequence and see if it's a 1917 * predefined character or special character. 1918 */ 1919 1920 esc = mandoc_escape((const char **)&val, 1921 &seq, &len); 1922 if (ESCAPE_ERROR == esc) 1923 break; 1924 if (ESCAPE_SPECIAL != esc) 1925 continue; 1926 1927 /* 1928 * Render the special character 1929 * as either UTF-8 or ASCII. 1930 */ 1931 1932 if (write_utf8) { 1933 if (0 == (u = mchars_spec2cp(mc, seq, len))) 1934 continue; 1935 cpp = utfbuf; 1936 if (0 == (sz = utf8(u, utfbuf))) 1937 continue; 1938 sz = strlen(cpp); 1939 } else { 1940 cpp = mchars_spec2str(mc, seq, len, &sz); 1941 if (NULL == cpp) 1942 continue; 1943 if (ASCII_NBRSP == *cpp) { 1944 cpp = " "; 1945 sz = 1; 1946 } 1947 } 1948 1949 /* Copy the rendered glyph into the stream. */ 1950 1951 bsz += sz; 1952 buf = mandoc_realloc(buf, bsz); 1953 memcpy(&buf[pos], cpp, sz); 1954 pos += sz; 1955 } 1956 1957 buf[pos] = '\0'; 1958 key->rendered = buf; 1959 } 1960 1961 static void 1962 dbadd_mlink(const struct mlink *mlink) 1963 { 1964 size_t i; 1965 1966 i = 1; 1967 SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec); 1968 SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch); 1969 SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name); 1970 SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, mlink->mpage->pageid); 1971 SQL_STEP(stmts[STMT_INSERT_LINK]); 1972 sqlite3_reset(stmts[STMT_INSERT_LINK]); 1973 } 1974 1975 /* 1976 * Flush the current page's terms (and their bits) into the database. 1977 * Wrap the entire set of additions in a transaction to make sqlite be a 1978 * little faster. 1979 * Also, handle escape sequences at the last possible moment. 1980 */ 1981 static void 1982 dbadd(struct mpage *mpage, struct mchars *mc) 1983 { 1984 struct mlink *mlink; 1985 struct str *key; 1986 size_t i; 1987 unsigned int slot; 1988 1989 mlink = mpage->mlinks; 1990 1991 if (nodb) { 1992 for (key = ohash_first(&names, &slot); NULL != key; 1993 key = ohash_next(&names, &slot)) { 1994 if (key->rendered != key->key) 1995 free(key->rendered); 1996 free(key); 1997 } 1998 for (key = ohash_first(&strings, &slot); NULL != key; 1999 key = ohash_next(&strings, &slot)) { 2000 if (key->rendered != key->key) 2001 free(key->rendered); 2002 free(key); 2003 } 2004 if (0 == debug) 2005 return; 2006 while (NULL != mlink) { 2007 fputs(mlink->name, stdout); 2008 if (NULL == mlink->next || 2009 strcmp(mlink->dsec, mlink->next->dsec) || 2010 strcmp(mlink->fsec, mlink->next->fsec) || 2011 strcmp(mlink->arch, mlink->next->arch)) { 2012 putchar('('); 2013 if ('\0' == *mlink->dsec) 2014 fputs(mlink->fsec, stdout); 2015 else 2016 fputs(mlink->dsec, stdout); 2017 if ('\0' != *mlink->arch) 2018 printf("/%s", mlink->arch); 2019 putchar(')'); 2020 } 2021 mlink = mlink->next; 2022 if (NULL != mlink) 2023 fputs(", ", stdout); 2024 } 2025 printf(" - %s\n", mpage->desc); 2026 return; 2027 } 2028 2029 if (debug) 2030 say(mlink->file, "Adding to database"); 2031 2032 i = strlen(mpage->desc) + 1; 2033 key = mandoc_calloc(1, sizeof(struct str) + i); 2034 memcpy(key->key, mpage->desc, i); 2035 render_key(mc, key); 2036 2037 i = 1; 2038 SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, key->rendered); 2039 SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form); 2040 SQL_STEP(stmts[STMT_INSERT_PAGE]); 2041 mpage->pageid = sqlite3_last_insert_rowid(db); 2042 sqlite3_reset(stmts[STMT_INSERT_PAGE]); 2043 2044 if (key->rendered != key->key) 2045 free(key->rendered); 2046 free(key); 2047 2048 while (NULL != mlink) { 2049 dbadd_mlink(mlink); 2050 mlink = mlink->next; 2051 } 2052 mlink = mpage->mlinks; 2053 2054 for (key = ohash_first(&names, &slot); NULL != key; 2055 key = ohash_next(&names, &slot)) { 2056 assert(key->mpage == mpage); 2057 if (NULL == key->rendered) 2058 render_key(mc, key); 2059 i = 1; 2060 SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, key->mask); 2061 SQL_BIND_TEXT(stmts[STMT_INSERT_NAME], i, key->rendered); 2062 SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, mpage->pageid); 2063 SQL_STEP(stmts[STMT_INSERT_NAME]); 2064 sqlite3_reset(stmts[STMT_INSERT_NAME]); 2065 if (key->rendered != key->key) 2066 free(key->rendered); 2067 free(key); 2068 } 2069 for (key = ohash_first(&strings, &slot); NULL != key; 2070 key = ohash_next(&strings, &slot)) { 2071 assert(key->mpage == mpage); 2072 if (NULL == key->rendered) 2073 render_key(mc, key); 2074 i = 1; 2075 SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask); 2076 SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->rendered); 2077 SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, mpage->pageid); 2078 SQL_STEP(stmts[STMT_INSERT_KEY]); 2079 sqlite3_reset(stmts[STMT_INSERT_KEY]); 2080 if (key->rendered != key->key) 2081 free(key->rendered); 2082 free(key); 2083 } 2084 } 2085 2086 static void 2087 dbprune(void) 2088 { 2089 struct mpage *mpage; 2090 struct mlink *mlink; 2091 size_t i; 2092 unsigned int slot; 2093 2094 if (0 == nodb) 2095 SQL_EXEC("BEGIN TRANSACTION"); 2096 2097 for (mpage = ohash_first(&mpages, &slot); NULL != mpage; 2098 mpage = ohash_next(&mpages, &slot)) { 2099 mlink = mpage->mlinks; 2100 if (debug) 2101 say(mlink->file, "Deleting from database"); 2102 if (nodb) 2103 continue; 2104 for ( ; NULL != mlink; mlink = mlink->next) { 2105 i = 1; 2106 SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], 2107 i, mlink->dsec); 2108 SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], 2109 i, mlink->arch); 2110 SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], 2111 i, mlink->name); 2112 SQL_STEP(stmts[STMT_DELETE_PAGE]); 2113 sqlite3_reset(stmts[STMT_DELETE_PAGE]); 2114 } 2115 } 2116 2117 if (0 == nodb) 2118 SQL_EXEC("END TRANSACTION"); 2119 } 2120 2121 /* 2122 * Close an existing database and its prepared statements. 2123 * If "real" is not set, rename the temporary file into the real one. 2124 */ 2125 static void 2126 dbclose(int real) 2127 { 2128 size_t i; 2129 int status; 2130 pid_t child; 2131 2132 if (nodb) 2133 return; 2134 2135 for (i = 0; i < STMT__MAX; i++) { 2136 sqlite3_finalize(stmts[i]); 2137 stmts[i] = NULL; 2138 } 2139 2140 sqlite3_close(db); 2141 db = NULL; 2142 2143 if (real) 2144 return; 2145 2146 if ('\0' == *tempfilename) { 2147 if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) { 2148 exitcode = (int)MANDOCLEVEL_SYSERR; 2149 say(MANDOC_DB, "&rename"); 2150 } 2151 return; 2152 } 2153 2154 switch (child = fork()) { 2155 case -1: 2156 exitcode = (int)MANDOCLEVEL_SYSERR; 2157 say("", "&fork cmp"); 2158 return; 2159 case 0: 2160 execlp("cmp", "cmp", "-s", 2161 tempfilename, MANDOC_DB, NULL); 2162 say("", "&exec cmp"); 2163 exit(0); 2164 default: 2165 break; 2166 } 2167 if (-1 == waitpid(child, &status, 0)) { 2168 exitcode = (int)MANDOCLEVEL_SYSERR; 2169 say("", "&wait cmp"); 2170 } else if (WIFSIGNALED(status)) { 2171 exitcode = (int)MANDOCLEVEL_SYSERR; 2172 say("", "cmp died from signal %d", WTERMSIG(status)); 2173 } else if (WEXITSTATUS(status)) { 2174 exitcode = (int)MANDOCLEVEL_SYSERR; 2175 say(MANDOC_DB, 2176 "Data changed, but cannot replace database"); 2177 } 2178 2179 *strrchr(tempfilename, '/') = '\0'; 2180 switch (child = fork()) { 2181 case -1: 2182 exitcode = (int)MANDOCLEVEL_SYSERR; 2183 say("", "&fork rm"); 2184 return; 2185 case 0: 2186 execlp("rm", "rm", "-rf", tempfilename, NULL); 2187 say("", "&exec rm"); 2188 exit((int)MANDOCLEVEL_SYSERR); 2189 default: 2190 break; 2191 } 2192 if (-1 == waitpid(child, &status, 0)) { 2193 exitcode = (int)MANDOCLEVEL_SYSERR; 2194 say("", "&wait rm"); 2195 } else if (WIFSIGNALED(status) || WEXITSTATUS(status)) { 2196 exitcode = (int)MANDOCLEVEL_SYSERR; 2197 say("", "%s: Cannot remove temporary directory", 2198 tempfilename); 2199 } 2200 } 2201 2202 /* 2203 * This is straightforward stuff. 2204 * Open a database connection to a "temporary" database, then open a set 2205 * of prepared statements we'll use over and over again. 2206 * If "real" is set, we use the existing database; if not, we truncate a 2207 * temporary one. 2208 * Must be matched by dbclose(). 2209 */ 2210 static int 2211 dbopen(int real) 2212 { 2213 const char *sql; 2214 int rc, ofl; 2215 2216 if (nodb) 2217 return(1); 2218 2219 *tempfilename = '\0'; 2220 ofl = SQLITE_OPEN_READWRITE; 2221 2222 if (real) { 2223 rc = sqlite3_open_v2(MANDOC_DB, &db, ofl, NULL); 2224 if (SQLITE_OK != rc) { 2225 exitcode = (int)MANDOCLEVEL_SYSERR; 2226 if (SQLITE_CANTOPEN != rc) 2227 say(MANDOC_DB, "%s", sqlite3_errstr(rc)); 2228 return(0); 2229 } 2230 goto prepare_statements; 2231 } 2232 2233 ofl |= SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE; 2234 2235 remove(MANDOC_DB "~"); 2236 rc = sqlite3_open_v2(MANDOC_DB "~", &db, ofl, NULL); 2237 if (SQLITE_OK == rc) 2238 goto create_tables; 2239 if (MPARSE_QUICK & mparse_options) { 2240 exitcode = (int)MANDOCLEVEL_SYSERR; 2241 say(MANDOC_DB "~", "%s", sqlite3_errstr(rc)); 2242 return(0); 2243 } 2244 2245 (void)strlcpy(tempfilename, "/tmp/mandocdb.XXXXXX", 2246 sizeof(tempfilename)); 2247 if (NULL == mkdtemp(tempfilename)) { 2248 exitcode = (int)MANDOCLEVEL_SYSERR; 2249 say("", "&%s", tempfilename); 2250 return(0); 2251 } 2252 (void)strlcat(tempfilename, "/" MANDOC_DB, 2253 sizeof(tempfilename)); 2254 rc = sqlite3_open_v2(tempfilename, &db, ofl, NULL); 2255 if (SQLITE_OK != rc) { 2256 exitcode = (int)MANDOCLEVEL_SYSERR; 2257 say("", "%s: %s", tempfilename, sqlite3_errstr(rc)); 2258 return(0); 2259 } 2260 2261 create_tables: 2262 sql = "CREATE TABLE \"mpages\" (\n" 2263 " \"desc\" TEXT NOT NULL,\n" 2264 " \"form\" INTEGER NOT NULL,\n" 2265 " \"pageid\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" 2266 ");\n" 2267 "\n" 2268 "CREATE TABLE \"mlinks\" (\n" 2269 " \"sec\" TEXT NOT NULL,\n" 2270 " \"arch\" TEXT NOT NULL,\n" 2271 " \"name\" TEXT NOT NULL,\n" 2272 " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) " 2273 "ON DELETE CASCADE\n" 2274 ");\n" 2275 "CREATE INDEX mlinks_pageid_idx ON mlinks (pageid);\n" 2276 "\n" 2277 "CREATE TABLE \"names\" (\n" 2278 " \"bits\" INTEGER NOT NULL,\n" 2279 " \"name\" TEXT NOT NULL,\n" 2280 " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) " 2281 "ON DELETE CASCADE\n" 2282 ");\n" 2283 "\n" 2284 "CREATE TABLE \"keys\" (\n" 2285 " \"bits\" INTEGER NOT NULL,\n" 2286 " \"key\" TEXT NOT NULL,\n" 2287 " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) " 2288 "ON DELETE CASCADE\n" 2289 ");\n" 2290 "CREATE INDEX keys_pageid_idx ON keys (pageid);\n"; 2291 2292 if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) { 2293 exitcode = (int)MANDOCLEVEL_SYSERR; 2294 say(MANDOC_DB, "%s", sqlite3_errmsg(db)); 2295 sqlite3_close(db); 2296 return(0); 2297 } 2298 2299 prepare_statements: 2300 if (SQLITE_OK != sqlite3_exec(db, 2301 "PRAGMA foreign_keys = ON", NULL, NULL, NULL)) { 2302 exitcode = (int)MANDOCLEVEL_SYSERR; 2303 say(MANDOC_DB, "PRAGMA foreign_keys: %s", 2304 sqlite3_errmsg(db)); 2305 sqlite3_close(db); 2306 return(0); 2307 } 2308 2309 sql = "DELETE FROM mpages WHERE pageid IN " 2310 "(SELECT pageid FROM mlinks WHERE " 2311 "sec=? AND arch=? AND name=?)"; 2312 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL); 2313 sql = "INSERT INTO mpages " 2314 "(desc,form) VALUES (?,?)"; 2315 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL); 2316 sql = "INSERT INTO mlinks " 2317 "(sec,arch,name,pageid) VALUES (?,?,?,?)"; 2318 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL); 2319 sql = "INSERT INTO names " 2320 "(bits,name,pageid) VALUES (?,?,?)"; 2321 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_NAME], NULL); 2322 sql = "INSERT INTO keys " 2323 "(bits,key,pageid) VALUES (?,?,?)"; 2324 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL); 2325 2326 /* 2327 * When opening a new database, we can turn off 2328 * synchronous mode for much better performance. 2329 */ 2330 2331 if (real && SQLITE_OK != sqlite3_exec(db, 2332 "PRAGMA synchronous = OFF", NULL, NULL, NULL)) { 2333 exitcode = (int)MANDOCLEVEL_SYSERR; 2334 say(MANDOC_DB, "PRAGMA synchronous: %s", 2335 sqlite3_errmsg(db)); 2336 sqlite3_close(db); 2337 return(0); 2338 } 2339 2340 return(1); 2341 } 2342 2343 static void * 2344 hash_calloc(size_t n, size_t sz, void *arg) 2345 { 2346 2347 return(mandoc_calloc(n, sz)); 2348 } 2349 2350 static void * 2351 hash_alloc(size_t sz, void *arg) 2352 { 2353 2354 return(mandoc_malloc(sz)); 2355 } 2356 2357 static void 2358 hash_free(void *p, void *arg) 2359 { 2360 2361 free(p); 2362 } 2363 2364 static int 2365 set_basedir(const char *targetdir) 2366 { 2367 static char startdir[PATH_MAX]; 2368 static int getcwd_status; /* 1 = ok, 2 = failure */ 2369 static int chdir_status; /* 1 = changed directory */ 2370 char *cp; 2371 2372 /* 2373 * Remember the original working directory, if possible. 2374 * This will be needed if the second or a later directory 2375 * on the command line is given as a relative path. 2376 * Do not error out if the current directory is not 2377 * searchable: Maybe it won't be needed after all. 2378 */ 2379 if (0 == getcwd_status) { 2380 if (NULL == getcwd(startdir, sizeof(startdir))) { 2381 getcwd_status = 2; 2382 (void)strlcpy(startdir, strerror(errno), 2383 sizeof(startdir)); 2384 } else 2385 getcwd_status = 1; 2386 } 2387 2388 /* 2389 * We are leaving the old base directory. 2390 * Do not use it any longer, not even for messages. 2391 */ 2392 *basedir = '\0'; 2393 2394 /* 2395 * If and only if the directory was changed earlier and 2396 * the next directory to process is given as a relative path, 2397 * first go back, or bail out if that is impossible. 2398 */ 2399 if (chdir_status && '/' != *targetdir) { 2400 if (2 == getcwd_status) { 2401 exitcode = (int)MANDOCLEVEL_SYSERR; 2402 say("", "getcwd: %s", startdir); 2403 return(0); 2404 } 2405 if (-1 == chdir(startdir)) { 2406 exitcode = (int)MANDOCLEVEL_SYSERR; 2407 say("", "&chdir %s", startdir); 2408 return(0); 2409 } 2410 } 2411 2412 /* 2413 * Always resolve basedir to the canonicalized absolute 2414 * pathname and append a trailing slash, such that 2415 * we can reliably check whether files are inside. 2416 */ 2417 if (NULL == realpath(targetdir, basedir)) { 2418 exitcode = (int)MANDOCLEVEL_BADARG; 2419 say("", "&%s: realpath", targetdir); 2420 return(0); 2421 } else if (-1 == chdir(basedir)) { 2422 exitcode = (int)MANDOCLEVEL_BADARG; 2423 say("", "&chdir"); 2424 return(0); 2425 } 2426 chdir_status = 1; 2427 cp = strchr(basedir, '\0'); 2428 if ('/' != cp[-1]) { 2429 if (cp - basedir >= PATH_MAX - 1) { 2430 exitcode = (int)MANDOCLEVEL_SYSERR; 2431 say("", "Filename too long"); 2432 return(0); 2433 } 2434 *cp++ = '/'; 2435 *cp = '\0'; 2436 } 2437 return(1); 2438 } 2439 2440 static void 2441 say(const char *file, const char *format, ...) 2442 { 2443 va_list ap; 2444 int use_errno; 2445 2446 if ('\0' != *basedir) 2447 fprintf(stderr, "%s", basedir); 2448 if ('\0' != *basedir && '\0' != *file) 2449 fputc('/', stderr); 2450 if ('\0' != *file) 2451 fprintf(stderr, "%s", file); 2452 2453 use_errno = 1; 2454 if (NULL != format) { 2455 switch (*format) { 2456 case '&': 2457 format++; 2458 break; 2459 case '\0': 2460 format = NULL; 2461 break; 2462 default: 2463 use_errno = 0; 2464 break; 2465 } 2466 } 2467 if (NULL != format) { 2468 if ('\0' != *basedir || '\0' != *file) 2469 fputs(": ", stderr); 2470 va_start(ap, format); 2471 vfprintf(stderr, format, ap); 2472 va_end(ap); 2473 } 2474 if (use_errno) { 2475 if ('\0' != *basedir || '\0' != *file || NULL != format) 2476 fputs(": ", stderr); 2477 perror(NULL); 2478 } else 2479 fputc('\n', stderr); 2480 } 2481