/* $OpenBSD: mandocdb.c,v 1.221 2024/05/14 21:12:44 schwarze Exp $ */
/*
 * Copyright (c) 2011-2021, 2024 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2016 Ed Maste <emaste@freebsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Implementation of the makewhatis(8) program.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <fts.h>
#include <limits.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "mandoc.h"
#include "roff.h"
#include "mdoc.h"
#include "man.h"
#include "mandoc_parse.h"
#include "manconf.h"
#include "mansearch.h"
#include "dba_array.h"
#include "dba.h"
513899e304Sschwarze
/* Table of search key names, defined in another translation unit. */
extern const char *const mansearch_keynames[];
53a683219bSschwarze
/*
 * Operational mode, selected by the command line options
 * -C, -d, -u and -t; OP_DEFAULT applies when none of them is given.
 */
enum	op {
	OP_DEFAULT = 0, /* new dbs from dir list or default config */
	OP_CONFFILE, /* new databases from custom config file */
	OP_UPDATE, /* delete/add entries in existing database */
	OP_DELETE, /* delete entries from existing database */
	OP_TEST /* change no databases, report potential problems */
};
613899e304Sschwarze
/*
 * One rendered key string together with the bitmask describing it.
 * The text is stored inline in the flexible array member, so the
 * structure must be allocated with room for the string and its NUL.
 */
struct	str {
	const struct mpage *mpage; /* if set, the owning parse */
	uint64_t	 mask; /* bitmask in sequence */
	char		 key[]; /* rendered text */
};
673899e304Sschwarze
/*
 * Inode and device number pair.
 * It is embedded at the start of struct mpage, where it serves
 * as the hash key of the mpages table.
 */
struct	inodev {
	ino_t		 st_ino;
	dev_t		 st_dev;
};
72eea1c63dSschwarze
73eea1c63dSschwarze struct mpage {
74eea1c63dSschwarze struct inodev inodev; /* used for hashing routine */
75ff2dbb0fSschwarze struct dba_array *dba;
76eea1c63dSschwarze char *sec; /* section from file content */
77eea1c63dSschwarze char *arch; /* architecture from file content */
78eea1c63dSschwarze char *title; /* title from file content */
79eea1c63dSschwarze char *desc; /* description from file content */
800a4bed2cSschwarze struct mpage *next; /* singly linked list */
81eea1c63dSschwarze struct mlink *mlinks; /* singly linked list */
828cea0557Sschwarze int name_head_done;
83ff2dbb0fSschwarze enum form form; /* format from file content */
84eea1c63dSschwarze };
85eea1c63dSschwarze
86eea1c63dSschwarze struct mlink {
87eea1c63dSschwarze char file[PATH_MAX]; /* filename rel. to manpath */
88eea1c63dSschwarze char *dsec; /* section from directory */
89eea1c63dSschwarze char *arch; /* architecture from directory */
90eea1c63dSschwarze char *name; /* name from file name (not empty) */
91eea1c63dSschwarze char *fsec; /* section from file name suffix */
92eea1c63dSschwarze struct mlink *next; /* singly linked list */
93b493b720Sschwarze struct mpage *mpage; /* parent */
94dba9b9e4Sschwarze int gzip; /* filename has a .gz suffix */
95ff2dbb0fSschwarze enum form dform; /* format from directory */
96ff2dbb0fSschwarze enum form fform; /* format from file name suffix */
97eea1c63dSschwarze };
98eea1c63dSschwarze
/*
 * Signature of the optional per-macro handlers referenced from the
 * mdoc_handlers table.  A handler receives the page being indexed,
 * the document meta data and the node to inspect, and returns an int;
 * see struct mdoc_handler for how a return value of 0 is used.
 */
typedef	int (*mdoc_fp)(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
1013899e304Sschwarze
102d5d5d64fSschwarze struct mdoc_handler {
103eea1c63dSschwarze mdoc_fp fp; /* optional handler */
104eea1c63dSschwarze uint64_t mask; /* set unless handler returns 0 */
105816c3c54Sschwarze int taboo; /* node flags that must not be set */
106d5d5d64fSschwarze };
107d5d5d64fSschwarze
1089398f94cSschwarze
/* Public entry point of this file. */
int		 mandocdb(int, char *[]);

/* File-local helpers, in alphabetical order. */
static	void	 dbadd(struct dba *, struct mpage *);
static	void	 dbadd_mlink(const struct mlink *);
static	void	 dbprune(struct dba *);
static	void	 dbwrite(struct dba *);
static	void	 filescan(const char *);
static	int	 fts_compare(const FTSENT **, const FTSENT **);
static	void	 mlink_add(struct mlink *, const struct stat *);
static	void	 mlink_check(struct mpage *, struct mlink *);
static	void	 mlink_free(struct mlink *);
static	void	 mlinks_undupe(struct mpage *);
static	void	 mpages_free(void);
static	void	 mpages_merge(struct dba *, struct mparse *);
static	void	 parse_cat(struct mpage *, int);
static	void	 parse_man(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	void	 parse_mdoc(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	int	 parse_mdoc_head(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	int	 parse_mdoc_Fa(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	int	 parse_mdoc_Fd(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	void	 parse_mdoc_fname(struct mpage *, const struct roff_node *);
static	int	 parse_mdoc_Fn(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	int	 parse_mdoc_Fo(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	int	 parse_mdoc_Nd(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	int	 parse_mdoc_Nm(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	int	 parse_mdoc_Sh(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	int	 parse_mdoc_Va(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	int	 parse_mdoc_Xr(struct mpage *, const struct roff_meta *,
			const struct roff_node *);
static	void	 putkey(const struct mpage *, char *, uint64_t);
static	void	 putkeys(const struct mpage *, char *, size_t, uint64_t);
static	void	 putmdockey(const struct mpage *,
			const struct roff_node *, uint64_t, int);
static	int	 render_string(char **, size_t *);
static	void	 say(const char *, const char *, ...)
			__attribute__((__format__ (__printf__, 2, 3)));
static	int	 set_basedir(const char *, int);
static	int	 treescan(void);
static	size_t	 utf8(unsigned int, char[5]);
159eea1c63dSschwarze
160eea1c63dSschwarze static int nodb; /* no database changes */
161fee846f0Sschwarze static int mparse_options; /* abort the parse early */
162f1da507fSschwarze static int use_all; /* use all found files */
163dada979aSschwarze static int debug; /* print what we're doing */
164eea1c63dSschwarze static int warnings; /* warn about crap */
165562403eaSschwarze static int write_utf8; /* write UTF-8 output; else ASCII */
166eea1c63dSschwarze static int exitcode; /* to be returned by main */
167eea1c63dSschwarze static enum op op; /* operational mode */
168eea1c63dSschwarze static char basedir[PATH_MAX]; /* current base directory */
169fa397007Sschwarze static size_t basedir_len; /* strlen(basedir) */
1700a4bed2cSschwarze static struct mpage *mpage_head; /* list of distinct manual pages */
171eea1c63dSschwarze static struct ohash mpages; /* table of distinct manual pages */
172eea1c63dSschwarze static struct ohash mlinks; /* table of directory entries */
173cb2bcd5aSschwarze static struct ohash names; /* table of all names */
174eea1c63dSschwarze static struct ohash strings; /* table of all strings */
175cb2bcd5aSschwarze static uint64_t name_mask;
176eea1c63dSschwarze
17716fe0cfcSschwarze static const struct mdoc_handler mdoc_handlers[MDOC_MAX - MDOC_Dd] = {
178816c3c54Sschwarze { NULL, 0, NODE_NOPRT }, /* Dd */
179816c3c54Sschwarze { NULL, 0, NODE_NOPRT }, /* Dt */
180816c3c54Sschwarze { NULL, 0, NODE_NOPRT }, /* Os */
181816c3c54Sschwarze { parse_mdoc_Sh, TYPE_Sh, 0 }, /* Sh */
182816c3c54Sschwarze { parse_mdoc_head, TYPE_Ss, 0 }, /* Ss */
183816c3c54Sschwarze { NULL, 0, 0 }, /* Pp */
184816c3c54Sschwarze { NULL, 0, 0 }, /* D1 */
185816c3c54Sschwarze { NULL, 0, 0 }, /* Dl */
186816c3c54Sschwarze { NULL, 0, 0 }, /* Bd */
187816c3c54Sschwarze { NULL, 0, 0 }, /* Ed */
188816c3c54Sschwarze { NULL, 0, 0 }, /* Bl */
189816c3c54Sschwarze { NULL, 0, 0 }, /* El */
190816c3c54Sschwarze { NULL, 0, 0 }, /* It */
191816c3c54Sschwarze { NULL, 0, 0 }, /* Ad */
192816c3c54Sschwarze { NULL, TYPE_An, 0 }, /* An */
19314a309e3Sschwarze { NULL, 0, 0 }, /* Ap */
194816c3c54Sschwarze { NULL, TYPE_Ar, 0 }, /* Ar */
195816c3c54Sschwarze { NULL, TYPE_Cd, 0 }, /* Cd */
196816c3c54Sschwarze { NULL, TYPE_Cm, 0 }, /* Cm */
197816c3c54Sschwarze { NULL, TYPE_Dv, 0 }, /* Dv */
198816c3c54Sschwarze { NULL, TYPE_Er, 0 }, /* Er */
199816c3c54Sschwarze { NULL, TYPE_Ev, 0 }, /* Ev */
200816c3c54Sschwarze { NULL, 0, 0 }, /* Ex */
2013ddcc9e8Sschwarze { parse_mdoc_Fa, 0, 0 }, /* Fa */
202816c3c54Sschwarze { parse_mdoc_Fd, 0, 0 }, /* Fd */
203816c3c54Sschwarze { NULL, TYPE_Fl, 0 }, /* Fl */
204816c3c54Sschwarze { parse_mdoc_Fn, 0, 0 }, /* Fn */
2053ddcc9e8Sschwarze { NULL, TYPE_Ft | TYPE_Vt, 0 }, /* Ft */
206816c3c54Sschwarze { NULL, TYPE_Ic, 0 }, /* Ic */
207816c3c54Sschwarze { NULL, TYPE_In, 0 }, /* In */
208816c3c54Sschwarze { NULL, TYPE_Li, 0 }, /* Li */
209816c3c54Sschwarze { parse_mdoc_Nd, 0, 0 }, /* Nd */
210816c3c54Sschwarze { parse_mdoc_Nm, 0, 0 }, /* Nm */
211816c3c54Sschwarze { NULL, 0, 0 }, /* Op */
212816c3c54Sschwarze { NULL, 0, 0 }, /* Ot */
213816c3c54Sschwarze { NULL, TYPE_Pa, NODE_NOSRC }, /* Pa */
214816c3c54Sschwarze { NULL, 0, 0 }, /* Rv */
215816c3c54Sschwarze { NULL, TYPE_St, 0 }, /* St */
216816c3c54Sschwarze { parse_mdoc_Va, TYPE_Va, 0 }, /* Va */
217816c3c54Sschwarze { parse_mdoc_Va, TYPE_Vt, 0 }, /* Vt */
218816c3c54Sschwarze { parse_mdoc_Xr, 0, 0 }, /* Xr */
219816c3c54Sschwarze { NULL, 0, 0 }, /* %A */
220816c3c54Sschwarze { NULL, 0, 0 }, /* %B */
221816c3c54Sschwarze { NULL, 0, 0 }, /* %D */
222816c3c54Sschwarze { NULL, 0, 0 }, /* %I */
223816c3c54Sschwarze { NULL, 0, 0 }, /* %J */
224816c3c54Sschwarze { NULL, 0, 0 }, /* %N */
225816c3c54Sschwarze { NULL, 0, 0 }, /* %O */
226816c3c54Sschwarze { NULL, 0, 0 }, /* %P */
227816c3c54Sschwarze { NULL, 0, 0 }, /* %R */
228816c3c54Sschwarze { NULL, 0, 0 }, /* %T */
229816c3c54Sschwarze { NULL, 0, 0 }, /* %V */
230816c3c54Sschwarze { NULL, 0, 0 }, /* Ac */
231816c3c54Sschwarze { NULL, 0, 0 }, /* Ao */
232816c3c54Sschwarze { NULL, 0, 0 }, /* Aq */
2338ccddcd3Sschwarze { NULL, TYPE_At, 0 }, /* At */
234816c3c54Sschwarze { NULL, 0, 0 }, /* Bc */
235816c3c54Sschwarze { NULL, 0, 0 }, /* Bf */
236816c3c54Sschwarze { NULL, 0, 0 }, /* Bo */
237816c3c54Sschwarze { NULL, 0, 0 }, /* Bq */
238816c3c54Sschwarze { NULL, TYPE_Bsx, NODE_NOSRC }, /* Bsx */
2393af8e8d7Sschwarze { NULL, TYPE_Bx, NODE_NOSRC }, /* Bx */
240816c3c54Sschwarze { NULL, 0, 0 }, /* Db */
241816c3c54Sschwarze { NULL, 0, 0 }, /* Dc */
242816c3c54Sschwarze { NULL, 0, 0 }, /* Do */
243816c3c54Sschwarze { NULL, 0, 0 }, /* Dq */
244816c3c54Sschwarze { NULL, 0, 0 }, /* Ec */
245816c3c54Sschwarze { NULL, 0, 0 }, /* Ef */
246816c3c54Sschwarze { NULL, TYPE_Em, 0 }, /* Em */
247816c3c54Sschwarze { NULL, 0, 0 }, /* Eo */
248816c3c54Sschwarze { NULL, TYPE_Fx, NODE_NOSRC }, /* Fx */
249816c3c54Sschwarze { NULL, TYPE_Ms, 0 }, /* Ms */
250816c3c54Sschwarze { NULL, 0, 0 }, /* No */
251816c3c54Sschwarze { NULL, 0, 0 }, /* Ns */
252816c3c54Sschwarze { NULL, TYPE_Nx, NODE_NOSRC }, /* Nx */
253816c3c54Sschwarze { NULL, TYPE_Ox, NODE_NOSRC }, /* Ox */
254816c3c54Sschwarze { NULL, 0, 0 }, /* Pc */
255816c3c54Sschwarze { NULL, 0, 0 }, /* Pf */
256816c3c54Sschwarze { NULL, 0, 0 }, /* Po */
257816c3c54Sschwarze { NULL, 0, 0 }, /* Pq */
258816c3c54Sschwarze { NULL, 0, 0 }, /* Qc */
259816c3c54Sschwarze { NULL, 0, 0 }, /* Ql */
260816c3c54Sschwarze { NULL, 0, 0 }, /* Qo */
261816c3c54Sschwarze { NULL, 0, 0 }, /* Qq */
262816c3c54Sschwarze { NULL, 0, 0 }, /* Re */
263816c3c54Sschwarze { NULL, 0, 0 }, /* Rs */
264816c3c54Sschwarze { NULL, 0, 0 }, /* Sc */
265816c3c54Sschwarze { NULL, 0, 0 }, /* So */
266816c3c54Sschwarze { NULL, 0, 0 }, /* Sq */
267816c3c54Sschwarze { NULL, 0, 0 }, /* Sm */
268816c3c54Sschwarze { NULL, 0, 0 }, /* Sx */
269816c3c54Sschwarze { NULL, TYPE_Sy, 0 }, /* Sy */
270816c3c54Sschwarze { NULL, TYPE_Tn, 0 }, /* Tn */
271816c3c54Sschwarze { NULL, 0, NODE_NOSRC }, /* Ux */
272816c3c54Sschwarze { NULL, 0, 0 }, /* Xc */
273816c3c54Sschwarze { NULL, 0, 0 }, /* Xo */
274816c3c54Sschwarze { parse_mdoc_Fo, 0, 0 }, /* Fo */
275816c3c54Sschwarze { NULL, 0, 0 }, /* Fc */
276816c3c54Sschwarze { NULL, 0, 0 }, /* Oo */
277816c3c54Sschwarze { NULL, 0, 0 }, /* Oc */
278816c3c54Sschwarze { NULL, 0, 0 }, /* Bk */
279816c3c54Sschwarze { NULL, 0, 0 }, /* Ek */
280816c3c54Sschwarze { NULL, 0, 0 }, /* Bt */
281816c3c54Sschwarze { NULL, 0, 0 }, /* Hf */
282816c3c54Sschwarze { NULL, 0, 0 }, /* Fr */
283816c3c54Sschwarze { NULL, 0, 0 }, /* Ud */
2848ccddcd3Sschwarze { NULL, TYPE_Lb, NODE_NOSRC }, /* Lb */
285816c3c54Sschwarze { NULL, 0, 0 }, /* Lp */
286816c3c54Sschwarze { NULL, TYPE_Lk, 0 }, /* Lk */
287816c3c54Sschwarze { NULL, TYPE_Mt, NODE_NOSRC }, /* Mt */
288816c3c54Sschwarze { NULL, 0, 0 }, /* Brq */
289816c3c54Sschwarze { NULL, 0, 0 }, /* Bro */
290816c3c54Sschwarze { NULL, 0, 0 }, /* Brc */
291816c3c54Sschwarze { NULL, 0, 0 }, /* %C */
292816c3c54Sschwarze { NULL, 0, 0 }, /* Es */
293816c3c54Sschwarze { NULL, 0, 0 }, /* En */
294816c3c54Sschwarze { NULL, TYPE_Dx, NODE_NOSRC }, /* Dx */
295816c3c54Sschwarze { NULL, 0, 0 }, /* %Q */
296816c3c54Sschwarze { NULL, 0, 0 }, /* %U */
297816c3c54Sschwarze { NULL, 0, 0 }, /* Ta */
2983899e304Sschwarze };
2993899e304Sschwarze
30049aff9f8Sschwarze
3013899e304Sschwarze int
mandocdb(int argc,char * argv[])3028dbd610cSschwarze mandocdb(int argc, char *argv[])
3033899e304Sschwarze {
3044de77decSschwarze struct manconf conf;
3054de77decSschwarze struct mparse *mp;
306ff2dbb0fSschwarze struct dba *dba;
3070aad8377Sschwarze const char *path_arg, *progname;
3084de77decSschwarze size_t j, sz;
3094de77decSschwarze int ch, i;
310eea1c63dSschwarze
3111ed2eb27Sschwarze if (pledge("stdio rpath wpath cpath", NULL) == -1) {
312dd978576Sschwarze warn("pledge");
313648641bcSschwarze return (int)MANDOCLEVEL_SYSERR;
314648641bcSschwarze }
315648641bcSschwarze
3164de77decSschwarze memset(&conf, 0, sizeof(conf));
317eea1c63dSschwarze
318eea1c63dSschwarze /*
319eea1c63dSschwarze * We accept a few different invocations.
320eea1c63dSschwarze * The CHECKOP macro makes sure that invocation styles don't
321eea1c63dSschwarze * clobber each other.
322eea1c63dSschwarze */
323eea1c63dSschwarze #define CHECKOP(_op, _ch) do \
324fa397007Sschwarze if ((_op) != OP_DEFAULT) { \
325eba1598bSschwarze warnx("-%c: Conflicting option", (_ch)); \
326eea1c63dSschwarze goto usage; \
327eea1c63dSschwarze } while (/*CONSTCOND*/0)
32827255502Sschwarze
32970212363Sschwarze mparse_options = MPARSE_UTF8 | MPARSE_LATIN1 | MPARSE_VALIDATE;
330eea1c63dSschwarze path_arg = NULL;
331f8a05325Sschwarze op = OP_DEFAULT;
3323899e304Sschwarze
333fa397007Sschwarze while ((ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")) != -1)
3343899e304Sschwarze switch (ch) {
33549aff9f8Sschwarze case 'a':
336d6ea6627Sschwarze use_all = 1;
337d6ea6627Sschwarze break;
33849aff9f8Sschwarze case 'C':
339eea1c63dSschwarze CHECKOP(op, ch);
340eea1c63dSschwarze path_arg = optarg;
341f8a05325Sschwarze op = OP_CONFFILE;
34224547daeSschwarze break;
34349aff9f8Sschwarze case 'D':
344dada979aSschwarze debug++;
345dada979aSschwarze break;
34649aff9f8Sschwarze case 'd':
347eea1c63dSschwarze CHECKOP(op, ch);
348eea1c63dSschwarze path_arg = optarg;
349ae144658Sschwarze op = OP_UPDATE;
3503899e304Sschwarze break;
35149aff9f8Sschwarze case 'n':
352eea1c63dSschwarze nodb = 1;
353eea1c63dSschwarze break;
35449aff9f8Sschwarze case 'p':
355a10a1a6fSschwarze warnings = 1;
356a10a1a6fSschwarze break;
35749aff9f8Sschwarze case 'Q':
358fee846f0Sschwarze mparse_options |= MPARSE_QUICK;
359f1da507fSschwarze break;
36049aff9f8Sschwarze case 'T':
361fa397007Sschwarze if (strcmp(optarg, "utf8") != 0) {
362eba1598bSschwarze warnx("-T%s: Unsupported output format",
363eba1598bSschwarze optarg);
364562403eaSschwarze goto usage;
365562403eaSschwarze }
366562403eaSschwarze write_utf8 = 1;
367562403eaSschwarze break;
36849aff9f8Sschwarze case 't':
369eea1c63dSschwarze CHECKOP(op, ch);
370f8a05325Sschwarze dup2(STDOUT_FILENO, STDERR_FILENO);
371f8a05325Sschwarze op = OP_TEST;
372eea1c63dSschwarze nodb = warnings = 1;
373f8a05325Sschwarze break;
37449aff9f8Sschwarze case 'u':
375eea1c63dSschwarze CHECKOP(op, ch);
376eea1c63dSschwarze path_arg = optarg;
377ae144658Sschwarze op = OP_DELETE;
3783899e304Sschwarze break;
37949aff9f8Sschwarze case 'v':
380a10a1a6fSschwarze /* Compatibility with espie@'s makewhatis. */
381f8a05325Sschwarze break;
3823899e304Sschwarze default:
383f8a05325Sschwarze goto usage;
3843899e304Sschwarze }
3853899e304Sschwarze
3863899e304Sschwarze argc -= optind;
3873899e304Sschwarze argv += optind;
3883899e304Sschwarze
3892571552dSschwarze if (nodb) {
3902571552dSschwarze if (pledge("stdio rpath", NULL) == -1) {
391dd978576Sschwarze warn("pledge");
392648641bcSschwarze return (int)MANDOCLEVEL_SYSERR;
393648641bcSschwarze }
3942571552dSschwarze }
395648641bcSschwarze
396fa397007Sschwarze if (op == OP_CONFFILE && argc > 0) {
397eba1598bSschwarze warnx("-C: Too many arguments");
398f8a05325Sschwarze goto usage;
399f8a05325Sschwarze }
400f8a05325Sschwarze
401eea1c63dSschwarze exitcode = (int)MANDOCLEVEL_OK;
40216536faaSschwarze mchars_alloc();
403e501e731Sschwarze mp = mparse_alloc(mparse_options, MANDOC_OS_OTHER, NULL);
404c4b66caeSschwarze mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev));
405c4b66caeSschwarze mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file));
4063899e304Sschwarze
407fa397007Sschwarze if (op == OP_UPDATE || op == OP_DELETE || op == OP_TEST) {
408ae144658Sschwarze
409990911a9Sschwarze /*
41076c93bc9Sschwarze * Most of these deal with a specific directory.
411517a7a54Sschwarze * Jump into that directory first.
412990911a9Sschwarze */
413fa397007Sschwarze if (op != OP_TEST && set_basedir(path_arg, 1) == 0)
414ae144658Sschwarze goto out;
415517a7a54Sschwarze
4160e9b6a18Sschwarze dba = nodb ? dba_new(128) : dba_read(MANDOC_DB);
4170e9b6a18Sschwarze if (dba != NULL) {
418517a7a54Sschwarze /*
419517a7a54Sschwarze * The existing database is usable. Process
420517a7a54Sschwarze * all files specified on the command-line.
421517a7a54Sschwarze */
422517a7a54Sschwarze use_all = 1;
423eea1c63dSschwarze for (i = 0; i < argc; i++)
424eea1c63dSschwarze filescan(argv[i]);
4250e9b6a18Sschwarze if (nodb == 0)
426ff2dbb0fSschwarze dbprune(dba);
427a3d69d4aSschwarze } else {
428de1bf378Sschwarze /* Database missing or corrupt. */
429293af550Sschwarze if (op != OP_UPDATE || errno != ENOENT)
430293af550Sschwarze say(MANDOC_DB, "%s: Automatically recreating"
431293af550Sschwarze " from scratch", strerror(errno));
432517a7a54Sschwarze exitcode = (int)MANDOCLEVEL_OK;
433a3d69d4aSschwarze op = OP_DEFAULT;
434fa397007Sschwarze if (treescan() == 0)
435a3d69d4aSschwarze goto out;
436ff2dbb0fSschwarze dba = dba_new(128);
437a3d69d4aSschwarze }
438fa397007Sschwarze if (op != OP_DELETE)
439ff2dbb0fSschwarze mpages_merge(dba, mp);
440ff2dbb0fSschwarze if (nodb == 0)
441ff2dbb0fSschwarze dbwrite(dba);
442ff2dbb0fSschwarze dba_free(dba);
443eea1c63dSschwarze } else {
44427255502Sschwarze /*
445eea1c63dSschwarze * If we have arguments, use them as our manpaths.
446352c261eSschwarze * If we don't, use man.conf(5).
44727255502Sschwarze */
44827255502Sschwarze if (argc > 0) {
4494de77decSschwarze conf.manpath.paths = mandoc_reallocarray(NULL,
4508286bf36Sschwarze argc, sizeof(char *));
4514de77decSschwarze conf.manpath.sz = (size_t)argc;
452eea1c63dSschwarze for (i = 0; i < argc; i++)
4534de77decSschwarze conf.manpath.paths[i] = mandoc_strdup(argv[i]);
45427255502Sschwarze } else
4554de77decSschwarze manconf_parse(&conf, path_arg, NULL, NULL);
4560a056e84Sderaadt
4574de77decSschwarze if (conf.manpath.sz == 0) {
45860ebc352Sschwarze exitcode = (int)MANDOCLEVEL_BADARG;
45960ebc352Sschwarze say("", "Empty manpath");
46060ebc352Sschwarze }
46160ebc352Sschwarze
462990911a9Sschwarze /*
463eea1c63dSschwarze * First scan the tree rooted at a base directory, then
464eea1c63dSschwarze * build a new database and finally move it into place.
465eea1c63dSschwarze * Ignore zero-length directories and strip trailing
466eea1c63dSschwarze * slashes.
467990911a9Sschwarze */
4684de77decSschwarze for (j = 0; j < conf.manpath.sz; j++) {
4694de77decSschwarze sz = strlen(conf.manpath.paths[j]);
4704de77decSschwarze if (sz && conf.manpath.paths[j][sz - 1] == '/')
4714de77decSschwarze conf.manpath.paths[j][--sz] = '\0';
472fa397007Sschwarze if (sz == 0)
473eea1c63dSschwarze continue;
474990911a9Sschwarze
475eea1c63dSschwarze if (j) {
476c4b66caeSschwarze mandoc_ohash_init(&mpages, 6,
477c4b66caeSschwarze offsetof(struct mpage, inodev));
478c4b66caeSschwarze mandoc_ohash_init(&mlinks, 6,
479c4b66caeSschwarze offsetof(struct mlink, file));
48018eee2d9Sschwarze }
48118eee2d9Sschwarze
482fa397007Sschwarze if (set_basedir(conf.manpath.paths[j], argc > 0) == 0)
483bf40fec9Sschwarze continue;
484fa397007Sschwarze if (treescan() == 0)
485bf40fec9Sschwarze continue;
486ff2dbb0fSschwarze dba = dba_new(128);
487ff2dbb0fSschwarze mpages_merge(dba, mp);
488ff2dbb0fSschwarze if (nodb == 0)
489ff2dbb0fSschwarze dbwrite(dba);
490ff2dbb0fSschwarze dba_free(dba);
491862132a7Sschwarze
4924de77decSschwarze if (j + 1 < conf.manpath.sz) {
493eea1c63dSschwarze mpages_free();
494eea1c63dSschwarze ohash_delete(&mpages);
495eea1c63dSschwarze ohash_delete(&mlinks);
496862132a7Sschwarze }
497ae144658Sschwarze }
498eea1c63dSschwarze }
499ae144658Sschwarze out:
5004de77decSschwarze manconf_free(&conf);
501eea1c63dSschwarze mparse_free(mp);
50216536faaSschwarze mchars_free();
503eea1c63dSschwarze mpages_free();
504eea1c63dSschwarze ohash_delete(&mpages);
505eea1c63dSschwarze ohash_delete(&mlinks);
506526e306bSschwarze return exitcode;
507f8a05325Sschwarze usage:
5080aad8377Sschwarze progname = getprogname();
509a10a1a6fSschwarze fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n"
510a10a1a6fSschwarze " %s [-aDnpQ] [-Tutf8] dir ...\n"
511a10a1a6fSschwarze " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n"
512a10a1a6fSschwarze " %s [-Dnp] -u dir [file ...]\n"
513f1da507fSschwarze " %s [-Q] -t file ...\n",
5140aad8377Sschwarze progname, progname, progname, progname, progname);
515f8a05325Sschwarze
516526e306bSschwarze return (int)MANDOCLEVEL_BADARG;
517ae144658Sschwarze }
518ae144658Sschwarze
/*
 * To get a singly linked list in alpha order while inserting entries
 * at the beginning, process directory entries in reverse alpha order.
 *
 * Comparison callback for fts_open(3): returns a positive value if
 * the name of *a sorts before the name of *b, a negative value if it
 * sorts after, and 0 if both names are equal.
 * The operands of strcmp(3) are swapped instead of negating its
 * result because only the sign of that result is specified.
 */
static int
fts_compare(const FTSENT **a, const FTSENT **b)
{
	return strcmp((*b)->fts_name, (*a)->fts_name);
}
5280a4bed2cSschwarze
5290a4bed2cSschwarze /*
530eea1c63dSschwarze * Scan a directory tree rooted at "basedir" for manpages.
531eea1c63dSschwarze * We use fts(), scanning directory parts along the way for clues to our
532eea1c63dSschwarze * section and architecture.
533eea1c63dSschwarze *
534eea1c63dSschwarze * If use_all has been specified, grok all files.
535eea1c63dSschwarze * If not, sanitise paths to the following:
536eea1c63dSschwarze *
537eea1c63dSschwarze * [./]man*[/<arch>]/<name>.<section>
538eea1c63dSschwarze * or
539eea1c63dSschwarze * [./]cat<section>[/<arch>]/<name>.0
540eea1c63dSschwarze *
54134a62e53Skrw * TODO: accommodate for multi-language directories.
542eea1c63dSschwarze */
543eea1c63dSschwarze static int
treescan(void)544eea1c63dSschwarze treescan(void)
545ae144658Sschwarze {
546f91dff1eSschwarze char buf[PATH_MAX];
547eea1c63dSschwarze FTS *f;
548eea1c63dSschwarze FTSENT *ff;
549eea1c63dSschwarze struct mlink *mlink;
550ff2dbb0fSschwarze int gzip;
551ff2dbb0fSschwarze enum form dform;
5520af7fc1aSschwarze char *dsec, *arch, *fsec, *cp;
5530af7fc1aSschwarze const char *path;
554eea1c63dSschwarze const char *argv[2];
555ae144658Sschwarze
556eea1c63dSschwarze argv[0] = ".";
5575d1b7f4fSschwarze argv[1] = NULL;
5585918f6b7Sschwarze
5590a4bed2cSschwarze f = fts_open((char * const *)argv, FTS_PHYSICAL | FTS_NOCHDIR,
5600a4bed2cSschwarze fts_compare);
5616434b1e6Sschwarze if (f == NULL) {
562eea1c63dSschwarze exitcode = (int)MANDOCLEVEL_SYSERR;
563d007b464Sschwarze say("", "&fts_open");
564526e306bSschwarze return 0;
565f06481acSschwarze }
566f06481acSschwarze
567eea1c63dSschwarze dsec = arch = NULL;
568eea1c63dSschwarze dform = FORM_NONE;
569eea1c63dSschwarze
5706434b1e6Sschwarze while ((ff = fts_read(f)) != NULL) {
571eea1c63dSschwarze path = ff->fts_path + 2;
572f91dff1eSschwarze switch (ff->fts_info) {
573f91dff1eSschwarze
574f91dff1eSschwarze /*
575f91dff1eSschwarze * Symbolic links require various sanity checks,
576f91dff1eSschwarze * then get handled just like regular files.
577f91dff1eSschwarze */
57849aff9f8Sschwarze case FTS_SL:
5796434b1e6Sschwarze if (realpath(path, buf) == NULL) {
580f91dff1eSschwarze if (warnings)
581f91dff1eSschwarze say(path, "&realpath");
582f91dff1eSschwarze continue;
583f91dff1eSschwarze }
584fa397007Sschwarze if (strncmp(buf, basedir, basedir_len) != 0) {
585f91dff1eSschwarze if (warnings) say("",
586f91dff1eSschwarze "%s: outside base directory", buf);
587f91dff1eSschwarze continue;
588f91dff1eSschwarze }
589f91dff1eSschwarze /* Use logical inode to avoid mpages dupe. */
5906434b1e6Sschwarze if (stat(path, ff->fts_statp) == -1) {
591f91dff1eSschwarze if (warnings)
592f91dff1eSschwarze say(path, "&stat");
593f91dff1eSschwarze continue;
594f91dff1eSschwarze }
595f4d6373fSschwarze if ((ff->fts_statp->st_mode & S_IFMT) != S_IFREG)
596f4d6373fSschwarze continue;
597f91dff1eSschwarze /* FALLTHROUGH */
598f91dff1eSschwarze
599eea1c63dSschwarze /*
600eea1c63dSschwarze * If we're a regular file, add an mlink by using the
601eea1c63dSschwarze * stored directory data and handling the filename.
602eea1c63dSschwarze */
60349aff9f8Sschwarze case FTS_F:
6046434b1e6Sschwarze if ( ! strcmp(path, MANDOC_DB))
605eea1c63dSschwarze continue;
606eea1c63dSschwarze if ( ! use_all && ff->fts_level < 2) {
607eea1c63dSschwarze if (warnings)
608eea1c63dSschwarze say(path, "Extraneous file");
609eea1c63dSschwarze continue;
610dba9b9e4Sschwarze }
611dba9b9e4Sschwarze gzip = 0;
612dba9b9e4Sschwarze fsec = NULL;
6136434b1e6Sschwarze while (fsec == NULL) {
614dba9b9e4Sschwarze fsec = strrchr(ff->fts_name, '.');
6156434b1e6Sschwarze if (fsec == NULL || strcmp(fsec+1, "gz"))
616dba9b9e4Sschwarze break;
617dba9b9e4Sschwarze gzip = 1;
618dba9b9e4Sschwarze *fsec = '\0';
619dba9b9e4Sschwarze fsec = NULL;
620dba9b9e4Sschwarze }
6216434b1e6Sschwarze if (fsec == NULL) {
622eea1c63dSschwarze if ( ! use_all) {
623eea1c63dSschwarze if (warnings)
624eea1c63dSschwarze say(path,
625eea1c63dSschwarze "No filename suffix");
626eea1c63dSschwarze continue;
627eea1c63dSschwarze }
6286434b1e6Sschwarze } else if ( ! strcmp(++fsec, "html")) {
629eea1c63dSschwarze if (warnings)
630eea1c63dSschwarze say(path, "Skip html");
631eea1c63dSschwarze continue;
6326434b1e6Sschwarze } else if ( ! strcmp(fsec, "ps")) {
633eea1c63dSschwarze if (warnings)
634eea1c63dSschwarze say(path, "Skip ps");
635eea1c63dSschwarze continue;
6366434b1e6Sschwarze } else if ( ! strcmp(fsec, "pdf")) {
637eea1c63dSschwarze if (warnings)
638eea1c63dSschwarze say(path, "Skip pdf");
639eea1c63dSschwarze continue;
640eea1c63dSschwarze } else if ( ! use_all &&
6416434b1e6Sschwarze ((dform == FORM_SRC &&
642a539fd7bSschwarze strncmp(fsec, dsec, strlen(dsec))) ||
6436434b1e6Sschwarze (dform == FORM_CAT && strcmp(fsec, "0")))) {
644eea1c63dSschwarze if (warnings)
645eea1c63dSschwarze say(path, "Wrong filename suffix");
646eea1c63dSschwarze continue;
647eea1c63dSschwarze } else
648eea1c63dSschwarze fsec[-1] = '\0';
6490af7fc1aSschwarze
650eea1c63dSschwarze mlink = mandoc_calloc(1, sizeof(struct mlink));
6513617218bSschwarze if (strlcpy(mlink->file, path,
6523617218bSschwarze sizeof(mlink->file)) >=
6533617218bSschwarze sizeof(mlink->file)) {
6543617218bSschwarze say(path, "Filename too long");
6553617218bSschwarze free(mlink);
6563617218bSschwarze continue;
6573617218bSschwarze }
658eea1c63dSschwarze mlink->dform = dform;
6590af7fc1aSschwarze mlink->dsec = dsec;
6600af7fc1aSschwarze mlink->arch = arch;
6610af7fc1aSschwarze mlink->name = ff->fts_name;
6620af7fc1aSschwarze mlink->fsec = fsec;
663dba9b9e4Sschwarze mlink->gzip = gzip;
664eea1c63dSschwarze mlink_add(mlink, ff->fts_statp);
665eea1c63dSschwarze continue;
666f91dff1eSschwarze
66749aff9f8Sschwarze case FTS_D:
66849aff9f8Sschwarze case FTS_DP:
669f91dff1eSschwarze break;
670f91dff1eSschwarze
671f91dff1eSschwarze default:
672eea1c63dSschwarze if (warnings)
673eea1c63dSschwarze say(path, "Not a regular file");
674eea1c63dSschwarze continue;
675eea1c63dSschwarze }
676eea1c63dSschwarze
677eea1c63dSschwarze switch (ff->fts_level) {
67849aff9f8Sschwarze case 0:
679eea1c63dSschwarze /* Ignore the root directory. */
680eea1c63dSschwarze break;
68149aff9f8Sschwarze case 1:
682eea1c63dSschwarze /*
683eea1c63dSschwarze * This might contain manX/ or catX/.
684eea1c63dSschwarze * Try to infer this from the name.
685eea1c63dSschwarze * If we're not in use_all, enforce it.
686eea1c63dSschwarze */
687eea1c63dSschwarze cp = ff->fts_name;
6886434b1e6Sschwarze if (ff->fts_info == FTS_DP) {
6896434b1e6Sschwarze dform = FORM_NONE;
6906434b1e6Sschwarze dsec = NULL;
691eea1c63dSschwarze break;
6926434b1e6Sschwarze }
693eea1c63dSschwarze
6946434b1e6Sschwarze if ( ! strncmp(cp, "man", 3)) {
695eea1c63dSschwarze dform = FORM_SRC;
696eea1c63dSschwarze dsec = cp + 3;
6976434b1e6Sschwarze } else if ( ! strncmp(cp, "cat", 3)) {
698eea1c63dSschwarze dform = FORM_CAT;
699eea1c63dSschwarze dsec = cp + 3;
7000af7fc1aSschwarze } else {
7010af7fc1aSschwarze dform = FORM_NONE;
7020af7fc1aSschwarze dsec = NULL;
703eea1c63dSschwarze }
704eea1c63dSschwarze
7056434b1e6Sschwarze if (dsec != NULL || use_all)
706eea1c63dSschwarze break;
707eea1c63dSschwarze
708eea1c63dSschwarze if (warnings)
709eea1c63dSschwarze say(path, "Unknown directory part");
710eea1c63dSschwarze fts_set(f, ff, FTS_SKIP);
711eea1c63dSschwarze break;
71249aff9f8Sschwarze case 2:
713eea1c63dSschwarze /*
714eea1c63dSschwarze * Possibly our architecture.
715eea1c63dSschwarze * If we're descending, keep tabs on it.
716eea1c63dSschwarze */
7176434b1e6Sschwarze if (ff->fts_info != FTS_DP && dsec != NULL)
718eea1c63dSschwarze arch = ff->fts_name;
7190af7fc1aSschwarze else
7200af7fc1aSschwarze arch = NULL;
721eea1c63dSschwarze break;
722eea1c63dSschwarze default:
7236434b1e6Sschwarze if (ff->fts_info == FTS_DP || use_all)
724eea1c63dSschwarze break;
725eea1c63dSschwarze if (warnings)
726eea1c63dSschwarze say(path, "Extraneous directory part");
727eea1c63dSschwarze fts_set(f, ff, FTS_SKIP);
728eea1c63dSschwarze break;
729eea1c63dSschwarze }
730eea1c63dSschwarze }
731eea1c63dSschwarze
732eea1c63dSschwarze fts_close(f);
733526e306bSschwarze return 1;
734eea1c63dSschwarze }
735eea1c63dSschwarze
736eea1c63dSschwarze /*
737eea1c63dSschwarze * Add a file to the mlinks table.
738eea1c63dSschwarze * Do not verify that it's a "valid" looking manpage (we'll do that
739eea1c63dSschwarze * later).
740eea1c63dSschwarze *
741eea1c63dSschwarze * Try to infer the manual section, architecture, and page name from the
742eea1c63dSschwarze * path, assuming it looks like
743eea1c63dSschwarze *
744eea1c63dSschwarze * [./]man*[/<arch>]/<name>.<section>
745eea1c63dSschwarze * or
746eea1c63dSschwarze * [./]cat<section>[/<arch>]/<name>.0
747eea1c63dSschwarze *
748eea1c63dSschwarze * See treescan() for the fts(3) version of this.
749eea1c63dSschwarze */
750eea1c63dSschwarze static void
filescan(const char * infile)7514f2bc1d1Sschwarze filescan(const char *infile)
752eea1c63dSschwarze {
753eea1c63dSschwarze struct stat st;
754eea1c63dSschwarze struct mlink *mlink;
7554f2bc1d1Sschwarze char *linkfile, *p, *realdir, *start, *usefile;
7564f2bc1d1Sschwarze size_t realdir_len;
757eea1c63dSschwarze
758eea1c63dSschwarze assert(use_all);
759eea1c63dSschwarze
7604f2bc1d1Sschwarze if (strncmp(infile, "./", 2) == 0)
7614f2bc1d1Sschwarze infile += 2;
762eea1c63dSschwarze
763f91dff1eSschwarze /*
764f91dff1eSschwarze * We have to do lstat(2) before realpath(3) loses
765f91dff1eSschwarze * the information whether this is a symbolic link.
766f91dff1eSschwarze * We need to know that because for symbolic links,
767d9a51c35Sjmc * we want to use the original file name, while for
768f91dff1eSschwarze * regular files, we want to use the real path.
769f91dff1eSschwarze */
7704f2bc1d1Sschwarze if (lstat(infile, &st) == -1) {
771f91dff1eSschwarze exitcode = (int)MANDOCLEVEL_BADARG;
7724f2bc1d1Sschwarze say(infile, "&lstat");
773f91dff1eSschwarze return;
7747fec9d6aSschwarze } else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) {
775f91dff1eSschwarze exitcode = (int)MANDOCLEVEL_BADARG;
7764f2bc1d1Sschwarze say(infile, "Not a regular file");
777f91dff1eSschwarze return;
778f91dff1eSschwarze }
779f91dff1eSschwarze
780f91dff1eSschwarze /*
781f91dff1eSschwarze * We have to resolve the file name to the real path
782f91dff1eSschwarze * in any case for the base directory check.
783f91dff1eSschwarze */
7844f2bc1d1Sschwarze if ((usefile = realpath(infile, NULL)) == NULL) {
785eea1c63dSschwarze exitcode = (int)MANDOCLEVEL_BADARG;
7864f2bc1d1Sschwarze say(infile, "&realpath");
787eea1c63dSschwarze return;
78877dd9b85Sschwarze }
78977dd9b85Sschwarze
790fa397007Sschwarze if (op == OP_TEST)
7914f2bc1d1Sschwarze start = usefile;
7924f2bc1d1Sschwarze else if (strncmp(usefile, basedir, basedir_len) == 0)
7934f2bc1d1Sschwarze start = usefile + basedir_len;
79477dd9b85Sschwarze else {
795eea1c63dSschwarze exitcode = (int)MANDOCLEVEL_BADARG;
7964f2bc1d1Sschwarze say("", "%s: outside base directory", infile);
7974f2bc1d1Sschwarze free(usefile);
798eea1c63dSschwarze return;
79977dd9b85Sschwarze }
80077dd9b85Sschwarze
801f91dff1eSschwarze /*
802f91dff1eSschwarze * Now we are sure the file is inside our tree.
803f91dff1eSschwarze * If it is a symbolic link, ignore the real path
804f91dff1eSschwarze * and use the original name.
805f91dff1eSschwarze */
8064f2bc1d1Sschwarze do {
8074f2bc1d1Sschwarze if (S_ISLNK(st.st_mode) == 0)
8084f2bc1d1Sschwarze break;
8094f2bc1d1Sschwarze
8104f2bc1d1Sschwarze /*
8114f2bc1d1Sschwarze * Some implementations of realpath(3) may succeed
8124f2bc1d1Sschwarze * even if the target of the link does not exist,
8134f2bc1d1Sschwarze * so check again for extra safety.
8144f2bc1d1Sschwarze */
8154f2bc1d1Sschwarze if (stat(usefile, &st) == -1) {
816eea1c63dSschwarze exitcode = (int)MANDOCLEVEL_BADARG;
8174f2bc1d1Sschwarze say(infile, "&stat");
8184f2bc1d1Sschwarze free(usefile);
819eea1c63dSschwarze return;
820f91dff1eSschwarze }
8214f2bc1d1Sschwarze linkfile = mandoc_strdup(infile);
8224f2bc1d1Sschwarze if (op == OP_TEST) {
8234f2bc1d1Sschwarze free(usefile);
8244f2bc1d1Sschwarze start = usefile = linkfile;
8254f2bc1d1Sschwarze break;
8264f2bc1d1Sschwarze }
8274f2bc1d1Sschwarze if (strncmp(infile, basedir, basedir_len) == 0) {
8284f2bc1d1Sschwarze free(usefile);
8294f2bc1d1Sschwarze usefile = linkfile;
8304f2bc1d1Sschwarze start = usefile + basedir_len;
8314f2bc1d1Sschwarze break;
8324f2bc1d1Sschwarze }
8334f2bc1d1Sschwarze
8344f2bc1d1Sschwarze /*
8354f2bc1d1Sschwarze * This symbolic link points into the basedir
8364f2bc1d1Sschwarze * from the outside. Let's see whether any of
8374f2bc1d1Sschwarze * the parent directories resolve to the basedir.
8384f2bc1d1Sschwarze */
8394f2bc1d1Sschwarze p = strchr(linkfile, '\0');
8404f2bc1d1Sschwarze do {
8414f2bc1d1Sschwarze while (*--p != '/')
8424f2bc1d1Sschwarze continue;
8434f2bc1d1Sschwarze *p = '\0';
8444f2bc1d1Sschwarze if ((realdir = realpath(linkfile, NULL)) == NULL) {
8454f2bc1d1Sschwarze exitcode = (int)MANDOCLEVEL_BADARG;
8464f2bc1d1Sschwarze say(infile, "&realpath");
8474f2bc1d1Sschwarze free(linkfile);
8484f2bc1d1Sschwarze free(usefile);
8493617218bSschwarze return;
8503617218bSschwarze }
8514f2bc1d1Sschwarze realdir_len = strlen(realdir) + 1;
8524f2bc1d1Sschwarze free(realdir);
8534f2bc1d1Sschwarze *p = '/';
8544f2bc1d1Sschwarze } while (realdir_len > basedir_len);
8554f2bc1d1Sschwarze
8564f2bc1d1Sschwarze /*
8574f2bc1d1Sschwarze * If one of the directories resolves to the basedir,
8584f2bc1d1Sschwarze * use the rest of the original name.
8594f2bc1d1Sschwarze * Otherwise, the best we can do
8604f2bc1d1Sschwarze * is to use the filename pointed to.
8614f2bc1d1Sschwarze */
8624f2bc1d1Sschwarze if (realdir_len == basedir_len) {
8634f2bc1d1Sschwarze free(usefile);
8644f2bc1d1Sschwarze usefile = linkfile;
8654f2bc1d1Sschwarze start = p + 1;
8664f2bc1d1Sschwarze } else {
8674f2bc1d1Sschwarze free(linkfile);
8684f2bc1d1Sschwarze start = usefile + basedir_len;
869eea1c63dSschwarze }
8704f2bc1d1Sschwarze } while (/* CONSTCOND */ 0);
87177dd9b85Sschwarze
872eea1c63dSschwarze mlink = mandoc_calloc(1, sizeof(struct mlink));
8732f08085dSschwarze mlink->dform = FORM_NONE;
8743617218bSschwarze if (strlcpy(mlink->file, start, sizeof(mlink->file)) >=
8753617218bSschwarze sizeof(mlink->file)) {
8763617218bSschwarze say(start, "Filename too long");
877ad6020cdSschwarze free(mlink);
8784f2bc1d1Sschwarze free(usefile);
8793617218bSschwarze return;
8803617218bSschwarze }
881eea1c63dSschwarze
882eea1c63dSschwarze /*
88390f1715cSschwarze * In test mode or when the original name is absolute
88490f1715cSschwarze * but outside our tree, guess the base directory.
88590f1715cSschwarze */
88690f1715cSschwarze
8874f2bc1d1Sschwarze if (op == OP_TEST || (start == usefile && *start == '/')) {
8884f2bc1d1Sschwarze if (strncmp(usefile, "man/", 4) == 0)
8894f2bc1d1Sschwarze start = usefile + 4;
8904f2bc1d1Sschwarze else if ((start = strstr(usefile, "/man/")) != NULL)
89190f1715cSschwarze start += 5;
89290f1715cSschwarze else
8934f2bc1d1Sschwarze start = usefile;
89490f1715cSschwarze }
89590f1715cSschwarze
89690f1715cSschwarze /*
897eea1c63dSschwarze * First try to guess our directory structure.
898eea1c63dSschwarze * If we find a separator, try to look for man* or cat*.
899eea1c63dSschwarze * If we find one of these and what's underneath is a directory,
900eea1c63dSschwarze * assume it's an architecture.
901eea1c63dSschwarze */
902fa397007Sschwarze if ((p = strchr(start, '/')) != NULL) {
903eea1c63dSschwarze *p++ = '\0';
904fa397007Sschwarze if (strncmp(start, "man", 3) == 0) {
905eea1c63dSschwarze mlink->dform = FORM_SRC;
9060af7fc1aSschwarze mlink->dsec = start + 3;
907fa397007Sschwarze } else if (strncmp(start, "cat", 3) == 0) {
908eea1c63dSschwarze mlink->dform = FORM_CAT;
9090af7fc1aSschwarze mlink->dsec = start + 3;
910eea1c63dSschwarze }
911eea1c63dSschwarze
912eea1c63dSschwarze start = p;
913fa397007Sschwarze if (mlink->dsec != NULL && (p = strchr(start, '/')) != NULL) {
914eea1c63dSschwarze *p++ = '\0';
9150af7fc1aSschwarze mlink->arch = start;
916eea1c63dSschwarze start = p;
917eea1c63dSschwarze }
918eea1c63dSschwarze }
919eea1c63dSschwarze
920eea1c63dSschwarze /*
921eea1c63dSschwarze * Now check the file suffix.
922eea1c63dSschwarze * Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
923eea1c63dSschwarze */
924eea1c63dSschwarze p = strrchr(start, '\0');
925fa397007Sschwarze while (p-- > start && *p != '/' && *p != '.')
926fa397007Sschwarze continue;
927eea1c63dSschwarze
928fa397007Sschwarze if (*p == '.') {
929eea1c63dSschwarze *p++ = '\0';
9300af7fc1aSschwarze mlink->fsec = p;
931eea1c63dSschwarze }
932eea1c63dSschwarze
933eea1c63dSschwarze /*
934eea1c63dSschwarze * Now try to parse the name.
935eea1c63dSschwarze * Use the filename portion of the path.
936eea1c63dSschwarze */
937eea1c63dSschwarze mlink->name = start;
938fa397007Sschwarze if ((p = strrchr(start, '/')) != NULL) {
939eea1c63dSschwarze mlink->name = p + 1;
940eea1c63dSschwarze *p = '\0';
941eea1c63dSschwarze }
942eea1c63dSschwarze mlink_add(mlink, &st);
9434f2bc1d1Sschwarze free(usefile);
944eea1c63dSschwarze }
945eea1c63dSschwarze
946eea1c63dSschwarze static void
mlink_add(struct mlink * mlink,const struct stat * st)947eea1c63dSschwarze mlink_add(struct mlink *mlink, const struct stat *st)
948eea1c63dSschwarze {
949eea1c63dSschwarze struct inodev inodev;
950eea1c63dSschwarze struct mpage *mpage;
951eea1c63dSschwarze unsigned int slot;
952eea1c63dSschwarze
953eea1c63dSschwarze assert(NULL != mlink->file);
954eea1c63dSschwarze
9550af7fc1aSschwarze mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : "");
9560af7fc1aSschwarze mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : "");
9570af7fc1aSschwarze mlink->name = mandoc_strdup(mlink->name ? mlink->name : "");
9580af7fc1aSschwarze mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : "");
959eea1c63dSschwarze
960eea1c63dSschwarze if ('0' == *mlink->fsec) {
961eea1c63dSschwarze free(mlink->fsec);
962eea1c63dSschwarze mlink->fsec = mandoc_strdup(mlink->dsec);
963eea1c63dSschwarze mlink->fform = FORM_CAT;
964eea1c63dSschwarze } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec)
965eea1c63dSschwarze mlink->fform = FORM_SRC;
966eea1c63dSschwarze else
967eea1c63dSschwarze mlink->fform = FORM_NONE;
968eea1c63dSschwarze
969eea1c63dSschwarze slot = ohash_qlookup(&mlinks, mlink->file);
970eea1c63dSschwarze assert(NULL == ohash_find(&mlinks, slot));
971eea1c63dSschwarze ohash_insert(&mlinks, slot, mlink);
972eea1c63dSschwarze
973a42eec62Sschwarze memset(&inodev, 0, sizeof(inodev)); /* Clear padding. */
974eea1c63dSschwarze inodev.st_ino = st->st_ino;
975eea1c63dSschwarze inodev.st_dev = st->st_dev;
976eea1c63dSschwarze slot = ohash_lookup_memory(&mpages, (char *)&inodev,
977eea1c63dSschwarze sizeof(struct inodev), inodev.st_ino);
978eea1c63dSschwarze mpage = ohash_find(&mpages, slot);
979eea1c63dSschwarze if (NULL == mpage) {
980eea1c63dSschwarze mpage = mandoc_calloc(1, sizeof(struct mpage));
981eea1c63dSschwarze mpage->inodev.st_ino = inodev.st_ino;
982eea1c63dSschwarze mpage->inodev.st_dev = inodev.st_dev;
983ff2dbb0fSschwarze mpage->form = FORM_NONE;
9840a4bed2cSschwarze mpage->next = mpage_head;
9850a4bed2cSschwarze mpage_head = mpage;
986eea1c63dSschwarze ohash_insert(&mpages, slot, mpage);
987eea1c63dSschwarze } else
988eea1c63dSschwarze mlink->next = mpage->mlinks;
989eea1c63dSschwarze mpage->mlinks = mlink;
990b493b720Sschwarze mlink->mpage = mpage;
991eea1c63dSschwarze }
992eea1c63dSschwarze
993eea1c63dSschwarze static void
mlink_free(struct mlink * mlink)994eea1c63dSschwarze mlink_free(struct mlink *mlink)
995eea1c63dSschwarze {
996eea1c63dSschwarze
997eea1c63dSschwarze free(mlink->dsec);
998eea1c63dSschwarze free(mlink->arch);
999eea1c63dSschwarze free(mlink->name);
1000eea1c63dSschwarze free(mlink->fsec);
1001eea1c63dSschwarze free(mlink);
1002eea1c63dSschwarze }
1003eea1c63dSschwarze
1004eea1c63dSschwarze static void
mpages_free(void)1005eea1c63dSschwarze mpages_free(void)
1006eea1c63dSschwarze {
1007eea1c63dSschwarze struct mpage *mpage;
1008eea1c63dSschwarze struct mlink *mlink;
1009eea1c63dSschwarze
10100a4bed2cSschwarze while ((mpage = mpage_head) != NULL) {
10110a4bed2cSschwarze while ((mlink = mpage->mlinks) != NULL) {
1012eea1c63dSschwarze mpage->mlinks = mlink->next;
1013eea1c63dSschwarze mlink_free(mlink);
1014eea1c63dSschwarze }
10150a4bed2cSschwarze mpage_head = mpage->next;
1016eea1c63dSschwarze free(mpage->sec);
1017eea1c63dSschwarze free(mpage->arch);
1018eea1c63dSschwarze free(mpage->title);
1019eea1c63dSschwarze free(mpage->desc);
1020eea1c63dSschwarze free(mpage);
1021eea1c63dSschwarze }
1022eea1c63dSschwarze }
1023eea1c63dSschwarze
1024eea1c63dSschwarze /*
1025eea1c63dSschwarze * For each mlink to the mpage, check whether the path looks like
1026eea1c63dSschwarze * it is formatted, and if it does, check whether a source manual
1027eea1c63dSschwarze * exists by the same name, ignoring the suffix.
1028eea1c63dSschwarze * If both conditions hold, drop the mlink.
1029eea1c63dSschwarze */
1030eea1c63dSschwarze static void
mlinks_undupe(struct mpage * mpage)1031eea1c63dSschwarze mlinks_undupe(struct mpage *mpage)
1032eea1c63dSschwarze {
1033eea1c63dSschwarze char buf[PATH_MAX];
1034eea1c63dSschwarze struct mlink **prev;
1035eea1c63dSschwarze struct mlink *mlink;
1036eea1c63dSschwarze char *bufp;
1037eea1c63dSschwarze
1038eea1c63dSschwarze mpage->form = FORM_CAT;
1039eea1c63dSschwarze prev = &mpage->mlinks;
1040eea1c63dSschwarze while (NULL != (mlink = *prev)) {
1041eea1c63dSschwarze if (FORM_CAT != mlink->dform) {
1042eea1c63dSschwarze mpage->form = FORM_NONE;
1043eea1c63dSschwarze goto nextlink;
1044eea1c63dSschwarze }
10453617218bSschwarze (void)strlcpy(buf, mlink->file, sizeof(buf));
1046eea1c63dSschwarze bufp = strstr(buf, "cat");
1047eea1c63dSschwarze assert(NULL != bufp);
1048eea1c63dSschwarze memcpy(bufp, "man", 3);
1049eea1c63dSschwarze if (NULL != (bufp = strrchr(buf, '.')))
1050eea1c63dSschwarze *++bufp = '\0';
10513617218bSschwarze (void)strlcat(buf, mlink->dsec, sizeof(buf));
1052eea1c63dSschwarze if (NULL == ohash_find(&mlinks,
1053eea1c63dSschwarze ohash_qlookup(&mlinks, buf)))
1054eea1c63dSschwarze goto nextlink;
1055eea1c63dSschwarze if (warnings)
1056eea1c63dSschwarze say(mlink->file, "Man source exists: %s", buf);
1057eea1c63dSschwarze if (use_all)
1058eea1c63dSschwarze goto nextlink;
1059eea1c63dSschwarze *prev = mlink->next;
1060eea1c63dSschwarze mlink_free(mlink);
1061eea1c63dSschwarze continue;
1062eea1c63dSschwarze nextlink:
1063eea1c63dSschwarze prev = &(*prev)->next;
1064eea1c63dSschwarze }
1065eea1c63dSschwarze }
1066eea1c63dSschwarze
1067058df53fSschwarze static void
mlink_check(struct mpage * mpage,struct mlink * mlink)1068b894d9c6Sschwarze mlink_check(struct mpage *mpage, struct mlink *mlink)
1069b894d9c6Sschwarze {
1070058df53fSschwarze struct str *str;
1071058df53fSschwarze unsigned int slot;
1072b894d9c6Sschwarze
1073b894d9c6Sschwarze /*
1074b894d9c6Sschwarze * Check whether the manual section given in a file
1075b894d9c6Sschwarze * agrees with the directory where the file is located.
1076b894d9c6Sschwarze * Some manuals have suffixes like (3p) on their
1077b894d9c6Sschwarze * section number either inside the file or in the
1078b894d9c6Sschwarze * directory name, some are linked into more than one
1079b894d9c6Sschwarze * section, like encrypt(1) = makekey(8).
1080b894d9c6Sschwarze */
1081b894d9c6Sschwarze
1082b894d9c6Sschwarze if (FORM_SRC == mpage->form &&
1083058df53fSschwarze strcasecmp(mpage->sec, mlink->dsec))
1084b894d9c6Sschwarze say(mlink->file, "Section \"%s\" manual in %s directory",
1085b894d9c6Sschwarze mpage->sec, mlink->dsec);
1086b894d9c6Sschwarze
1087b894d9c6Sschwarze /*
1088b894d9c6Sschwarze * Manual page directories exist for each kernel
1089b894d9c6Sschwarze * architecture as returned by machine(1).
1090b894d9c6Sschwarze * However, many manuals only depend on the
1091b894d9c6Sschwarze * application architecture as returned by arch(1).
1092b894d9c6Sschwarze * For example, some (2/ARM) manuals are shared
1093b894d9c6Sschwarze * across the "armish" and "zaurus" kernel
1094b894d9c6Sschwarze * architectures.
1095b894d9c6Sschwarze * A few manuals are even shared across completely
1096b894d9c6Sschwarze * different architectures, for example fdformat(1)
10977f7e8da4Sschwarze * on amd64, i386, and sparc64.
1098b894d9c6Sschwarze */
1099b894d9c6Sschwarze
1100058df53fSschwarze if (strcasecmp(mpage->arch, mlink->arch))
1101b894d9c6Sschwarze say(mlink->file, "Architecture \"%s\" manual in "
1102b894d9c6Sschwarze "\"%s\" directory", mpage->arch, mlink->arch);
1103b894d9c6Sschwarze
1104058df53fSschwarze /*
1105058df53fSschwarze * XXX
1106cb2bcd5aSschwarze * parse_cat() doesn't set NAME_TITLE yet.
1107058df53fSschwarze */
1108b894d9c6Sschwarze
1109058df53fSschwarze if (FORM_CAT == mpage->form)
1110058df53fSschwarze return;
1111058df53fSschwarze
1112058df53fSschwarze /*
1113058df53fSschwarze * Check whether this mlink
1114058df53fSschwarze * appears as a name in the NAME section.
1115058df53fSschwarze */
1116058df53fSschwarze
1117cb2bcd5aSschwarze slot = ohash_qlookup(&names, mlink->name);
1118cb2bcd5aSschwarze str = ohash_find(&names, slot);
1119058df53fSschwarze assert(NULL != str);
1120cb2bcd5aSschwarze if ( ! (NAME_TITLE & str->mask))
1121058df53fSschwarze say(mlink->file, "Name missing in NAME section");
1122b894d9c6Sschwarze }
1123b894d9c6Sschwarze
1124eea1c63dSschwarze /*
1125eea1c63dSschwarze * Run through the files in the global vector "mpages"
1126eea1c63dSschwarze * and add them to the database specified in "basedir".
1127eea1c63dSschwarze *
1128eea1c63dSschwarze * This handles the parsing scheme itself, using the cues of directory
1129eea1c63dSschwarze * and filename to determine whether the file is parsable or not.
1130eea1c63dSschwarze */
1131eea1c63dSschwarze static void
mpages_merge(struct dba * dba,struct mparse * mp)1132ff2dbb0fSschwarze mpages_merge(struct dba *dba, struct mparse *mp)
1133eea1c63dSschwarze {
11340a4bed2cSschwarze struct mpage *mpage, *mpage_dest;
1135b493b720Sschwarze struct mlink *mlink, *mlink_dest;
11366b86842eSschwarze struct roff_meta *meta;
113730ca5b8eSschwarze char *cp;
1138d395d87cSschwarze int fd;
1139eea1c63dSschwarze
11400a4bed2cSschwarze for (mpage = mpage_head; mpage != NULL; mpage = mpage->next) {
1141eea1c63dSschwarze mlinks_undupe(mpage);
11420a4bed2cSschwarze if ((mlink = mpage->mlinks) == NULL)
11430a4bed2cSschwarze continue;
1144eea1c63dSschwarze
1145cb2bcd5aSschwarze name_mask = NAME_MASK;
1146c4b66caeSschwarze mandoc_ohash_init(&names, 4, offsetof(struct str, key));
1147c4b66caeSschwarze mandoc_ohash_init(&strings, 6, offsetof(struct str, key));
1148eea1c63dSschwarze mparse_reset(mp);
11496b86842eSschwarze meta = NULL;
1150dba9b9e4Sschwarze
1151723ae0efSschwarze if ((fd = mparse_open(mp, mlink->file)) == -1) {
1152fdc26c85Sschwarze say(mlink->file, "&open");
1153dba9b9e4Sschwarze goto nextpage;
1154dba9b9e4Sschwarze }
11559fcb6c6dSschwarze
11569fcb6c6dSschwarze /*
1157df927bb6Sschwarze * Interpret the file as mdoc(7) or man(7) source
1158df927bb6Sschwarze * code, unless it is known to be formatted.
11599fcb6c6dSschwarze */
1160fdc26c85Sschwarze if (mlink->dform != FORM_CAT || mlink->fform != FORM_CAT) {
1161df927bb6Sschwarze mparse_readfd(mp, fd, mlink->file);
11627a6e7816Sschwarze close(fd);
11636418b05dSschwarze fd = -1;
11646b86842eSschwarze meta = mparse_result(mp);
1165eea1c63dSschwarze }
11669fcb6c6dSschwarze
11676b86842eSschwarze if (meta != NULL && meta->sodest != NULL) {
1168b493b720Sschwarze mlink_dest = ohash_find(&mlinks,
11696b86842eSschwarze ohash_qlookup(&mlinks, meta->sodest));
11706327d332Sschwarze if (mlink_dest == NULL) {
11716b86842eSschwarze mandoc_asprintf(&cp, "%s.gz", meta->sodest);
11726327d332Sschwarze mlink_dest = ohash_find(&mlinks,
11736327d332Sschwarze ohash_qlookup(&mlinks, cp));
11746327d332Sschwarze free(cp);
11756327d332Sschwarze }
11766327d332Sschwarze if (mlink_dest != NULL) {
1177b493b720Sschwarze
1178b493b720Sschwarze /* The .so target exists. */
1179b493b720Sschwarze
1180b493b720Sschwarze mpage_dest = mlink_dest->mpage;
1181b493b720Sschwarze while (1) {
1182b493b720Sschwarze mlink->mpage = mpage_dest;
1183b493b720Sschwarze
1184b493b720Sschwarze /*
1185b493b720Sschwarze * If the target was already
1186b493b720Sschwarze * processed, add the links
1187b493b720Sschwarze * to the database now.
1188b493b720Sschwarze * Otherwise, this will
1189b493b720Sschwarze * happen when we come
1190b493b720Sschwarze * to the target.
1191b493b720Sschwarze */
1192b493b720Sschwarze
1193ff2dbb0fSschwarze if (mpage_dest->dba != NULL)
1194ff2dbb0fSschwarze dbadd_mlink(mlink);
1195b493b720Sschwarze
11966327d332Sschwarze if (mlink->next == NULL)
1197b493b720Sschwarze break;
1198b493b720Sschwarze mlink = mlink->next;
1199b493b720Sschwarze }
1200b493b720Sschwarze
1201b493b720Sschwarze /* Move all links to the target. */
1202b493b720Sschwarze
1203b493b720Sschwarze mlink->next = mlink_dest->next;
1204b493b720Sschwarze mlink_dest->next = mpage->mlinks;
1205b493b720Sschwarze mpage->mlinks = NULL;
1206dba9b9e4Sschwarze goto nextpage;
12077389b0e3Sschwarze }
12087389b0e3Sschwarze meta->macroset = MACROSET_NONE;
12097389b0e3Sschwarze }
12107389b0e3Sschwarze if (meta != NULL && meta->macroset == MACROSET_MDOC) {
1211eea1c63dSschwarze mpage->form = FORM_SRC;
12126b86842eSschwarze mpage->sec = meta->msec;
12133fdead0cSschwarze mpage->sec = mandoc_strdup(
12146327d332Sschwarze mpage->sec == NULL ? "" : mpage->sec);
12156b86842eSschwarze mpage->arch = meta->arch;
1216eea1c63dSschwarze mpage->arch = mandoc_strdup(
12176327d332Sschwarze mpage->arch == NULL ? "" : mpage->arch);
12186b86842eSschwarze mpage->title = mandoc_strdup(meta->title);
12196b86842eSschwarze } else if (meta != NULL && meta->macroset == MACROSET_MAN) {
12206b86842eSschwarze if (*meta->msec != '\0' || *meta->title != '\0') {
1221eea1c63dSschwarze mpage->form = FORM_SRC;
12226b86842eSschwarze mpage->sec = mandoc_strdup(meta->msec);
1223fdc26c85Sschwarze mpage->arch = mandoc_strdup(mlink->arch);
12246b86842eSschwarze mpage->title = mandoc_strdup(meta->title);
12256418b05dSschwarze } else
12266b86842eSschwarze meta = NULL;
12276418b05dSschwarze }
12286418b05dSschwarze
12296418b05dSschwarze assert(mpage->desc == NULL);
12307389b0e3Sschwarze if (meta == NULL || meta->sodest != NULL) {
1231fdc26c85Sschwarze mpage->sec = mandoc_strdup(mlink->dsec);
1232fdc26c85Sschwarze mpage->arch = mandoc_strdup(mlink->arch);
1233fdc26c85Sschwarze mpage->title = mandoc_strdup(mlink->name);
12347389b0e3Sschwarze if (meta == NULL) {
12357389b0e3Sschwarze mpage->form = FORM_CAT;
1236d395d87cSschwarze parse_cat(mpage, fd);
12377389b0e3Sschwarze } else
12387389b0e3Sschwarze mpage->form = FORM_SRC;
12396b86842eSschwarze } else if (meta->macroset == MACROSET_MDOC)
12406b86842eSschwarze parse_mdoc(mpage, meta, meta->first);
12416418b05dSschwarze else
12426b86842eSschwarze parse_man(mpage, meta, meta->first);
1243db59a096Sschwarze if (mpage->desc == NULL) {
1244db59a096Sschwarze mpage->desc = mandoc_strdup(mlink->name);
1245db59a096Sschwarze if (warnings)
1246db59a096Sschwarze say(mlink->file, "No one-line description, "
1247db59a096Sschwarze "using filename \"%s\"", mlink->name);
1248db59a096Sschwarze }
1249f8a05325Sschwarze
125075169803Sschwarze for (mlink = mpage->mlinks;
125175169803Sschwarze mlink != NULL;
125275169803Sschwarze mlink = mlink->next) {
125375169803Sschwarze putkey(mpage, mlink->name, NAME_FILE);
1254058df53fSschwarze if (warnings && !use_all)
1255058df53fSschwarze mlink_check(mpage, mlink);
125675169803Sschwarze }
1257058df53fSschwarze
1258ff2dbb0fSschwarze dbadd(dba, mpage);
1259dba9b9e4Sschwarze
1260dba9b9e4Sschwarze nextpage:
1261eea1c63dSschwarze ohash_delete(&strings);
1262cb2bcd5aSschwarze ohash_delete(&names);
1263ff2dbb0fSschwarze }
1264c0befed7Sschwarze }
1265c0befed7Sschwarze
1266c0befed7Sschwarze static void
parse_cat(struct mpage * mpage,int fd)1267dba9b9e4Sschwarze parse_cat(struct mpage *mpage, int fd)
12689fcb6c6dSschwarze {
12699fcb6c6dSschwarze FILE *stream;
1270902ac8e0Sschwarze struct mlink *mlink;
1271902ac8e0Sschwarze char *line, *p, *title, *sec;
127231f93c25Sschwarze size_t linesz, plen, titlesz;
127331f93c25Sschwarze ssize_t len;
127431f93c25Sschwarze int offs;
12759fcb6c6dSschwarze
1276902ac8e0Sschwarze mlink = mpage->mlinks;
1277902ac8e0Sschwarze stream = fd == -1 ? fopen(mlink->file, "r") : fdopen(fd, "r");
1278902ac8e0Sschwarze if (stream == NULL) {
1279902ac8e0Sschwarze if (fd != -1)
12805238361fSdoug close(fd);
12810a056e84Sderaadt if (warnings)
1282902ac8e0Sschwarze say(mlink->file, "&fopen");
12839fcb6c6dSschwarze return;
12849fcb6c6dSschwarze }
12859fcb6c6dSschwarze
128631f93c25Sschwarze line = NULL;
128731f93c25Sschwarze linesz = 0;
128831f93c25Sschwarze
1289902ac8e0Sschwarze /* Parse the section number from the header line. */
1290902ac8e0Sschwarze
1291902ac8e0Sschwarze while (getline(&line, &linesz, stream) != -1) {
1292902ac8e0Sschwarze if (*line == '\n')
1293902ac8e0Sschwarze continue;
1294902ac8e0Sschwarze if ((sec = strchr(line, '(')) == NULL)
1295902ac8e0Sschwarze break;
1296902ac8e0Sschwarze if ((p = strchr(++sec, ')')) == NULL)
1297902ac8e0Sschwarze break;
1298902ac8e0Sschwarze free(mpage->sec);
1299902ac8e0Sschwarze mpage->sec = mandoc_strndup(sec, p - sec);
1300902ac8e0Sschwarze if (warnings && *mlink->dsec != '\0' &&
1301902ac8e0Sschwarze strcasecmp(mpage->sec, mlink->dsec))
1302902ac8e0Sschwarze say(mlink->file,
1303902ac8e0Sschwarze "Section \"%s\" manual in %s directory",
1304902ac8e0Sschwarze mpage->sec, mlink->dsec);
1305902ac8e0Sschwarze break;
1306902ac8e0Sschwarze }
1307902ac8e0Sschwarze
1308af4ca828Sschwarze /* Skip to first blank line. */
13099fcb6c6dSschwarze
1310902ac8e0Sschwarze while (line == NULL || *line != '\n')
1311902ac8e0Sschwarze if (getline(&line, &linesz, stream) == -1)
1312af4ca828Sschwarze break;
13139fcb6c6dSschwarze
13149fcb6c6dSschwarze /*
1315af4ca828Sschwarze * Assume the first line that is not indented
1316af4ca828Sschwarze * is the first section header. Skip to it.
13179fcb6c6dSschwarze */
13189fcb6c6dSschwarze
131931f93c25Sschwarze while (getline(&line, &linesz, stream) != -1)
132031f93c25Sschwarze if (*line != '\n' && *line != ' ')
1321af4ca828Sschwarze break;
1322af4ca828Sschwarze
1323af4ca828Sschwarze /*
13240bf0a29fSschwarze * Read up until the next section into a buffer.
13250bf0a29fSschwarze * Strip the leading and trailing newline from each read line,
13260bf0a29fSschwarze * appending a trailing space.
13270bf0a29fSschwarze * Ignore empty (whitespace-only) lines.
13280bf0a29fSschwarze */
13290bf0a29fSschwarze
13300bf0a29fSschwarze titlesz = 0;
13310bf0a29fSschwarze title = NULL;
13320bf0a29fSschwarze
133331f93c25Sschwarze while ((len = getline(&line, &linesz, stream)) != -1) {
133431f93c25Sschwarze if (*line != ' ')
13350bf0a29fSschwarze break;
133631f93c25Sschwarze offs = 0;
133731f93c25Sschwarze while (isspace((unsigned char)line[offs]))
133831f93c25Sschwarze offs++;
133931f93c25Sschwarze if (line[offs] == '\0')
13400bf0a29fSschwarze continue;
134131f93c25Sschwarze title = mandoc_realloc(title, titlesz + len - offs);
134231f93c25Sschwarze memcpy(title + titlesz, line + offs, len - offs);
134331f93c25Sschwarze titlesz += len - offs;
1344eea1c63dSschwarze title[titlesz - 1] = ' ';
13450bf0a29fSschwarze }
134631f93c25Sschwarze free(line);
13470bf0a29fSschwarze
13480bf0a29fSschwarze /*
1349af4ca828Sschwarze * If no page content can be found, or the input line
1350af4ca828Sschwarze * is already the next section header, or there is no
1351af4ca828Sschwarze * trailing newline, reuse the page title as the page
1352af4ca828Sschwarze * description.
1353af4ca828Sschwarze */
1354af4ca828Sschwarze
13550bf0a29fSschwarze if (NULL == title || '\0' == *title) {
13560a056e84Sderaadt if (warnings)
1357902ac8e0Sschwarze say(mlink->file, "Cannot find NAME section");
13589fcb6c6dSschwarze fclose(stream);
13590bf0a29fSschwarze free(title);
13609fcb6c6dSschwarze return;
13619fcb6c6dSschwarze }
1362af4ca828Sschwarze
136331f93c25Sschwarze title[titlesz - 1] = '\0';
13649fcb6c6dSschwarze
13659fcb6c6dSschwarze /*
1366af4ca828Sschwarze * Skip to the first dash.
1367af4ca828Sschwarze * Use the remaining line as the description (no more than 70
1368af4ca828Sschwarze * bytes).
13699fcb6c6dSschwarze */
13709fcb6c6dSschwarze
13710bf0a29fSschwarze if (NULL != (p = strstr(title, "- "))) {
1372af4ca828Sschwarze for (p += 2; ' ' == *p || '\b' == *p; p++)
1373af4ca828Sschwarze /* Skip to next word. */ ;
1374f8a05325Sschwarze } else {
13750a056e84Sderaadt if (warnings)
1376db59a096Sschwarze say(mlink->file, "No dash in title line, "
1377db59a096Sschwarze "reusing \"%s\" as one-line description", title);
13780bf0a29fSschwarze p = title;
1379f8a05325Sschwarze }
1380af4ca828Sschwarze
13810bf0a29fSschwarze plen = strlen(p);
13829fcb6c6dSschwarze
1383af4ca828Sschwarze /* Strip backspace-encoding from line. */
13849fcb6c6dSschwarze
1385af4ca828Sschwarze while (NULL != (line = memchr(p, '\b', plen))) {
1386af4ca828Sschwarze len = line - p;
1387af4ca828Sschwarze if (0 == len) {
1388af4ca828Sschwarze memmove(line, line + 1, plen--);
1389af4ca828Sschwarze continue;
1390af4ca828Sschwarze }
1391af4ca828Sschwarze memmove(line - 1, line + 1, plen - len);
1392af4ca828Sschwarze plen -= 2;
1393af4ca828Sschwarze }
1394af4ca828Sschwarze
139578b03aa7Sschwarze /*
139678b03aa7Sschwarze * Cut off excessive one-line descriptions.
139778b03aa7Sschwarze * Bad pages are not worth better heuristics.
139878b03aa7Sschwarze */
139978b03aa7Sschwarze
140078b03aa7Sschwarze mpage->desc = mandoc_strndup(p, 150);
1401af4ca828Sschwarze fclose(stream);
14020bf0a29fSschwarze free(title);
14039fcb6c6dSschwarze }
14049fcb6c6dSschwarze
/*
 * Put a type/word pair into the word database for this particular file.
 * Convenience wrapper around putkeys() for NUL-terminated strings.
 */
static void
putkey(const struct mpage *mpage, char *value, uint64_t type)
{
	const size_t	 sz = strlen(value);

	putkeys(mpage, value, sz, type);
}
1413d6ea6627Sschwarze
1414d6ea6627Sschwarze /*
1415eea1c63dSschwarze * Grok all nodes at or below a certain mdoc node into putkey().
1416eea1c63dSschwarze */
1417eea1c63dSschwarze static void
putmdockey(const struct mpage * mpage,const struct roff_node * n,uint64_t m,int taboo)1418eea1c63dSschwarze putmdockey(const struct mpage *mpage,
1419816c3c54Sschwarze const struct roff_node *n, uint64_t m, int taboo)
1420eea1c63dSschwarze {
1421eea1c63dSschwarze
1422eea1c63dSschwarze for ( ; NULL != n; n = n->next) {
1423816c3c54Sschwarze if (n->flags & taboo)
1424816c3c54Sschwarze continue;
1425eea1c63dSschwarze if (NULL != n->child)
1426816c3c54Sschwarze putmdockey(mpage, n->child, m, taboo);
1427d1982c71Sschwarze if (n->type == ROFFT_TEXT)
1428eea1c63dSschwarze putkey(mpage, n->string, m);
1429eea1c63dSschwarze }
1430eea1c63dSschwarze }
1431eea1c63dSschwarze
1432eea1c63dSschwarze static void
parse_man(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)14332a238f45Sschwarze parse_man(struct mpage *mpage, const struct roff_meta *meta,
14343a0d07afSschwarze const struct roff_node *n)
1435eea1c63dSschwarze {
14363a0d07afSschwarze const struct roff_node *head, *body;
1437fd5b70b0Sschwarze char *start, *title;
1438eea1c63dSschwarze char byte;
1439fd5b70b0Sschwarze size_t sz;
1440eea1c63dSschwarze
144130e5ee06Sschwarze if (n == NULL)
1442eea1c63dSschwarze return;
1443eea1c63dSschwarze
1444eea1c63dSschwarze /*
1445eea1c63dSschwarze * We're only searching for one thing: the first text child in
1446eea1c63dSschwarze * the BODY of a NAME section. Since we don't keep track of
1447eea1c63dSschwarze * sections in -man, run some hoops to find out whether we're in
1448eea1c63dSschwarze * the correct section or not.
1449d6ea6627Sschwarze */
1450d6ea6627Sschwarze
1451d1982c71Sschwarze if (n->type == ROFFT_BODY && n->tok == MAN_SH) {
1452eea1c63dSschwarze body = n;
145330e5ee06Sschwarze if ((head = body->parent->head) != NULL &&
145430e5ee06Sschwarze (head = head->child) != NULL &&
145530e5ee06Sschwarze head->next == NULL &&
1456d1982c71Sschwarze head->type == ROFFT_TEXT &&
145730e5ee06Sschwarze strcmp(head->string, "NAME") == 0 &&
145830e5ee06Sschwarze body->child != NULL) {
1459eea1c63dSschwarze
1460eea1c63dSschwarze /*
1461eea1c63dSschwarze * Suck the entire NAME section into memory.
1462eea1c63dSschwarze * Yes, we might run away.
1463eea1c63dSschwarze * But too many manuals have big, spread-out
1464eea1c63dSschwarze * NAME sections over many lines.
1465eea1c63dSschwarze */
1466eea1c63dSschwarze
1467fd5b70b0Sschwarze title = NULL;
1468423631c9Sschwarze deroff(&title, body);
1469eea1c63dSschwarze if (NULL == title)
1470eea1c63dSschwarze return;
1471d6ea6627Sschwarze
1472d6ea6627Sschwarze /*
1473eea1c63dSschwarze * Go through a special heuristic dance here.
1474eea1c63dSschwarze * Conventionally, one or more manual names are
1475eea1c63dSschwarze * comma-specified prior to a whitespace, then a
1476eea1c63dSschwarze * dash, then a description. Try to puzzle out
1477eea1c63dSschwarze * the name parts here.
1478d6ea6627Sschwarze */
1479d6ea6627Sschwarze
1480fd5b70b0Sschwarze start = title;
1481eea1c63dSschwarze for ( ;; ) {
1482eea1c63dSschwarze sz = strcspn(start, " ,");
1483eea1c63dSschwarze if ('\0' == start[sz])
14847453a7b4Sschwarze break;
1485eea1c63dSschwarze
1486eea1c63dSschwarze byte = start[sz];
1487eea1c63dSschwarze start[sz] = '\0';
1488eea1c63dSschwarze
1489a512ec3cSschwarze /*
1490a512ec3cSschwarze * Assume a stray trailing comma in the
1491a512ec3cSschwarze * name list if a name begins with a dash.
1492a512ec3cSschwarze */
1493a512ec3cSschwarze
1494a512ec3cSschwarze if ('-' == start[0] ||
1495a512ec3cSschwarze ('\\' == start[0] && '-' == start[1]))
1496a512ec3cSschwarze break;
1497a512ec3cSschwarze
1498cb2bcd5aSschwarze putkey(mpage, start, NAME_TITLE);
14998cea0557Sschwarze if ( ! (mpage->name_head_done ||
15008cea0557Sschwarze strcasecmp(start, meta->title))) {
15018cea0557Sschwarze putkey(mpage, start, NAME_HEAD);
15028cea0557Sschwarze mpage->name_head_done = 1;
15038cea0557Sschwarze }
1504eea1c63dSschwarze
1505eea1c63dSschwarze if (' ' == byte) {
1506eea1c63dSschwarze start += sz + 1;
1507eea1c63dSschwarze break;
15089fcb6c6dSschwarze }
1509eea1c63dSschwarze
1510eea1c63dSschwarze assert(',' == byte);
1511eea1c63dSschwarze start += sz + 1;
1512eea1c63dSschwarze while (' ' == *start)
1513eea1c63dSschwarze start++;
15149fcb6c6dSschwarze }
1515eea1c63dSschwarze
1516fd5b70b0Sschwarze if (start == title) {
1517cb2bcd5aSschwarze putkey(mpage, start, NAME_TITLE);
15188cea0557Sschwarze if ( ! (mpage->name_head_done ||
15198cea0557Sschwarze strcasecmp(start, meta->title))) {
15208cea0557Sschwarze putkey(mpage, start, NAME_HEAD);
15218cea0557Sschwarze mpage->name_head_done = 1;
15228cea0557Sschwarze }
1523eea1c63dSschwarze free(title);
1524eea1c63dSschwarze return;
1525eea1c63dSschwarze }
1526eea1c63dSschwarze
1527eea1c63dSschwarze while (isspace((unsigned char)*start))
1528eea1c63dSschwarze start++;
1529eea1c63dSschwarze
1530eea1c63dSschwarze if (0 == strncmp(start, "-", 1))
1531eea1c63dSschwarze start += 1;
1532eea1c63dSschwarze else if (0 == strncmp(start, "\\-\\-", 4))
1533eea1c63dSschwarze start += 4;
1534eea1c63dSschwarze else if (0 == strncmp(start, "\\-", 2))
1535eea1c63dSschwarze start += 2;
1536eea1c63dSschwarze else if (0 == strncmp(start, "\\(en", 4))
1537eea1c63dSschwarze start += 4;
1538eea1c63dSschwarze else if (0 == strncmp(start, "\\(em", 4))
1539eea1c63dSschwarze start += 4;
1540eea1c63dSschwarze
1541eea1c63dSschwarze while (' ' == *start)
1542eea1c63dSschwarze start++;
1543eea1c63dSschwarze
154478b03aa7Sschwarze /*
154578b03aa7Sschwarze * Cut off excessive one-line descriptions.
154678b03aa7Sschwarze * Bad pages are not worth better heuristics.
154778b03aa7Sschwarze */
154878b03aa7Sschwarze
154978b03aa7Sschwarze mpage->desc = mandoc_strndup(start, 150);
1550eea1c63dSschwarze free(title);
1551eea1c63dSschwarze return;
15529fcb6c6dSschwarze }
15539fcb6c6dSschwarze }
1554ae144658Sschwarze
1555eea1c63dSschwarze for (n = n->child; n; n = n->next) {
1556eea1c63dSschwarze if (NULL != mpage->desc)
1557eea1c63dSschwarze break;
15588cea0557Sschwarze parse_man(mpage, meta, n);
1559f8a05325Sschwarze }
1560ae144658Sschwarze }
1561ae144658Sschwarze
1562ae144658Sschwarze static void
parse_mdoc(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)15632a238f45Sschwarze parse_mdoc(struct mpage *mpage, const struct roff_meta *meta,
15643a0d07afSschwarze const struct roff_node *n)
1565ae144658Sschwarze {
156616fe0cfcSschwarze const struct mdoc_handler *handler;
1567ae144658Sschwarze
156814a309e3Sschwarze for (n = n->child; n != NULL; n = n->next) {
156916fe0cfcSschwarze if (n->tok == TOKEN_NONE || n->tok < ROFF_MAX)
1570816c3c54Sschwarze continue;
157129478532Sschwarze assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX);
157216fe0cfcSschwarze handler = mdoc_handlers + (n->tok - MDOC_Dd);
157316fe0cfcSschwarze if (n->flags & handler->taboo)
157416fe0cfcSschwarze continue;
157516fe0cfcSschwarze
1576eea1c63dSschwarze switch (n->type) {
1577d1982c71Sschwarze case ROFFT_ELEM:
1578d1982c71Sschwarze case ROFFT_BLOCK:
1579d1982c71Sschwarze case ROFFT_HEAD:
1580d1982c71Sschwarze case ROFFT_BODY:
1581d1982c71Sschwarze case ROFFT_TAIL:
158216fe0cfcSschwarze if (handler->fp != NULL &&
158316fe0cfcSschwarze (*handler->fp)(mpage, meta, n) == 0)
1584eea1c63dSschwarze break;
158516fe0cfcSschwarze if (handler->mask)
1586eea1c63dSschwarze putmdockey(mpage, n->child,
158716fe0cfcSschwarze handler->mask, handler->taboo);
1588eea1c63dSschwarze break;
1589eea1c63dSschwarze default:
1590eea1c63dSschwarze continue;
1591ae144658Sschwarze }
1592eea1c63dSschwarze if (NULL != n->child)
1593609b2a4cSschwarze parse_mdoc(mpage, meta, n);
1594eea1c63dSschwarze }
1595eea1c63dSschwarze }
1596eea1c63dSschwarze
1597eea1c63dSschwarze static int
parse_mdoc_Fa(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)15983ddcc9e8Sschwarze parse_mdoc_Fa(struct mpage *mpage, const struct roff_meta *meta,
15993ddcc9e8Sschwarze const struct roff_node *n)
16003ddcc9e8Sschwarze {
16013ddcc9e8Sschwarze uint64_t mask;
16023ddcc9e8Sschwarze
16033ddcc9e8Sschwarze mask = TYPE_Fa;
16043ddcc9e8Sschwarze if (n->sec == SEC_SYNOPSIS)
16053ddcc9e8Sschwarze mask |= TYPE_Vt;
16063ddcc9e8Sschwarze
16073ddcc9e8Sschwarze putmdockey(mpage, n->child, mask, 0);
16083ddcc9e8Sschwarze return 0;
16093ddcc9e8Sschwarze }
16103ddcc9e8Sschwarze
16113ddcc9e8Sschwarze static int
parse_mdoc_Fd(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)16122a238f45Sschwarze parse_mdoc_Fd(struct mpage *mpage, const struct roff_meta *meta,
16133a0d07afSschwarze const struct roff_node *n)
1614eea1c63dSschwarze {
1615803ae22eSschwarze char *start, *end;
1616eea1c63dSschwarze size_t sz;
1617eea1c63dSschwarze
1618eea1c63dSschwarze if (SEC_SYNOPSIS != n->sec ||
1619eea1c63dSschwarze NULL == (n = n->child) ||
1620d1982c71Sschwarze n->type != ROFFT_TEXT)
1621526e306bSschwarze return 0;
1622eea1c63dSschwarze
1623eea1c63dSschwarze /*
1624eea1c63dSschwarze * Only consider those `Fd' macro fields that begin with an
1625eea1c63dSschwarze * "inclusion" token (versus, e.g., #define).
1626eea1c63dSschwarze */
1627eea1c63dSschwarze
1628eea1c63dSschwarze if (strcmp("#include", n->string))
1629526e306bSschwarze return 0;
1630eea1c63dSschwarze
1631d1982c71Sschwarze if ((n = n->next) == NULL || n->type != ROFFT_TEXT)
1632526e306bSschwarze return 0;
1633eea1c63dSschwarze
1634eea1c63dSschwarze /*
1635eea1c63dSschwarze * Strip away the enclosing angle brackets and make sure we're
1636eea1c63dSschwarze * not zero-length.
1637eea1c63dSschwarze */
1638eea1c63dSschwarze
1639eea1c63dSschwarze start = n->string;
1640eea1c63dSschwarze if ('<' == *start || '"' == *start)
1641eea1c63dSschwarze start++;
1642eea1c63dSschwarze
1643eea1c63dSschwarze if (0 == (sz = strlen(start)))
1644526e306bSschwarze return 0;
1645eea1c63dSschwarze
1646eea1c63dSschwarze end = &start[(int)sz - 1];
1647eea1c63dSschwarze if ('>' == *end || '"' == *end)
1648eea1c63dSschwarze end--;
1649eea1c63dSschwarze
1650eea1c63dSschwarze if (end > start)
1651eea1c63dSschwarze putkeys(mpage, start, end - start + 1, TYPE_In);
1652526e306bSschwarze return 0;
1653eea1c63dSschwarze }
1654eea1c63dSschwarze
1655f1007f96Sschwarze static void
parse_mdoc_fname(struct mpage * mpage,const struct roff_node * n)16563a0d07afSschwarze parse_mdoc_fname(struct mpage *mpage, const struct roff_node *n)
1657f1007f96Sschwarze {
1658f1007f96Sschwarze char *cp;
1659f1007f96Sschwarze size_t sz;
1660f1007f96Sschwarze
1661d1982c71Sschwarze if (n->type != ROFFT_TEXT)
1662f1007f96Sschwarze return;
1663f1007f96Sschwarze
1664f1007f96Sschwarze /* Skip function pointer punctuation. */
1665f1007f96Sschwarze
1666f1007f96Sschwarze cp = n->string;
1667f1007f96Sschwarze while (*cp == '(' || *cp == '*')
1668f1007f96Sschwarze cp++;
1669f1007f96Sschwarze sz = strcspn(cp, "()");
1670f1007f96Sschwarze
1671f1007f96Sschwarze putkeys(mpage, cp, sz, TYPE_Fn);
1672f1007f96Sschwarze if (n->sec == SEC_SYNOPSIS)
1673f1007f96Sschwarze putkeys(mpage, cp, sz, NAME_SYN);
1674f1007f96Sschwarze }
1675f1007f96Sschwarze
1676eea1c63dSschwarze static int
parse_mdoc_Fn(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)16772a238f45Sschwarze parse_mdoc_Fn(struct mpage *mpage, const struct roff_meta *meta,
16783a0d07afSschwarze const struct roff_node *n)
1679eea1c63dSschwarze {
16803ddcc9e8Sschwarze uint64_t mask;
1681eea1c63dSschwarze
1682f1007f96Sschwarze if (n->child == NULL)
1683526e306bSschwarze return 0;
1684eea1c63dSschwarze
1685f1007f96Sschwarze parse_mdoc_fname(mpage, n->child);
1686eea1c63dSschwarze
16873ddcc9e8Sschwarze n = n->child->next;
16883ddcc9e8Sschwarze if (n != NULL && n->type == ROFFT_TEXT) {
16893ddcc9e8Sschwarze mask = TYPE_Fa;
16903ddcc9e8Sschwarze if (n->sec == SEC_SYNOPSIS)
16913ddcc9e8Sschwarze mask |= TYPE_Vt;
16923ddcc9e8Sschwarze putmdockey(mpage, n, mask, 0);
16933ddcc9e8Sschwarze }
1694eea1c63dSschwarze
1695526e306bSschwarze return 0;
1696eea1c63dSschwarze }
1697eea1c63dSschwarze
1698eea1c63dSschwarze static int
parse_mdoc_Fo(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)16992a238f45Sschwarze parse_mdoc_Fo(struct mpage *mpage, const struct roff_meta *meta,
17003a0d07afSschwarze const struct roff_node *n)
1701d487c156Sschwarze {
1702d487c156Sschwarze
1703d1982c71Sschwarze if (n->type != ROFFT_HEAD)
1704526e306bSschwarze return 1;
17058284db48Sschwarze
1706f1007f96Sschwarze if (n->child != NULL)
1707f1007f96Sschwarze parse_mdoc_fname(mpage, n->child);
1708f1007f96Sschwarze
1709526e306bSschwarze return 0;
1710d487c156Sschwarze }
1711d487c156Sschwarze
1712d487c156Sschwarze static int
parse_mdoc_Va(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)171329945e56Sschwarze parse_mdoc_Va(struct mpage *mpage, const struct roff_meta *meta,
171429945e56Sschwarze const struct roff_node *n)
171529945e56Sschwarze {
171629945e56Sschwarze char *cp;
171729945e56Sschwarze
171829945e56Sschwarze if (n->type != ROFFT_ELEM && n->type != ROFFT_BODY)
171929945e56Sschwarze return 0;
172029945e56Sschwarze
172130e5ee06Sschwarze if (n->child != NULL &&
172230e5ee06Sschwarze n->child->next == NULL &&
172330e5ee06Sschwarze n->child->type == ROFFT_TEXT)
172429945e56Sschwarze return 1;
172529945e56Sschwarze
172629945e56Sschwarze cp = NULL;
172729945e56Sschwarze deroff(&cp, n);
172829945e56Sschwarze if (cp != NULL) {
172929945e56Sschwarze putkey(mpage, cp, TYPE_Vt | (n->tok == MDOC_Va ||
173029945e56Sschwarze n->type == ROFFT_BODY ? TYPE_Va : 0));
173129945e56Sschwarze free(cp);
173229945e56Sschwarze }
173329945e56Sschwarze
173429945e56Sschwarze return 0;
173529945e56Sschwarze }
173629945e56Sschwarze
173729945e56Sschwarze static int
parse_mdoc_Xr(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)17382a238f45Sschwarze parse_mdoc_Xr(struct mpage *mpage, const struct roff_meta *meta,
17393a0d07afSschwarze const struct roff_node *n)
1740eea1c63dSschwarze {
1741eea1c63dSschwarze char *cp;
1742eea1c63dSschwarze
1743eea1c63dSschwarze if (NULL == (n = n->child))
1744526e306bSschwarze return 0;
1745eea1c63dSschwarze
1746eea1c63dSschwarze if (NULL == n->next) {
1747eea1c63dSschwarze putkey(mpage, n->string, TYPE_Xr);
1748526e306bSschwarze return 0;
1749eea1c63dSschwarze }
1750eea1c63dSschwarze
1751a450f7c4Sschwarze mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string);
1752eea1c63dSschwarze putkey(mpage, cp, TYPE_Xr);
1753eea1c63dSschwarze free(cp);
1754526e306bSschwarze return 0;
1755eea1c63dSschwarze }
1756eea1c63dSschwarze
1757eea1c63dSschwarze static int
parse_mdoc_Nd(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)17582a238f45Sschwarze parse_mdoc_Nd(struct mpage *mpage, const struct roff_meta *meta,
17593a0d07afSschwarze const struct roff_node *n)
1760eea1c63dSschwarze {
1761eea1c63dSschwarze
1762d1982c71Sschwarze if (n->type == ROFFT_BODY)
1763423631c9Sschwarze deroff(&mpage->desc, n);
1764526e306bSschwarze return 0;
1765eea1c63dSschwarze }
1766eea1c63dSschwarze
1767eea1c63dSschwarze static int
parse_mdoc_Nm(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)17682a238f45Sschwarze parse_mdoc_Nm(struct mpage *mpage, const struct roff_meta *meta,
17693a0d07afSschwarze const struct roff_node *n)
1770eea1c63dSschwarze {
1771eea1c63dSschwarze
1772a5ccd03aSschwarze if (SEC_NAME == n->sec)
1773816c3c54Sschwarze putmdockey(mpage, n->child, NAME_TITLE, 0);
1774d1982c71Sschwarze else if (n->sec == SEC_SYNOPSIS && n->type == ROFFT_HEAD) {
1775609b2a4cSschwarze if (n->child == NULL)
1776609b2a4cSschwarze putkey(mpage, meta->name, NAME_SYN);
1777609b2a4cSschwarze else
1778816c3c54Sschwarze putmdockey(mpage, n->child, NAME_SYN, 0);
1779609b2a4cSschwarze }
17808cea0557Sschwarze if ( ! (mpage->name_head_done ||
17818cea0557Sschwarze n->child == NULL || n->child->string == NULL ||
17828cea0557Sschwarze strcasecmp(n->child->string, meta->title))) {
17833cc36c2bSschwarze putkey(mpage, n->child->string, NAME_HEAD);
17848cea0557Sschwarze mpage->name_head_done = 1;
17858cea0557Sschwarze }
1786526e306bSschwarze return 0;
1787eea1c63dSschwarze }
1788eea1c63dSschwarze
1789eea1c63dSschwarze static int
parse_mdoc_Sh(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)17902a238f45Sschwarze parse_mdoc_Sh(struct mpage *mpage, const struct roff_meta *meta,
17913a0d07afSschwarze const struct roff_node *n)
1792eea1c63dSschwarze {
1793eea1c63dSschwarze
1794526e306bSschwarze return n->sec == SEC_CUSTOM && n->type == ROFFT_HEAD;
1795eea1c63dSschwarze }
1796eea1c63dSschwarze
1797eea1c63dSschwarze static int
parse_mdoc_head(struct mpage * mpage,const struct roff_meta * meta,const struct roff_node * n)17982a238f45Sschwarze parse_mdoc_head(struct mpage *mpage, const struct roff_meta *meta,
17993a0d07afSschwarze const struct roff_node *n)
1800eea1c63dSschwarze {
1801eea1c63dSschwarze
1802526e306bSschwarze return n->type == ROFFT_HEAD;
1803eea1c63dSschwarze }
1804eea1c63dSschwarze
1805eea1c63dSschwarze /*
1806eea1c63dSschwarze * Add a string to the hash table for the current manual.
1807eea1c63dSschwarze * Each string has a bitmask telling which macros it belongs to.
1808eea1c63dSschwarze * When we finish the manual, we'll dump the table.
1809eea1c63dSschwarze */
1810eea1c63dSschwarze static void
putkeys(const struct mpage * mpage,char * cp,size_t sz,uint64_t v)1811803ae22eSschwarze putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v)
1812eea1c63dSschwarze {
1813cb2bcd5aSschwarze struct ohash *htab;
1814eea1c63dSschwarze struct str *s;
1815eea1c63dSschwarze const char *end;
1816a683219bSschwarze unsigned int slot;
1817803ae22eSschwarze int i, mustfree;
1818eea1c63dSschwarze
1819eea1c63dSschwarze if (0 == sz)
1820eea1c63dSschwarze return;
1821eea1c63dSschwarze
1822803ae22eSschwarze mustfree = render_string(&cp, &sz);
1823803ae22eSschwarze
1824cb2bcd5aSschwarze if (TYPE_Nm & v) {
1825cb2bcd5aSschwarze htab = &names;
1826cb2bcd5aSschwarze v &= name_mask;
1827ac2b0badSschwarze if (v & NAME_FIRST)
1828cb2bcd5aSschwarze name_mask &= ~NAME_FIRST;
1829cb2bcd5aSschwarze if (debug > 1)
1830cb2bcd5aSschwarze say(mpage->mlinks->file,
1831753b3b59Sschwarze "Adding name %*s, bits=0x%llx", (int)sz, cp,
1832753b3b59Sschwarze (unsigned long long)v);
1833cb2bcd5aSschwarze } else {
1834cb2bcd5aSschwarze htab = &strings;
1835cb2bcd5aSschwarze if (debug > 1)
1836ff2dbb0fSschwarze for (i = 0; i < KEY_MAX; i++)
18377c51d50cSschwarze if ((uint64_t)1 << i & v)
1838cb2bcd5aSschwarze say(mpage->mlinks->file,
1839cb2bcd5aSschwarze "Adding key %s=%*s",
184057c6a104Sschwarze mansearch_keynames[i], (int)sz, cp);
1841a683219bSschwarze }
1842a683219bSschwarze
1843eea1c63dSschwarze end = cp + sz;
1844cb2bcd5aSschwarze slot = ohash_qlookupi(htab, cp, &end);
1845cb2bcd5aSschwarze s = ohash_find(htab, slot);
1846eea1c63dSschwarze
1847eea1c63dSschwarze if (NULL != s && mpage == s->mpage) {
1848eea1c63dSschwarze s->mask |= v;
1849eea1c63dSschwarze return;
1850eea1c63dSschwarze } else if (NULL == s) {
18518286bf36Sschwarze s = mandoc_calloc(1, sizeof(struct str) + sz + 1);
1852eea1c63dSschwarze memcpy(s->key, cp, sz);
1853cb2bcd5aSschwarze ohash_insert(htab, slot, s);
1854eea1c63dSschwarze }
1855eea1c63dSschwarze s->mpage = mpage;
1856eea1c63dSschwarze s->mask = v;
1857803ae22eSschwarze
1858803ae22eSschwarze if (mustfree)
1859803ae22eSschwarze free(cp);
1860eea1c63dSschwarze }
1861eea1c63dSschwarze
/*
 * Encode the Unicode codepoint cp as UTF-8 into out, append a
 * NUL byte, and return the number of bytes in the encoding.
 * UTF-16 surrogates (0xd800 to 0xdfff) and values above 0x10ffff
 * are rejected: the function returns 0 and out is set to the
 * empty string.
 * Read the UTF-8 spec or the utf8(7) manual page for details
 * about the bit patterns.
 */
static size_t
utf8(unsigned int cp, char out[5])
{
	size_t	 len;

	if (cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) {
		/* Not encodable. */
		len = 0;
	} else if (cp < 0x80) {
		len = 1;
		out[0] = (char)cp;
	} else if (cp < 0x800) {
		len = 2;
		out[0] = 0xc0 | (cp >> 6 & 0x1f);
		out[1] = 0x80 | (cp & 0x3f);
	} else if (cp < 0x10000) {
		len = 3;
		out[0] = 0xe0 | (cp >> 12 & 0x0f);
		out[1] = 0x80 | (cp >> 6 & 0x3f);
		out[2] = 0x80 | (cp & 0x3f);
	} else {
		len = 4;
		out[0] = 0xf0 | (cp >> 18 & 0x07);
		out[1] = 0x80 | (cp >> 12 & 0x3f);
		out[2] = 0x80 | (cp >> 6 & 0x3f);
		out[3] = 0x80 | (cp & 0x3f);
	}

	out[len] = '\0';
	return len;
}
1899eea1c63dSschwarze
1900eea1c63dSschwarze /*
1901803ae22eSschwarze * If the string contains escape sequences,
1902803ae22eSschwarze * replace it with an allocated rendering and return 1,
1903803ae22eSschwarze * such that the caller can free it after use.
1904803ae22eSschwarze * Otherwise, do nothing and return 0.
1905eea1c63dSschwarze */
1906803ae22eSschwarze static int
render_string(char ** public,size_t * psz)1907803ae22eSschwarze render_string(char **public, size_t *psz)
1908eea1c63dSschwarze {
1909803ae22eSschwarze const char *src, *scp, *addcp, *seq;
1910803ae22eSschwarze char *dst;
1911803ae22eSschwarze size_t ssz, dsz, addsz;
19121281a50cSschwarze char utfbuf[7], res[6];
1913803ae22eSschwarze int seqlen, unicode;
1914eea1c63dSschwarze
1915eea1c63dSschwarze res[0] = '\\';
1916eea1c63dSschwarze res[1] = '\t';
1917eea1c63dSschwarze res[2] = ASCII_NBRSP;
1918eea1c63dSschwarze res[3] = ASCII_HYPH;
19191281a50cSschwarze res[4] = ASCII_BREAK;
19201281a50cSschwarze res[5] = '\0';
1921eea1c63dSschwarze
1922803ae22eSschwarze src = scp = *public;
1923803ae22eSschwarze ssz = *psz;
1924803ae22eSschwarze dst = NULL;
1925803ae22eSschwarze dsz = 0;
1926eea1c63dSschwarze
1927803ae22eSschwarze while (scp < src + *psz) {
1928eea1c63dSschwarze
1929803ae22eSschwarze /* Leave normal characters unchanged. */
1930eea1c63dSschwarze
1931803ae22eSschwarze if (strchr(res, *scp) == NULL) {
1932803ae22eSschwarze if (dst != NULL)
1933803ae22eSschwarze dst[dsz++] = *scp;
1934803ae22eSschwarze scp++;
1935eea1c63dSschwarze continue;
1936803ae22eSschwarze }
1937803ae22eSschwarze
1938803ae22eSschwarze /*
1939803ae22eSschwarze * Found something that requires replacing,
1940803ae22eSschwarze * make sure we have a destination buffer.
1941803ae22eSschwarze */
1942803ae22eSschwarze
1943803ae22eSschwarze if (dst == NULL) {
1944803ae22eSschwarze dst = mandoc_malloc(ssz + 1);
1945803ae22eSschwarze dsz = scp - src;
1946803ae22eSschwarze memcpy(dst, src, dsz);
1947803ae22eSschwarze }
1948803ae22eSschwarze
1949803ae22eSschwarze /* Handle single-char special characters. */
1950803ae22eSschwarze
1951803ae22eSschwarze switch (*scp) {
1952803ae22eSschwarze case '\\':
1953803ae22eSschwarze break;
195449aff9f8Sschwarze case '\t':
195549aff9f8Sschwarze case ASCII_NBRSP:
1956803ae22eSschwarze dst[dsz++] = ' ';
1957803ae22eSschwarze scp++;
1958803ae22eSschwarze continue;
1959803ae22eSschwarze case ASCII_HYPH:
1960803ae22eSschwarze dst[dsz++] = '-';
19611281a50cSschwarze /* FALLTHROUGH */
196249aff9f8Sschwarze case ASCII_BREAK:
1963803ae22eSschwarze scp++;
1964eea1c63dSschwarze continue;
19651281a50cSschwarze default:
1966803ae22eSschwarze abort();
19671281a50cSschwarze }
1968eea1c63dSschwarze
1969eea1c63dSschwarze /*
1970803ae22eSschwarze * Found an escape sequence.
1971803ae22eSschwarze * Read past the slash, then parse it.
1972803ae22eSschwarze * Ignore everything except characters.
1973eea1c63dSschwarze */
1974562403eaSschwarze
1975803ae22eSschwarze scp++;
197670212363Sschwarze switch (mandoc_escape(&scp, &seq, &seqlen)) {
197770212363Sschwarze case ESCAPE_UNICODE:
197870212363Sschwarze unicode = mchars_num2uc(seq + 1, seqlen - 1);
197970212363Sschwarze break;
198070212363Sschwarze case ESCAPE_NUMBERED:
198170212363Sschwarze unicode = mchars_num2char(seq, seqlen);
198270212363Sschwarze break;
198370212363Sschwarze case ESCAPE_SPECIAL:
198470212363Sschwarze unicode = mchars_spec2cp(seq, seqlen);
198570212363Sschwarze break;
198670212363Sschwarze default:
198770212363Sschwarze unicode = -1;
198870212363Sschwarze break;
198970212363Sschwarze }
199070212363Sschwarze if (unicode <= 0)
1991eea1c63dSschwarze continue;
1992eea1c63dSschwarze
1993eea1c63dSschwarze /*
1994562403eaSschwarze * Render the special character
1995562403eaSschwarze * as either UTF-8 or ASCII.
1996eea1c63dSschwarze */
1997562403eaSschwarze
1998562403eaSschwarze if (write_utf8) {
1999803ae22eSschwarze addsz = utf8(unicode, utfbuf);
2000803ae22eSschwarze if (addsz == 0)
2001eea1c63dSschwarze continue;
2002803ae22eSschwarze addcp = utfbuf;
2003562403eaSschwarze } else {
200470212363Sschwarze addcp = mchars_uc2str(unicode);
2005803ae22eSschwarze if (addcp == NULL)
2006562403eaSschwarze continue;
200770212363Sschwarze if (*addcp == ASCII_NBRSP)
2008803ae22eSschwarze addcp = " ";
200970212363Sschwarze addsz = strlen(addcp);
2010562403eaSschwarze }
2011eea1c63dSschwarze
2012eea1c63dSschwarze /* Copy the rendered glyph into the stream. */
2013eea1c63dSschwarze
2014803ae22eSschwarze ssz += addsz;
2015803ae22eSschwarze dst = mandoc_realloc(dst, ssz + 1);
2016803ae22eSschwarze memcpy(dst + dsz, addcp, addsz);
2017803ae22eSschwarze dsz += addsz;
2018803ae22eSschwarze }
2019803ae22eSschwarze if (dst != NULL) {
2020803ae22eSschwarze *public = dst;
2021803ae22eSschwarze *psz = dsz;
2022eea1c63dSschwarze }
2023eea1c63dSschwarze
2024803ae22eSschwarze /* Trim trailing whitespace and NUL-terminate. */
2025803ae22eSschwarze
2026803ae22eSschwarze while (*psz > 0 && (*public)[*psz - 1] == ' ')
2027803ae22eSschwarze --*psz;
2028803ae22eSschwarze if (dst != NULL) {
2029803ae22eSschwarze (*public)[*psz] = '\0';
2030526e306bSschwarze return 1;
2031803ae22eSschwarze } else
2032526e306bSschwarze return 0;
2033eea1c63dSschwarze }
2034eea1c63dSschwarze
2035b493b720Sschwarze static void
dbadd_mlink(const struct mlink * mlink)2036b493b720Sschwarze dbadd_mlink(const struct mlink *mlink)
2037b493b720Sschwarze {
2038ff2dbb0fSschwarze dba_page_alias(mlink->mpage->dba, mlink->name, NAME_FILE);
2039ff2dbb0fSschwarze dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->dsec);
2040ff2dbb0fSschwarze dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->fsec);
2041ff2dbb0fSschwarze dba_page_add(mlink->mpage->dba, DBP_ARCH, mlink->arch);
2042ff2dbb0fSschwarze dba_page_add(mlink->mpage->dba, DBP_FILE, mlink->file);
2043b493b720Sschwarze }
2044b493b720Sschwarze
2045eea1c63dSschwarze /*
2046eea1c63dSschwarze * Flush the current page's terms (and their bits) into the database.
20477bf1e222Sschwarze * Also, handle escape sequences at the last possible moment.
2048eea1c63dSschwarze */
2049eea1c63dSschwarze static void
dbadd(struct dba * dba,struct mpage * mpage)2050ff2dbb0fSschwarze dbadd(struct dba *dba, struct mpage *mpage)
2051eea1c63dSschwarze {
2052eea1c63dSschwarze struct mlink *mlink;
2053eea1c63dSschwarze struct str *key;
2054803ae22eSschwarze char *cp;
2055ff2dbb0fSschwarze uint64_t mask;
2056eea1c63dSschwarze size_t i;
2057eea1c63dSschwarze unsigned int slot;
2058803ae22eSschwarze int mustfree;
2059eea1c63dSschwarze
2060e4570e6cSschwarze mlink = mpage->mlinks;
2061eea1c63dSschwarze
2062e4570e6cSschwarze if (nodb) {
2063fb191a42Sschwarze for (key = ohash_first(&names, &slot); NULL != key;
2064803ae22eSschwarze key = ohash_next(&names, &slot))
2065fb191a42Sschwarze free(key);
2066fb191a42Sschwarze for (key = ohash_first(&strings, &slot); NULL != key;
2067803ae22eSschwarze key = ohash_next(&strings, &slot))
2068fb191a42Sschwarze free(key);
2069dbb14b0bSschwarze if (0 == debug)
2070dbb14b0bSschwarze return;
2071e4570e6cSschwarze while (NULL != mlink) {
2072e4570e6cSschwarze fputs(mlink->name, stdout);
2073e4570e6cSschwarze if (NULL == mlink->next ||
2074e4570e6cSschwarze strcmp(mlink->dsec, mlink->next->dsec) ||
2075e4570e6cSschwarze strcmp(mlink->fsec, mlink->next->fsec) ||
2076e4570e6cSschwarze strcmp(mlink->arch, mlink->next->arch)) {
2077e4570e6cSschwarze putchar('(');
2078e4570e6cSschwarze if ('\0' == *mlink->dsec)
2079e4570e6cSschwarze fputs(mlink->fsec, stdout);
2080e4570e6cSschwarze else
2081e4570e6cSschwarze fputs(mlink->dsec, stdout);
2082e4570e6cSschwarze if ('\0' != *mlink->arch)
2083e4570e6cSschwarze printf("/%s", mlink->arch);
2084e4570e6cSschwarze putchar(')');
2085e4570e6cSschwarze }
2086e4570e6cSschwarze mlink = mlink->next;
2087e4570e6cSschwarze if (NULL != mlink)
2088e4570e6cSschwarze fputs(", ", stdout);
2089e4570e6cSschwarze }
2090d16c09f4Sschwarze printf(" - %s\n", mpage->desc);
2091eea1c63dSschwarze return;
2092e4570e6cSschwarze }
2093e4570e6cSschwarze
2094e4570e6cSschwarze if (debug)
2095e4570e6cSschwarze say(mlink->file, "Adding to database");
2096eea1c63dSschwarze
2097803ae22eSschwarze cp = mpage->desc;
2098803ae22eSschwarze i = strlen(cp);
2099803ae22eSschwarze mustfree = render_string(&cp, &i);
2100289fdc1aSschwarze mpage->dba = dba_page_new(dba->pages,
2101ff2dbb0fSschwarze *mpage->arch == '\0' ? mlink->arch : mpage->arch,
2102ff2dbb0fSschwarze cp, mlink->file, mpage->form);
2103803ae22eSschwarze if (mustfree)
2104803ae22eSschwarze free(cp);
2105289fdc1aSschwarze dba_page_add(mpage->dba, DBP_SECT, mpage->sec);
2106d6b43e11Sschwarze
2107ff2dbb0fSschwarze while (mlink != NULL) {
2108b493b720Sschwarze dbadd_mlink(mlink);
2109e4570e6cSschwarze mlink = mlink->next;
2110e4570e6cSschwarze }
2111eea1c63dSschwarze
2112cb2bcd5aSschwarze for (key = ohash_first(&names, &slot); NULL != key;
2113cb2bcd5aSschwarze key = ohash_next(&names, &slot)) {
2114cb2bcd5aSschwarze assert(key->mpage == mpage);
2115ff2dbb0fSschwarze dba_page_alias(mpage->dba, key->key, key->mask);
2116cb2bcd5aSschwarze free(key);
2117cb2bcd5aSschwarze }
2118eea1c63dSschwarze for (key = ohash_first(&strings, &slot); NULL != key;
2119eea1c63dSschwarze key = ohash_next(&strings, &slot)) {
2120eea1c63dSschwarze assert(key->mpage == mpage);
2121ff2dbb0fSschwarze i = 0;
2122ff2dbb0fSschwarze for (mask = TYPE_Xr; mask <= TYPE_Lb; mask *= 2) {
2123ff2dbb0fSschwarze if (key->mask & mask)
2124ff2dbb0fSschwarze dba_macro_add(dba->macros, i,
2125ff2dbb0fSschwarze key->key, mpage->dba);
2126ff2dbb0fSschwarze i++;
2127ff2dbb0fSschwarze }
2128eea1c63dSschwarze free(key);
2129eea1c63dSschwarze }
2130eea1c63dSschwarze }
2131eea1c63dSschwarze
2132eea1c63dSschwarze static void
dbprune(struct dba * dba)2133ff2dbb0fSschwarze dbprune(struct dba *dba)
2134eea1c63dSschwarze {
2135ff2dbb0fSschwarze struct dba_array *page, *files;
2136ff2dbb0fSschwarze char *file;
2137eea1c63dSschwarze
2138ff2dbb0fSschwarze dba_array_FOREACH(dba->pages, page) {
2139ff2dbb0fSschwarze files = dba_array_get(page, DBP_FILE);
2140ff2dbb0fSschwarze dba_array_FOREACH(files, file) {
2141ff2dbb0fSschwarze if (*file < ' ')
2142ff2dbb0fSschwarze file++;
2143ff2dbb0fSschwarze if (ohash_find(&mlinks, ohash_qlookup(&mlinks,
2144ff2dbb0fSschwarze file)) != NULL) {
2145dada979aSschwarze if (debug)
2146ff2dbb0fSschwarze say(file, "Deleting from database");
2147ff2dbb0fSschwarze dba_array_del(dba->pages);
2148ff2dbb0fSschwarze break;
2149eea1c63dSschwarze }
2150eea1c63dSschwarze }
2151ff2dbb0fSschwarze }
215277dd9b85Sschwarze }
215377dd9b85Sschwarze
2154eea1c63dSschwarze /*
2155ff2dbb0fSschwarze * Write the database from memory to disk.
2156eea1c63dSschwarze */
2157eea1c63dSschwarze static void
dbwrite(struct dba * dba)2158ff2dbb0fSschwarze dbwrite(struct dba *dba)
2159eea1c63dSschwarze {
21601ed2eb27Sschwarze struct stat sb1, sb2;
21611ed2eb27Sschwarze char tfn[33], *cp1, *cp2;
21621ed2eb27Sschwarze off_t i;
21631ed2eb27Sschwarze int fd1, fd2;
2164eea1c63dSschwarze
216595d9e8b1Sschwarze /*
216695d9e8b1Sschwarze * Do not write empty databases, and delete existing ones
216795d9e8b1Sschwarze * when makewhatis -u causes them to become empty.
216895d9e8b1Sschwarze */
216995d9e8b1Sschwarze
217095d9e8b1Sschwarze dba_array_start(dba->pages);
217195d9e8b1Sschwarze if (dba_array_next(dba->pages) == NULL) {
2172ea5b0906Sschwarze if (unlink(MANDOC_DB) == -1 && errno != ENOENT)
217395d9e8b1Sschwarze say(MANDOC_DB, "&unlink");
217495d9e8b1Sschwarze return;
217595d9e8b1Sschwarze }
217695d9e8b1Sschwarze
217795d9e8b1Sschwarze /*
217895d9e8b1Sschwarze * Build the database in a temporary file,
217995d9e8b1Sschwarze * then atomically move it into place.
218095d9e8b1Sschwarze */
218195d9e8b1Sschwarze
2182ff2dbb0fSschwarze if (dba_write(MANDOC_DB "~", dba) != -1) {
2183ff2dbb0fSschwarze if (rename(MANDOC_DB "~", MANDOC_DB) == -1) {
2184eea1c63dSschwarze exitcode = (int)MANDOCLEVEL_SYSERR;
2185d007b464Sschwarze say(MANDOC_DB, "&rename");
2186ff2dbb0fSschwarze unlink(MANDOC_DB "~");
2187bfc59dbbSschwarze }
2188bfc59dbbSschwarze return;
2189bfc59dbbSschwarze }
2190bfc59dbbSschwarze
219195d9e8b1Sschwarze /*
219295d9e8b1Sschwarze * We lack write permission and cannot replace the database
219395d9e8b1Sschwarze * file, but let's at least check whether the data changed.
219495d9e8b1Sschwarze */
219595d9e8b1Sschwarze
2196ff2dbb0fSschwarze (void)strlcpy(tfn, "/tmp/mandocdb.XXXXXXXX", sizeof(tfn));
2197ff2dbb0fSschwarze if (mkdtemp(tfn) == NULL) {
2198ff2dbb0fSschwarze exitcode = (int)MANDOCLEVEL_SYSERR;
2199ff2dbb0fSschwarze say("", "&%s", tfn);
2200ff2dbb0fSschwarze return;
2201ff2dbb0fSschwarze }
220285087438Sschwarze cp1 = cp2 = MAP_FAILED;
22031ed2eb27Sschwarze fd1 = fd2 = -1;
2204ff2dbb0fSschwarze (void)strlcat(tfn, "/" MANDOC_DB, sizeof(tfn));
2205ff2dbb0fSschwarze if (dba_write(tfn, dba) == -1) {
2206ff2dbb0fSschwarze say(tfn, "&dba_write");
22071ed2eb27Sschwarze goto err;
22081ed2eb27Sschwarze }
2209b7041c07Sderaadt if ((fd1 = open(MANDOC_DB, O_RDONLY)) == -1) {
22101ed2eb27Sschwarze say(MANDOC_DB, "&open");
22111ed2eb27Sschwarze goto err;
22121ed2eb27Sschwarze }
2213b7041c07Sderaadt if ((fd2 = open(tfn, O_RDONLY)) == -1) {
22141ed2eb27Sschwarze say(tfn, "&open");
22151ed2eb27Sschwarze goto err;
22161ed2eb27Sschwarze }
22171ed2eb27Sschwarze if (fstat(fd1, &sb1) == -1) {
22181ed2eb27Sschwarze say(MANDOC_DB, "&fstat");
22191ed2eb27Sschwarze goto err;
22201ed2eb27Sschwarze }
22211ed2eb27Sschwarze if (fstat(fd2, &sb2) == -1) {
22221ed2eb27Sschwarze say(tfn, "&fstat");
22231ed2eb27Sschwarze goto err;
22241ed2eb27Sschwarze }
22251ed2eb27Sschwarze if (sb1.st_size != sb2.st_size)
22261ed2eb27Sschwarze goto err;
22271ed2eb27Sschwarze if ((cp1 = mmap(NULL, sb1.st_size, PROT_READ, MAP_PRIVATE,
222885087438Sschwarze fd1, 0)) == MAP_FAILED) {
22291ed2eb27Sschwarze say(MANDOC_DB, "&mmap");
22301ed2eb27Sschwarze goto err;
22311ed2eb27Sschwarze }
22321ed2eb27Sschwarze if ((cp2 = mmap(NULL, sb2.st_size, PROT_READ, MAP_PRIVATE,
223385087438Sschwarze fd2, 0)) == MAP_FAILED) {
22341ed2eb27Sschwarze say(tfn, "&mmap");
22351ed2eb27Sschwarze goto err;
22361ed2eb27Sschwarze }
22371ed2eb27Sschwarze for (i = 0; i < sb1.st_size; i++)
22381ed2eb27Sschwarze if (cp1[i] != cp2[i])
22391ed2eb27Sschwarze goto err;
2240ff2dbb0fSschwarze goto out;
2241ff2dbb0fSschwarze
22421ed2eb27Sschwarze err:
2243bfc59dbbSschwarze exitcode = (int)MANDOCLEVEL_SYSERR;
22441ed2eb27Sschwarze say(MANDOC_DB, "Data changed, but cannot replace database");
2245bfc59dbbSschwarze
2246ff2dbb0fSschwarze out:
224785087438Sschwarze if (cp1 != MAP_FAILED)
22481ed2eb27Sschwarze munmap(cp1, sb1.st_size);
224985087438Sschwarze if (cp2 != MAP_FAILED)
22501ed2eb27Sschwarze munmap(cp2, sb2.st_size);
22511ed2eb27Sschwarze if (fd1 != -1)
22521ed2eb27Sschwarze close(fd1);
22531ed2eb27Sschwarze if (fd2 != -1)
22541ed2eb27Sschwarze close(fd2);
2255458b6f49Sschwarze unlink(tfn);
2256ff2dbb0fSschwarze *strrchr(tfn, '/') = '\0';
2257458b6f49Sschwarze rmdir(tfn);
2258eea1c63dSschwarze }
2259eea1c63dSschwarze
2260eea1c63dSschwarze static int
set_basedir(const char * targetdir,int report_baddir)2261bf40fec9Sschwarze set_basedir(const char *targetdir, int report_baddir)
2262eea1c63dSschwarze {
2263eea1c63dSschwarze static char startdir[PATH_MAX];
2264fbc96d89Sschwarze static int getcwd_status; /* 1 = ok, 2 = failure */
2265fbc96d89Sschwarze static int chdir_status; /* 1 = changed directory */
2266eea1c63dSschwarze
2267eea1c63dSschwarze /*
2268fbc96d89Sschwarze * Remember the original working directory, if possible.
2269fbc96d89Sschwarze * This will be needed if the second or a later directory
2270fbc96d89Sschwarze * on the command line is given as a relative path.
2271fbc96d89Sschwarze * Do not error out if the current directory is not
2272fbc96d89Sschwarze * searchable: Maybe it won't be needed after all.
2273eea1c63dSschwarze */
2274fa397007Sschwarze if (getcwd_status == 0) {
2275fa397007Sschwarze if (getcwd(startdir, sizeof(startdir)) == NULL) {
2276fbc96d89Sschwarze getcwd_status = 2;
2277fbc96d89Sschwarze (void)strlcpy(startdir, strerror(errno),
2278fbc96d89Sschwarze sizeof(startdir));
2279fbc96d89Sschwarze } else
2280fbc96d89Sschwarze getcwd_status = 1;
2281fbc96d89Sschwarze }
2282fbc96d89Sschwarze
2283fbc96d89Sschwarze /*
2284fbc96d89Sschwarze * We are leaving the old base directory.
2285fbc96d89Sschwarze * Do not use it any longer, not even for messages.
2286fbc96d89Sschwarze */
2287fbc96d89Sschwarze *basedir = '\0';
2288fa397007Sschwarze basedir_len = 0;
2289fbc96d89Sschwarze
2290fbc96d89Sschwarze /*
2291fbc96d89Sschwarze * If and only if the directory was changed earlier and
2292fbc96d89Sschwarze * the next directory to process is given as a relative path,
2293fbc96d89Sschwarze * first go back, or bail out if that is impossible.
2294fbc96d89Sschwarze */
2295fa397007Sschwarze if (chdir_status && *targetdir != '/') {
2296fa397007Sschwarze if (getcwd_status == 2) {
2297eea1c63dSschwarze exitcode = (int)MANDOCLEVEL_SYSERR;
2298fbc96d89Sschwarze say("", "getcwd: %s", startdir);
2299526e306bSschwarze return 0;
2300eea1c63dSschwarze }
2301fa397007Sschwarze if (chdir(startdir) == -1) {
2302eea1c63dSschwarze exitcode = (int)MANDOCLEVEL_SYSERR;
2303d007b464Sschwarze say("", "&chdir %s", startdir);
2304526e306bSschwarze return 0;
2305eea1c63dSschwarze }
2306eea1c63dSschwarze }
2307fbc96d89Sschwarze
2308fbc96d89Sschwarze /*
2309fbc96d89Sschwarze * Always resolve basedir to the canonicalized absolute
2310fbc96d89Sschwarze * pathname and append a trailing slash, such that
2311fbc96d89Sschwarze * we can reliably check whether files are inside.
2312fbc96d89Sschwarze */
2313fa397007Sschwarze if (realpath(targetdir, basedir) == NULL) {
2314bf40fec9Sschwarze if (report_baddir || errno != ENOENT) {
2315eea1c63dSschwarze exitcode = (int)MANDOCLEVEL_BADARG;
2316d007b464Sschwarze say("", "&%s: realpath", targetdir);
2317bf40fec9Sschwarze }
2318fa397007Sschwarze *basedir = '\0';
2319526e306bSschwarze return 0;
2320fa397007Sschwarze } else if (chdir(basedir) == -1) {
2321bf40fec9Sschwarze if (report_baddir || errno != ENOENT) {
2322eea1c63dSschwarze exitcode = (int)MANDOCLEVEL_BADARG;
2323d007b464Sschwarze say("", "&chdir");
2324bf40fec9Sschwarze }
2325fa397007Sschwarze *basedir = '\0';
2326526e306bSschwarze return 0;
2327eea1c63dSschwarze }
2328fbc96d89Sschwarze chdir_status = 1;
2329fa397007Sschwarze basedir_len = strlen(basedir);
2330fa397007Sschwarze if (basedir[basedir_len - 1] != '/') {
2331fa397007Sschwarze if (basedir_len >= PATH_MAX - 1) {
233276c93bc9Sschwarze exitcode = (int)MANDOCLEVEL_SYSERR;
233376c93bc9Sschwarze say("", "Filename too long");
2334fa397007Sschwarze *basedir = '\0';
2335fa397007Sschwarze basedir_len = 0;
2336526e306bSschwarze return 0;
233776c93bc9Sschwarze }
2338fa397007Sschwarze basedir[basedir_len++] = '/';
2339fa397007Sschwarze basedir[basedir_len] = '\0';
234076c93bc9Sschwarze }
2341526e306bSschwarze return 1;
2342eea1c63dSschwarze }
2343eea1c63dSschwarze
2344eea1c63dSschwarze static void
say(const char * file,const char * format,...)2345eea1c63dSschwarze say(const char *file, const char *format, ...)
2346eea1c63dSschwarze {
2347eea1c63dSschwarze va_list ap;
2348d007b464Sschwarze int use_errno;
2349eea1c63dSschwarze
2350fa397007Sschwarze if (*basedir != '\0')
2351eea1c63dSschwarze fprintf(stderr, "%s", basedir);
2352fa397007Sschwarze if (*basedir != '\0' && *file != '\0')
2353fbc96d89Sschwarze fputc('/', stderr);
2354fa397007Sschwarze if (*file != '\0')
2355eea1c63dSschwarze fprintf(stderr, "%s", file);
2356eea1c63dSschwarze
2357d007b464Sschwarze use_errno = 1;
2358fa397007Sschwarze if (format != NULL) {
2359d007b464Sschwarze switch (*format) {
236049aff9f8Sschwarze case '&':
2361d007b464Sschwarze format++;
2362d007b464Sschwarze break;
236349aff9f8Sschwarze case '\0':
2364d007b464Sschwarze format = NULL;
2365d007b464Sschwarze break;
2366d007b464Sschwarze default:
2367d007b464Sschwarze use_errno = 0;
2368d007b464Sschwarze break;
2369eea1c63dSschwarze }
2370d007b464Sschwarze }
2371fa397007Sschwarze if (format != NULL) {
2372fa397007Sschwarze if (*basedir != '\0' || *file != '\0')
2373d007b464Sschwarze fputs(": ", stderr);
2374eea1c63dSschwarze va_start(ap, format);
2375eea1c63dSschwarze vfprintf(stderr, format, ap);
2376eea1c63dSschwarze va_end(ap);
2377d007b464Sschwarze }
2378d007b464Sschwarze if (use_errno) {
2379fa397007Sschwarze if (*basedir != '\0' || *file != '\0' || format != NULL)
2380d007b464Sschwarze fputs(": ", stderr);
2381d007b464Sschwarze perror(NULL);
2382d007b464Sschwarze } else
2383eea1c63dSschwarze fputc('\n', stderr);
2384ae144658Sschwarze }
2385