xref: /openbsd-src/usr.bin/mandoc/main.c (revision 43003dfe3ad45d1698bed8a37f2b0f5b14f20d4f)
1 /*	$Id: main.c,v 1.16 2009/09/21 20:57:57 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/stat.h>
18 
19 #include <assert.h>
20 #include <err.h>
21 #include <fcntl.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 
27 #include "mdoc.h"
28 #include "man.h"
29 
/*
 * Output-device callback types.  fdesc() calls the out_mdoc or out_man
 * hook with the opaque `outdata' pointer of struct curparse and the
 * parse that just completed; main() calls the out_free hook with the
 * same `outdata' pointer before exiting.
 */
typedef	void		(*out_mdoc)(void *, const struct mdoc *);
typedef	void		(*out_man)(void *, const struct man *);
typedef	void		(*out_free)(void *);
33 
/*
 * A growable heap buffer and its allocated size.  main() zeroes two of
 * these (block buffer and line buffer), fdesc() grows them with
 * realloc(), and main() frees them at exit.
 */
struct	buf {
	char	 	 *buf;	/* Storage; NULL until first allocated. */
	size_t		  sz;	/* Number of bytes allocated at `buf'. */
};
38 
/* Input format, selected with -m (see moptions()). */
enum	intt {
	INTT_AUTO,	/* -mandoc: pset() guesses per file (default). */
	INTT_MDOC,	/* -mdoc: always use the mdoc parser. */
	INTT_MAN	/* -man: always use the man parser. */
};
44 
/* Output device, selected with -T (see toptions()). */
enum	outt {
	OUTT_ASCII = 0,	/* -Tascii: terminal_*() output (default). */
	OUTT_TREE,	/* -Ttree: tree_man()/tree_mdoc() output. */
	OUTT_LINT	/* -Tlint: parse only; fdesc() installs no output hooks. */
};
50 
/*
 * State shared across all files of one invocation: the file currently
 * being read, the command-line flags, the lazily allocated parsers and
 * the output device.  A pointer to this structure is also handed to the
 * parsers as the opaque argument of the merr()/mwarn() callbacks.
 */
struct	curparse {
	const char	 *file;		/* Name of the file being parsed (used in messages). */
	int		  fd;		/* Descriptor the current file is read from. */
	int		  wflags;	/* -W flags, set by woptions(). */
#define	WARN_WALL	 (1 << 0)	/* -Wall: mwarn() prints warnings. */
#define	WARN_WERR	 (1 << 2)	/* -Werror: mwarn() returns 0 after printing. */
	int		  fflags;	/* -f flags, set by foptions(). */
#define	IGN_SCOPE	 (1 << 0) 	/* Ignore scope errors. */
#define	NO_IGN_ESCAPE	 (1 << 1) 	/* Don't ignore bad escapes. */
#define	NO_IGN_MACRO	 (1 << 2) 	/* Don't ignore bad macros. */
#define	NO_IGN_CHARS	 (1 << 3)	/* Don't ignore bad chars. */
#define	IGN_ERRORS	 (1 << 4)	/* Ignore failed parse. */
	enum intt	  inttype;	/* Input parsers... */
	struct man	 *man;		/* man parser; allocated in pset(), freed in main(). */
	struct man	 *lastman;	/* man parser used for the last file, if any; reset in main(). */
	struct mdoc	 *mdoc;		/* mdoc parser; allocated in pset(), freed in main(). */
	struct mdoc	 *lastmdoc;	/* mdoc parser used for the last file, if any; reset in main(). */
	enum outt	  outtype;	/* Output devices... */
	out_mdoc	  outmdoc;	/* Hook run on a finished mdoc parse, or NULL. */
	out_man	  	  outman;	/* Hook run on a finished man parse, or NULL. */
	out_free	  outfree;	/* Hook releasing `outdata', or NULL. */
	void		 *outdata;	/* Opaque state passed to the three hooks above. */
};
74 
/*
 * Output back-ends, defined outside this file.  fdesc() installs the
 * tree_*() pair for -Ttree; for the default output it stores the result
 * of ascii_alloc() as the device state and installs the terminal_*()
 * functions as the output and free hooks.
 */
extern	void		 *ascii_alloc(void);
extern	void		  tree_mdoc(void *, const struct mdoc *);
extern	void		  tree_man(void *, const struct man *);
extern	void		  terminal_mdoc(void *, const struct mdoc *);
extern	void		  terminal_man(void *, const struct man *);
extern	void		  terminal_free(void *);
81 
/*
 * Forward declarations of the file-local functions.
 * The *options() functions parse the argument of one command-line
 * option and return 0 on a bad argument; merr()/mwarn() are the message
 * callbacks given to the parsers; ffile()/fdesc() parse one input;
 * pset() chooses the parser for an input; version()/usage() print a
 * message and exit.
 */
static	int		  foptions(int *, char *);
static	int		  toptions(enum outt *, char *);
static	int		  moptions(enum intt *, char *);
static	int		  woptions(int *, char *);
static	int		  merr(void *, int, int, const char *);
static	int		  mwarn(void *, int, int, const char *);
static	int		  ffile(struct buf *, struct buf *,
				const char *, struct curparse *);
static	int		  fdesc(struct buf *, struct buf *,
				struct curparse *);
static	int		  pset(const char *, int, struct curparse *,
				struct man **, struct mdoc **);
static	struct man	 *man_init(struct curparse *);
static	struct mdoc	 *mdoc_init(struct curparse *);
__dead	static void	  version(void);
__dead	static void	  usage(void);
98 
/* Program name, printed by version() and usage(); defined outside this file. */
extern	char		 *__progname;
100 
101 
102 int
103 main(int argc, char *argv[])
104 {
105 	int		 c, rc;
106 	struct buf	 ln, blk;
107 	struct curparse	 curp;
108 
109 	bzero(&curp, sizeof(struct curparse));
110 
111 	curp.inttype = INTT_AUTO;
112 	curp.outtype = OUTT_ASCII;
113 
114 	/* LINTED */
115 	while (-1 != (c = getopt(argc, argv, "f:m:VW:T:")))
116 		switch (c) {
117 		case ('f'):
118 			if ( ! foptions(&curp.fflags, optarg))
119 				return(EXIT_FAILURE);
120 			break;
121 		case ('m'):
122 			if ( ! moptions(&curp.inttype, optarg))
123 				return(EXIT_FAILURE);
124 			break;
125 		case ('T'):
126 			if ( ! toptions(&curp.outtype, optarg))
127 				return(EXIT_FAILURE);
128 			break;
129 		case ('W'):
130 			if ( ! woptions(&curp.wflags, optarg))
131 				return(EXIT_FAILURE);
132 			break;
133 		case ('V'):
134 			version();
135 			/* NOTREACHED */
136 		default:
137 			usage();
138 			/* NOTREACHED */
139 		}
140 
141 	argc -= optind;
142 	argv += optind;
143 
144 	bzero(&ln, sizeof(struct buf));
145 	bzero(&blk, sizeof(struct buf));
146 
147 	rc = 1;
148 
149 	if (NULL == *argv) {
150 		curp.file = "<stdin>";
151 		curp.fd = STDIN_FILENO;
152 
153 		c = fdesc(&blk, &ln, &curp);
154 		if ( ! (IGN_ERRORS & curp.fflags))
155 			rc = 1 == c ? 1 : 0;
156 		else
157 			rc = -1 == c ? 0 : 1;
158 	}
159 
160 	while (rc && *argv) {
161 		c = ffile(&blk, &ln, *argv, &curp);
162 		if ( ! (IGN_ERRORS & curp.fflags))
163 			rc = 1 == c ? 1 : 0;
164 		else
165 			rc = -1 == c ? 0 : 1;
166 
167 		argv++;
168 		if (*argv && rc) {
169 			if (curp.lastman)
170 				if ( ! man_reset(curp.lastman))
171 					rc = 0;
172 			if (curp.lastmdoc)
173 				if ( ! mdoc_reset(curp.lastmdoc))
174 					rc = 0;
175 			curp.lastman = NULL;
176 			curp.lastmdoc = NULL;
177 		}
178 	}
179 
180 	if (blk.buf)
181 		free(blk.buf);
182 	if (ln.buf)
183 		free(ln.buf);
184 	if (curp.outfree)
185 		(*curp.outfree)(curp.outdata);
186 	if (curp.mdoc)
187 		mdoc_free(curp.mdoc);
188 	if (curp.man)
189 		man_free(curp.man);
190 
191 	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
192 }
193 
194 
195 __dead static void
196 version(void)
197 {
198 
199 	(void)printf("%s %s\n", __progname, VERSION);
200 	exit(EXIT_SUCCESS);
201 }
202 
203 
204 __dead static void
205 usage(void)
206 {
207 
208 	(void)fprintf(stderr, "usage: %s [-V] [-foption...] "
209 			"[-mformat] [-Toutput] [-Werr...]\n",
210 			__progname);
211 	exit(EXIT_FAILURE);
212 }
213 
214 
215 static struct man *
216 man_init(struct curparse *curp)
217 {
218 	int		 pflags;
219 	struct man	*man;
220 	struct man_cb	 mancb;
221 
222 	mancb.man_err = merr;
223 	mancb.man_warn = mwarn;
224 
225 	/* Defaults from mandoc.1. */
226 
227 	pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE | MAN_IGN_CHARS;
228 
229 	if (curp->fflags & NO_IGN_MACRO)
230 		pflags &= ~MAN_IGN_MACRO;
231 	if (curp->fflags & NO_IGN_CHARS)
232 		pflags &= ~MAN_IGN_CHARS;
233 	if (curp->fflags & NO_IGN_ESCAPE)
234 		pflags &= ~MAN_IGN_ESCAPE;
235 
236 	if (NULL == (man = man_alloc(curp, pflags, &mancb)))
237 		warnx("memory exhausted");
238 
239 	return(man);
240 }
241 
242 
243 static struct mdoc *
244 mdoc_init(struct curparse *curp)
245 {
246 	int		 pflags;
247 	struct mdoc	*mdoc;
248 	struct mdoc_cb	 mdoccb;
249 
250 	mdoccb.mdoc_err = merr;
251 	mdoccb.mdoc_warn = mwarn;
252 
253 	/* Defaults from mandoc.1. */
254 
255 	pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE | MDOC_IGN_CHARS;
256 
257 	if (curp->fflags & IGN_SCOPE)
258 		pflags |= MDOC_IGN_SCOPE;
259 	if (curp->fflags & NO_IGN_ESCAPE)
260 		pflags &= ~MDOC_IGN_ESCAPE;
261 	if (curp->fflags & NO_IGN_MACRO)
262 		pflags &= ~MDOC_IGN_MACRO;
263 	if (curp->fflags & NO_IGN_CHARS)
264 		pflags &= ~MDOC_IGN_CHARS;
265 
266 	if (NULL == (mdoc = mdoc_alloc(curp, pflags, &mdoccb)))
267 		warnx("memory exhausted");
268 
269 	return(mdoc);
270 }
271 
272 
273 static int
274 ffile(struct buf *blk, struct buf *ln,
275 		const char *file, struct curparse *curp)
276 {
277 	int		 c;
278 
279 	curp->file = file;
280 	if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
281 		warn("%s", curp->file);
282 		return(-1);
283 	}
284 
285 	c = fdesc(blk, ln, curp);
286 
287 	if (-1 == close(curp->fd))
288 		warn("%s", curp->file);
289 
290 	return(c);
291 }
292 
293 
294 static int
295 fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
296 {
297 	size_t		 sz;
298 	ssize_t		 ssz;
299 	struct stat	 st;
300 	int		 j, i, pos, lnn, comment;
301 	struct man	*man;
302 	struct mdoc	*mdoc;
303 
304 	sz = BUFSIZ;
305 	man = NULL;
306 	mdoc = NULL;
307 
308 	/*
309 	 * Two buffers: ln and buf.  buf is the input buffer optimised
310 	 * here for each file's block size.  ln is a line buffer.  Both
311 	 * growable, hence passed in by ptr-ptr.
312 	 */
313 
314 	if (-1 == fstat(curp->fd, &st))
315 		warn("%s", curp->file);
316 	else if ((size_t)st.st_blksize > sz)
317 		sz = st.st_blksize;
318 
319 	if (sz > blk->sz) {
320 		blk->buf = realloc(blk->buf, sz);
321 		if (NULL == blk->buf) {
322 			warn("realloc");
323 			return(-1);
324 		}
325 		blk->sz = sz;
326 	}
327 
328 	/* Fill buf with file blocksize. */
329 
330 	for (lnn = pos = comment = 0; ; ) {
331 		if (-1 == (ssz = read(curp->fd, blk->buf, sz))) {
332 			warn("%s", curp->file);
333 			return(-1);
334 		} else if (0 == ssz)
335 			break;
336 
337 		/* Parse the read block into partial or full lines. */
338 
339 		for (i = 0; i < (int)ssz; i++) {
340 			if (pos >= (int)ln->sz) {
341 				ln->sz += 256; /* Step-size. */
342 				ln->buf = realloc(ln->buf, ln->sz);
343 				if (NULL == ln->buf) {
344 					warn("realloc");
345 					return(-1);
346 				}
347 			}
348 
349 			if ('\n' != blk->buf[i]) {
350 				if (comment)
351 					continue;
352 				ln->buf[pos++] = blk->buf[i];
353 
354 				/* Handle in-line `\"' comments. */
355 
356 				if (1 == pos || '\"' != ln->buf[pos - 1])
357 					continue;
358 
359 				for (j = pos - 2; j >= 0; j--)
360 					if ('\\' != ln->buf[j])
361 						break;
362 
363 				if ( ! ((pos - 2 - j) % 2))
364 					continue;
365 
366 				comment = 1;
367 				pos -= 2;
368 				continue;
369 			}
370 
371 			/* Handle escaped `\\n' newlines. */
372 
373 			if (pos > 0 && 0 == comment &&
374 					'\\' == ln->buf[pos - 1]) {
375 				for (j = pos - 1; j >= 0; j--)
376 					if ('\\' != ln->buf[j])
377 						break;
378 				if ( ! ((pos - j) % 2)) {
379 					pos--;
380 					lnn++;
381 					continue;
382 				}
383 			}
384 
385 			ln->buf[pos] = 0;
386 			lnn++;
387 
388 			/* If unset, assign parser in pset(). */
389 
390 			if ( ! (man || mdoc) && ! pset(ln->buf,
391 						pos, curp, &man, &mdoc))
392 				return(-1);
393 
394 			pos = comment = 0;
395 
396 			/* Pass down into parsers. */
397 
398 			if (man && ! man_parseln(man, lnn, ln->buf))
399 				return(0);
400 			if (mdoc && ! mdoc_parseln(mdoc, lnn, ln->buf))
401 				return(0);
402 		}
403 	}
404 
405 	/* NOTE a parser may not have been assigned, yet. */
406 
407 	if ( ! (man || mdoc)) {
408 		(void)fprintf(stderr, "%s: not a manual\n",
409 				curp->file);
410 		return(0);
411 	}
412 
413 	if (mdoc && ! mdoc_endparse(mdoc))
414 		return(0);
415 	if (man && ! man_endparse(man))
416 		return(0);
417 
418 	/* If unset, allocate output dev now (if applicable). */
419 
420 	if ( ! (curp->outman && curp->outmdoc)) {
421 		switch (curp->outtype) {
422 		case (OUTT_TREE):
423 			curp->outman = tree_man;
424 			curp->outmdoc = tree_mdoc;
425 			break;
426 		case (OUTT_LINT):
427 			break;
428 		default:
429 			curp->outdata = ascii_alloc();
430 			curp->outman = terminal_man;
431 			curp->outmdoc = terminal_mdoc;
432 			curp->outfree = terminal_free;
433 			break;
434 		}
435 	}
436 
437 	/* Execute the out device, if it exists. */
438 
439 	if (man && curp->outman)
440 		(*curp->outman)(curp->outdata, man);
441 	if (mdoc && curp->outmdoc)
442 		(*curp->outmdoc)(curp->outdata, mdoc);
443 
444 	return(1);
445 }
446 
447 
448 static int
449 pset(const char *buf, int pos, struct curparse *curp,
450 		struct man **man, struct mdoc **mdoc)
451 {
452 	int		 i;
453 
454 	/*
455 	 * Try to intuit which kind of manual parser should be used.  If
456 	 * passed in by command-line (-man, -mdoc), then use that
457 	 * explicitly.  If passed as -mandoc, then try to guess from the
458 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
459 	 * default to -man, which is more lenient.
460 	 */
461 
462 	if (buf[0] == '.') {
463 		for (i = 1; buf[i]; i++)
464 			if (' ' != buf[i] && '\t' != buf[i])
465 				break;
466 		if (0 == buf[i])
467 			return(1);
468 	}
469 
470 	switch (curp->inttype) {
471 	case (INTT_MDOC):
472 		if (NULL == curp->mdoc)
473 			curp->mdoc = mdoc_init(curp);
474 		if (NULL == (*mdoc = curp->mdoc))
475 			return(0);
476 		curp->lastmdoc = *mdoc;
477 		return(1);
478 	case (INTT_MAN):
479 		if (NULL == curp->man)
480 			curp->man = man_init(curp);
481 		if (NULL == (*man = curp->man))
482 			return(0);
483 		curp->lastman = *man;
484 		return(1);
485 	default:
486 		break;
487 	}
488 
489 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
490 		if (NULL == curp->mdoc)
491 			curp->mdoc = mdoc_init(curp);
492 		if (NULL == (*mdoc = curp->mdoc))
493 			return(0);
494 		curp->lastmdoc = *mdoc;
495 		return(1);
496 	}
497 
498 	if (NULL == curp->man)
499 		curp->man = man_init(curp);
500 	if (NULL == (*man = curp->man))
501 		return(0);
502 	curp->lastman = *man;
503 	return(1);
504 }
505 
506 
507 static int
508 moptions(enum intt *tflags, char *arg)
509 {
510 
511 	if (0 == strcmp(arg, "doc"))
512 		*tflags = INTT_MDOC;
513 	else if (0 == strcmp(arg, "andoc"))
514 		*tflags = INTT_AUTO;
515 	else if (0 == strcmp(arg, "an"))
516 		*tflags = INTT_MAN;
517 	else {
518 		warnx("bad argument: -m%s", arg);
519 		return(0);
520 	}
521 
522 	return(1);
523 }
524 
525 
526 static int
527 toptions(enum outt *tflags, char *arg)
528 {
529 
530 	if (0 == strcmp(arg, "ascii"))
531 		*tflags = OUTT_ASCII;
532 	else if (0 == strcmp(arg, "lint"))
533 		*tflags = OUTT_LINT;
534 	else if (0 == strcmp(arg, "tree"))
535 		*tflags = OUTT_TREE;
536 	else {
537 		warnx("bad argument: -T%s", arg);
538 		return(0);
539 	}
540 
541 	return(1);
542 }
543 
544 
545 static int
546 foptions(int *fflags, char *arg)
547 {
548 	char		*v, *o;
549 	char		*toks[7];
550 
551 	toks[0] = "ign-scope";
552 	toks[1] = "no-ign-escape";
553 	toks[2] = "no-ign-macro";
554 	toks[3] = "no-ign-chars";
555 	toks[4] = "ign-errors";
556 	toks[5] = "strict";
557 	toks[6] = NULL;
558 
559 	while (*arg) {
560 		o = arg;
561 		switch (getsubopt(&arg, toks, &v)) {
562 		case (0):
563 			*fflags |= IGN_SCOPE;
564 			break;
565 		case (1):
566 			*fflags |= NO_IGN_ESCAPE;
567 			break;
568 		case (2):
569 			*fflags |= NO_IGN_MACRO;
570 			break;
571 		case (3):
572 			*fflags |= NO_IGN_CHARS;
573 			break;
574 		case (4):
575 			*fflags |= IGN_ERRORS;
576 			break;
577 		case (5):
578 			*fflags |= NO_IGN_ESCAPE |
579 			 	   NO_IGN_MACRO | NO_IGN_CHARS;
580 			break;
581 		default:
582 			warnx("bad argument: -f%s", o);
583 			return(0);
584 		}
585 	}
586 
587 	return(1);
588 }
589 
590 
591 static int
592 woptions(int *wflags, char *arg)
593 {
594 	char		*v, *o;
595 	char		*toks[3];
596 
597 	toks[0] = "all";
598 	toks[1] = "error";
599 	toks[2] = NULL;
600 
601 	while (*arg) {
602 		o = arg;
603 		switch (getsubopt(&arg, toks, &v)) {
604 		case (0):
605 			*wflags |= WARN_WALL;
606 			break;
607 		case (1):
608 			*wflags |= WARN_WERR;
609 			break;
610 		default:
611 			warnx("bad argument: -W%s", o);
612 			return(0);
613 		}
614 	}
615 
616 	return(1);
617 }
618 
619 
620 /* ARGSUSED */
621 static int
622 merr(void *arg, int line, int col, const char *msg)
623 {
624 	struct curparse *curp;
625 
626 	curp = (struct curparse *)arg;
627 
628 	(void)fprintf(stderr, "%s:%d:%d: error: %s\n",
629 			curp->file, line, col + 1, msg);
630 
631 	return(0);
632 }
633 
634 
635 static int
636 mwarn(void *arg, int line, int col, const char *msg)
637 {
638 	struct curparse *curp;
639 
640 	curp = (struct curparse *)arg;
641 
642 	if ( ! (curp->wflags & WARN_WALL))
643 		return(1);
644 
645 	(void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
646 			curp->file, line, col + 1, msg);
647 
648 	if ( ! (curp->wflags & WARN_WERR))
649 		return(1);
650 
651 	return(0);
652 }
653 
654