xref: /openbsd-src/usr.bin/mandoc/main.c (revision cd1eb269cafb12c415be1749cd4a4b5422710415)
1 /*	$Id: main.c,v 1.23 2010/04/13 06:52:12 jmc Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/stat.h>
18 
19 #include <assert.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 
27 #include "mdoc.h"
28 #include "man.h"
29 #include "main.h"
30 
31 #define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))
32 
/*
 * Output-device entry points.  The first argument of each is the
 * device's private data (curparse.outdata): two renderers, one per
 * parsed tree type, and a destructor that main() calls during cleanup.
 */
typedef	void	(*out_mdoc)(void *arg, const struct mdoc *mdoc);
typedef	void	(*out_man)(void *arg, const struct man *man);
typedef	void	(*out_free)(void *arg);
36 
/* Growable heap buffer together with its allocated size. */
struct buf {
	char	*buf;	/* Storage; NULL until first grown. */
	size_t	 sz;	/* Number of bytes allocated at buf. */
};
41 
/* Input language, selected with -m. */
enum	intt {
	INTT_AUTO = 0,	/* -mandoc: choose per file from its content. */
	INTT_MDOC = 1,	/* -mdoc: always use the mdoc parser. */
	INTT_MAN = 2	/* -man: always use the man parser. */
};
47 
/* Output device, selected with -T. */
enum	outt {
	OUTT_ASCII = 0,	/* Terminal output (the default). */
	OUTT_TREE = 1,	/* Dump of the parse tree. */
	OUTT_HTML = 2,	/* HTML. */
	OUTT_XHTML = 3,	/* XHTML. */
	OUTT_LINT = 4	/* Parse only; no output device. */
};
55 
56 struct	curparse {
57 	const char	 *file;		/* Current parse. */
58 	int		  fd;		/* Current parse. */
59 	int		  wflags;
60 #define	WARN_WALL	 (1 << 0)	/* All-warnings mask. */
61 #define	WARN_WERR	 (1 << 2)	/* Warnings->errors. */
62 	int		  fflags;
63 #define	FL_IGN_SCOPE	 (1 << 0) 	/* Ignore scope errors. */
64 #define	FL_NIGN_ESCAPE	 (1 << 1) 	/* Don't ignore bad escapes. */
65 #define	FL_NIGN_MACRO	 (1 << 2) 	/* Don't ignore bad macros. */
66 #define	FL_NIGN_CHARS	 (1 << 3)	/* Don't ignore bad chars. */
67 #define	FL_IGN_ERRORS	 (1 << 4)	/* Ignore failed parse. */
68 	enum intt	  inttype;	/* Input parsers... */
69 	struct man	 *man;
70 	struct man	 *lastman;
71 	struct mdoc	 *mdoc;
72 	struct mdoc	 *lastmdoc;
73 	enum outt	  outtype;	/* Output devices... */
74 	out_mdoc	  outmdoc;
75 	out_man	  	  outman;
76 	out_free	  outfree;
77 	void		 *outdata;
78 	char		  outopts[BUFSIZ];
79 };
80 
/*
 * Every "don't ignore" parser flag at once (-fstrict, -Tlint).
 * The expansion is parenthesised so that it remains a single operand
 * when used next to operators such as `~', `&' or `=='.
 */
#define	FL_STRICT	 (FL_NIGN_ESCAPE | \
			  FL_NIGN_MACRO | \
			  FL_NIGN_CHARS)
84 
/* Command-line sub-option parsers; each returns 0 on a bad argument. */
static	int		  foptions(int *fflags, char *arg);
static	int		  toptions(struct curparse *curp, char *arg);
static	int		  moptions(enum intt *tflags, char *arg);
static	int		  woptions(int *wflags, char *arg);

/* Message callbacks handed to the parsers. */
static	int		  merr(void *arg, int line, int col,
				const char *msg);
static	int		  mwarn(void *arg, int line, int col,
				const char *msg);

/* Per-input driver functions. */
static	int		  ffile(struct buf *blk, struct buf *ln,
				const char *file, struct curparse *curp);
static	int		  fdesc(struct buf *blk, struct buf *ln,
				struct curparse *curp);
static	int		  pset(const char *buf, int pos,
				struct curparse *curp,
				struct man **man, struct mdoc **mdoc);

/* Parser constructors. */
static	struct man	 *man_init(struct curparse *curp);
static	struct mdoc	 *mdoc_init(struct curparse *curp);

static	void		  version(void) __attribute__((noreturn));
static	void		  usage(void) __attribute__((noreturn));

/* Last path component of argv[0]; set at the top of main(). */
static	const char	 *progname;
103 
104 
105 int
106 main(int argc, char *argv[])
107 {
108 	int		 c, rc;
109 	struct buf	 ln, blk;
110 	struct curparse	 curp;
111 
112 	progname = strrchr(argv[0], '/');
113 	if (progname == NULL)
114 		progname = argv[0];
115 	else
116 		++progname;
117 
118 	memset(&curp, 0, sizeof(struct curparse));
119 
120 	curp.inttype = INTT_AUTO;
121 	curp.outtype = OUTT_ASCII;
122 
123 	/* LINTED */
124 	while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
125 		switch (c) {
126 		case ('f'):
127 			if ( ! foptions(&curp.fflags, optarg))
128 				return(EXIT_FAILURE);
129 			break;
130 		case ('m'):
131 			if ( ! moptions(&curp.inttype, optarg))
132 				return(EXIT_FAILURE);
133 			break;
134 		case ('O'):
135 			(void)strlcat(curp.outopts, optarg, BUFSIZ);
136 			(void)strlcat(curp.outopts, ",", BUFSIZ);
137 			break;
138 		case ('T'):
139 			if ( ! toptions(&curp, optarg))
140 				return(EXIT_FAILURE);
141 			break;
142 		case ('W'):
143 			if ( ! woptions(&curp.wflags, optarg))
144 				return(EXIT_FAILURE);
145 			break;
146 		case ('V'):
147 			version();
148 			/* NOTREACHED */
149 		default:
150 			usage();
151 			/* NOTREACHED */
152 		}
153 
154 	argc -= optind;
155 	argv += optind;
156 
157 	memset(&ln, 0, sizeof(struct buf));
158 	memset(&blk, 0, sizeof(struct buf));
159 
160 	rc = 1;
161 
162 	if (NULL == *argv) {
163 		curp.file = "<stdin>";
164 		curp.fd = STDIN_FILENO;
165 
166 		c = fdesc(&blk, &ln, &curp);
167 		if ( ! (FL_IGN_ERRORS & curp.fflags))
168 			rc = 1 == c ? 1 : 0;
169 		else
170 			rc = -1 == c ? 0 : 1;
171 	}
172 
173 	while (rc && *argv) {
174 		c = ffile(&blk, &ln, *argv, &curp);
175 		if ( ! (FL_IGN_ERRORS & curp.fflags))
176 			rc = 1 == c ? 1 : 0;
177 		else
178 			rc = -1 == c ? 0 : 1;
179 
180 		argv++;
181 		if (*argv && rc) {
182 			if (curp.lastman)
183 				man_reset(curp.lastman);
184 			if (curp.lastmdoc)
185 				mdoc_reset(curp.lastmdoc);
186 			curp.lastman = NULL;
187 			curp.lastmdoc = NULL;
188 		}
189 	}
190 
191 	if (blk.buf)
192 		free(blk.buf);
193 	if (ln.buf)
194 		free(ln.buf);
195 	if (curp.outfree)
196 		(*curp.outfree)(curp.outdata);
197 	if (curp.mdoc)
198 		mdoc_free(curp.mdoc);
199 	if (curp.man)
200 		man_free(curp.man);
201 
202 	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
203 }
204 
205 
206 static void
207 version(void)
208 {
209 
210 	(void)printf("%s %s\n", progname, VERSION);
211 	exit(EXIT_SUCCESS);
212 }
213 
214 
215 static void
216 usage(void)
217 {
218 
219 	(void)fprintf(stderr, "usage: %s [-V] [-foption] "
220 			"[-mformat] [-Ooption] [-Toutput] "
221 			"[-Werr] [file...]\n", progname);
222 	exit(EXIT_FAILURE);
223 }
224 
225 
226 static struct man *
227 man_init(struct curparse *curp)
228 {
229 	int		 pflags;
230 	struct man_cb	 mancb;
231 
232 	mancb.man_err = merr;
233 	mancb.man_warn = mwarn;
234 
235 	/* Defaults from mandoc.1. */
236 
237 	pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE | MAN_IGN_CHARS;
238 
239 	if (curp->fflags & FL_NIGN_MACRO)
240 		pflags &= ~MAN_IGN_MACRO;
241 	if (curp->fflags & FL_NIGN_CHARS)
242 		pflags &= ~MAN_IGN_CHARS;
243 	if (curp->fflags & FL_NIGN_ESCAPE)
244 		pflags &= ~MAN_IGN_ESCAPE;
245 
246 	return(man_alloc(curp, pflags, &mancb));
247 }
248 
249 
250 static struct mdoc *
251 mdoc_init(struct curparse *curp)
252 {
253 	int		 pflags;
254 	struct mdoc_cb	 mdoccb;
255 
256 	mdoccb.mdoc_err = merr;
257 	mdoccb.mdoc_warn = mwarn;
258 
259 	/* Defaults from mandoc.1. */
260 
261 	pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE | MDOC_IGN_CHARS;
262 
263 	if (curp->fflags & FL_IGN_SCOPE)
264 		pflags |= MDOC_IGN_SCOPE;
265 	if (curp->fflags & FL_NIGN_ESCAPE)
266 		pflags &= ~MDOC_IGN_ESCAPE;
267 	if (curp->fflags & FL_NIGN_MACRO)
268 		pflags &= ~MDOC_IGN_MACRO;
269 	if (curp->fflags & FL_NIGN_CHARS)
270 		pflags &= ~MDOC_IGN_CHARS;
271 
272 	return(mdoc_alloc(curp, pflags, &mdoccb));
273 }
274 
275 
276 static int
277 ffile(struct buf *blk, struct buf *ln,
278 		const char *file, struct curparse *curp)
279 {
280 	int		 c;
281 
282 	curp->file = file;
283 	if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
284 		perror(curp->file);
285 		return(-1);
286 	}
287 
288 	c = fdesc(blk, ln, curp);
289 
290 	if (-1 == close(curp->fd))
291 		perror(curp->file);
292 
293 	return(c);
294 }
295 
296 
297 static int
298 fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
299 {
300 	size_t		 sz;
301 	ssize_t		 ssz;
302 	struct stat	 st;
303 	int		 j, i, pos, lnn, comment;
304 	struct man	*man;
305 	struct mdoc	*mdoc;
306 
307 	sz = BUFSIZ;
308 	man = NULL;
309 	mdoc = NULL;
310 
311 	/*
312 	 * Two buffers: ln and buf.  buf is the input buffer optimised
313 	 * here for each file's block size.  ln is a line buffer.  Both
314 	 * growable, hence passed in by ptr-ptr.
315 	 */
316 
317 	if (-1 == fstat(curp->fd, &st))
318 		perror(curp->file);
319 	else if ((size_t)st.st_blksize > sz)
320 		sz = st.st_blksize;
321 
322 	if (sz > blk->sz) {
323 		blk->buf = realloc(blk->buf, sz);
324 		if (NULL == blk->buf) {
325 			perror(NULL);
326 			exit(EXIT_FAILURE);
327 		}
328 		blk->sz = sz;
329 	}
330 
331 	/* Fill buf with file blocksize. */
332 
333 	for (lnn = pos = comment = 0; ; ) {
334 		if (-1 == (ssz = read(curp->fd, blk->buf, sz))) {
335 			perror(curp->file);
336 			return(-1);
337 		} else if (0 == ssz)
338 			break;
339 
340 		/* Parse the read block into partial or full lines. */
341 
342 		for (i = 0; i < (int)ssz; i++) {
343 			if (pos >= (int)ln->sz) {
344 				ln->sz += 256; /* Step-size. */
345 				ln->buf = realloc(ln->buf, ln->sz);
346 				if (NULL == ln->buf) {
347 					perror(NULL);
348 					return(EXIT_FAILURE);
349 				}
350 			}
351 
352 			if ('\n' != blk->buf[i]) {
353 				if (comment)
354 					continue;
355 				ln->buf[pos++] = blk->buf[i];
356 
357 				/* Handle in-line `\"' comments. */
358 
359 				if (1 == pos || '\"' != ln->buf[pos - 1])
360 					continue;
361 
362 				for (j = pos - 2; j >= 0; j--)
363 					if ('\\' != ln->buf[j])
364 						break;
365 
366 				if ( ! ((pos - 2 - j) % 2))
367 					continue;
368 
369 				comment = 1;
370 				pos -= 2;
371 				continue;
372 			}
373 
374 			/* Handle escaped `\\n' newlines. */
375 
376 			if (pos > 0 && 0 == comment &&
377 					'\\' == ln->buf[pos - 1]) {
378 				for (j = pos - 1; j >= 0; j--)
379 					if ('\\' != ln->buf[j])
380 						break;
381 				if ( ! ((pos - j) % 2)) {
382 					pos--;
383 					lnn++;
384 					continue;
385 				}
386 			}
387 
388 			ln->buf[pos] = 0;
389 			lnn++;
390 
391 			/* If unset, assign parser in pset(). */
392 
393 			if ( ! (man || mdoc) && ! pset(ln->buf,
394 						pos, curp, &man, &mdoc))
395 				return(-1);
396 
397 			pos = comment = 0;
398 
399 			/* Pass down into parsers. */
400 
401 			if (man && ! man_parseln(man, lnn, ln->buf))
402 				return(0);
403 			if (mdoc && ! mdoc_parseln(mdoc, lnn, ln->buf))
404 				return(0);
405 		}
406 	}
407 
408 	/* NOTE a parser may not have been assigned, yet. */
409 
410 	if ( ! (man || mdoc)) {
411 		fprintf(stderr, "%s: Not a manual\n", curp->file);
412 		return(0);
413 	}
414 
415 	if (mdoc && ! mdoc_endparse(mdoc))
416 		return(0);
417 	if (man && ! man_endparse(man))
418 		return(0);
419 
420 	/* If unset, allocate output dev now (if applicable). */
421 
422 	if ( ! (curp->outman && curp->outmdoc)) {
423 		switch (curp->outtype) {
424 		case (OUTT_XHTML):
425 			curp->outdata = xhtml_alloc(curp->outopts);
426 			curp->outman = html_man;
427 			curp->outmdoc = html_mdoc;
428 			curp->outfree = html_free;
429 			break;
430 		case (OUTT_HTML):
431 			curp->outdata = html_alloc(curp->outopts);
432 			curp->outman = html_man;
433 			curp->outmdoc = html_mdoc;
434 			curp->outfree = html_free;
435 			break;
436 		case (OUTT_TREE):
437 			curp->outman = tree_man;
438 			curp->outmdoc = tree_mdoc;
439 			break;
440 		case (OUTT_LINT):
441 			break;
442 		default:
443 			curp->outdata = ascii_alloc();
444 			curp->outman = terminal_man;
445 			curp->outmdoc = terminal_mdoc;
446 			curp->outfree = terminal_free;
447 			break;
448 		}
449 	}
450 
451 	/* Execute the out device, if it exists. */
452 
453 	if (man && curp->outman)
454 		(*curp->outman)(curp->outdata, man);
455 	if (mdoc && curp->outmdoc)
456 		(*curp->outmdoc)(curp->outdata, mdoc);
457 
458 	return(1);
459 }
460 
461 
462 static int
463 pset(const char *buf, int pos, struct curparse *curp,
464 		struct man **man, struct mdoc **mdoc)
465 {
466 	int		 i;
467 
468 	/*
469 	 * Try to intuit which kind of manual parser should be used.  If
470 	 * passed in by command-line (-man, -mdoc), then use that
471 	 * explicitly.  If passed as -mandoc, then try to guess from the
472 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
473 	 * default to -man, which is more lenient.
474 	 */
475 
476 	if (buf[0] == '.') {
477 		for (i = 1; buf[i]; i++)
478 			if (' ' != buf[i] && '\t' != buf[i])
479 				break;
480 		if (0 == buf[i])
481 			return(1);
482 	}
483 
484 	switch (curp->inttype) {
485 	case (INTT_MDOC):
486 		if (NULL == curp->mdoc)
487 			curp->mdoc = mdoc_init(curp);
488 		if (NULL == (*mdoc = curp->mdoc))
489 			return(0);
490 		curp->lastmdoc = *mdoc;
491 		return(1);
492 	case (INTT_MAN):
493 		if (NULL == curp->man)
494 			curp->man = man_init(curp);
495 		if (NULL == (*man = curp->man))
496 			return(0);
497 		curp->lastman = *man;
498 		return(1);
499 	default:
500 		break;
501 	}
502 
503 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
504 		if (NULL == curp->mdoc)
505 			curp->mdoc = mdoc_init(curp);
506 		if (NULL == (*mdoc = curp->mdoc))
507 			return(0);
508 		curp->lastmdoc = *mdoc;
509 		return(1);
510 	}
511 
512 	if (NULL == curp->man)
513 		curp->man = man_init(curp);
514 	if (NULL == (*man = curp->man))
515 		return(0);
516 	curp->lastman = *man;
517 	return(1);
518 }
519 
520 
521 static int
522 moptions(enum intt *tflags, char *arg)
523 {
524 
525 	if (0 == strcmp(arg, "doc"))
526 		*tflags = INTT_MDOC;
527 	else if (0 == strcmp(arg, "andoc"))
528 		*tflags = INTT_AUTO;
529 	else if (0 == strcmp(arg, "an"))
530 		*tflags = INTT_MAN;
531 	else {
532 		fprintf(stderr, "%s: Bad argument\n", arg);
533 		return(0);
534 	}
535 
536 	return(1);
537 }
538 
539 
540 static int
541 toptions(struct curparse *curp, char *arg)
542 {
543 
544 	if (0 == strcmp(arg, "ascii"))
545 		curp->outtype = OUTT_ASCII;
546 	else if (0 == strcmp(arg, "lint")) {
547 		curp->outtype = OUTT_LINT;
548 		curp->wflags |= WARN_WALL;
549 		curp->fflags |= FL_STRICT;
550 	}
551 	else if (0 == strcmp(arg, "tree"))
552 		curp->outtype = OUTT_TREE;
553 	else if (0 == strcmp(arg, "html"))
554 		curp->outtype = OUTT_HTML;
555 	else if (0 == strcmp(arg, "xhtml"))
556 		curp->outtype = OUTT_XHTML;
557 	else {
558 		fprintf(stderr, "%s: Bad argument\n", arg);
559 		return(0);
560 	}
561 
562 	return(1);
563 }
564 
565 
566 static int
567 foptions(int *fflags, char *arg)
568 {
569 	char		*v, *o;
570 	const char	*toks[8];
571 
572 	toks[0] = "ign-scope";
573 	toks[1] = "no-ign-escape";
574 	toks[2] = "no-ign-macro";
575 	toks[3] = "no-ign-chars";
576 	toks[4] = "ign-errors";
577 	toks[5] = "strict";
578 	toks[6] = "ign-escape";
579 	toks[7] = NULL;
580 
581 	while (*arg) {
582 		o = arg;
583 		switch (getsubopt(&arg, UNCONST(toks), &v)) {
584 		case (0):
585 			*fflags |= FL_IGN_SCOPE;
586 			break;
587 		case (1):
588 			*fflags |= FL_NIGN_ESCAPE;
589 			break;
590 		case (2):
591 			*fflags |= FL_NIGN_MACRO;
592 			break;
593 		case (3):
594 			*fflags |= FL_NIGN_CHARS;
595 			break;
596 		case (4):
597 			*fflags |= FL_IGN_ERRORS;
598 			break;
599 		case (5):
600 			*fflags |= FL_STRICT;
601 			break;
602 		case (6):
603 			*fflags &= ~FL_NIGN_ESCAPE;
604 			break;
605 		default:
606 			fprintf(stderr, "%s: Bad argument\n", o);
607 			return(0);
608 		}
609 	}
610 
611 	return(1);
612 }
613 
614 
615 static int
616 woptions(int *wflags, char *arg)
617 {
618 	char		*v, *o;
619 	const char	*toks[3];
620 
621 	toks[0] = "all";
622 	toks[1] = "error";
623 	toks[2] = NULL;
624 
625 	while (*arg) {
626 		o = arg;
627 		switch (getsubopt(&arg, UNCONST(toks), &v)) {
628 		case (0):
629 			*wflags |= WARN_WALL;
630 			break;
631 		case (1):
632 			*wflags |= WARN_WERR;
633 			break;
634 		default:
635 			fprintf(stderr, "%s: Bad argument\n", o);
636 			return(0);
637 		}
638 	}
639 
640 	return(1);
641 }
642 
643 
644 /* ARGSUSED */
645 static int
646 merr(void *arg, int line, int col, const char *msg)
647 {
648 	struct curparse *curp;
649 
650 	curp = (struct curparse *)arg;
651 
652 	(void)fprintf(stderr, "%s:%d:%d: error: %s\n",
653 			curp->file, line, col + 1, msg);
654 
655 	return(0);
656 }
657 
658 
659 static int
660 mwarn(void *arg, int line, int col, const char *msg)
661 {
662 	struct curparse *curp;
663 
664 	curp = (struct curparse *)arg;
665 
666 	if ( ! (curp->wflags & WARN_WALL))
667 		return(1);
668 
669 	(void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
670 			curp->file, line, col + 1, msg);
671 
672 	if ( ! (curp->wflags & WARN_WERR))
673 		return(1);
674 
675 	return(0);
676 }
677 
678