xref: /openbsd-src/usr.bin/mandoc/main.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /* $Id: main.c,v 1.1 2009/04/06 20:30:40 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the
7  * above copyright notice and this permission notice appear in all
8  * copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11  * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12  * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13  * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17  * PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/stat.h>
20 
21 #include <assert.h>
22 #include <err.h>
23 #include <fcntl.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
28 
29 #include "mdoc.h"
30 #include "man.h"
31 
/*
 * Signatures of the output back-end entry points kept in struct
 * curparse: one renderer per parsed manual type plus a destructor for
 * the back-end's private data (the leading void pointer).
 */
typedef	int	(*out_mdoc)(void *arg, const struct mdoc *mdoc);
typedef	int	(*out_man)(void *arg, const struct man *man);
typedef	void	(*out_free)(void *arg);
35 
/* A heap buffer together with its allocated size in bytes. */
struct	buf {
	char		*buf;	/* Storage, NULL until first allocated. */
	size_t		 sz;	/* Bytes currently allocated for buf. */
};
40 
/* Input language selection, chosen with the -m option. */
enum	intt {
	INTT_AUTO = 0,		/* -mandoc: guess from the input. */
	INTT_MDOC,		/* -mdoc: always use the mdoc parser. */
	INTT_MAN		/* -man: always use the man parser. */
};
46 
/* Output device selection, chosen with the -T option. */
enum	outt {
	OUTT_ASCII = 0,		/* -Tascii: terminal renderer (default). */
	OUTT_TREE,		/* -Ttree: tree_man()/tree_mdoc() output. */
	OUTT_LINT		/* -Tlint: parse only, no output device. */
};
52 
53 struct	curparse {
54 	const char	 *file;		/* Current parse. */
55 	int		  fd;		/* Current parse. */
56 	int		  wflags;
57 #define	WARN_WALL	  0x03		/* All-warnings mask. */
58 #define	WARN_WCOMPAT	 (1 << 0)	/* Compatibility warnings. */
59 #define	WARN_WSYNTAX	 (1 << 1)	/* Syntax warnings. */
60 #define	WARN_WERR	 (1 << 2)	/* Warnings->errors. */
61 	int		  fflags;
62 #define	IGN_SCOPE	 (1 << 0) 	/* Ignore scope errors. */
63 #define	NO_IGN_ESCAPE	 (1 << 1) 	/* Don't ignore bad escapes. */
64 #define	NO_IGN_MACRO	 (1 << 2) 	/* Don't ignore bad macros. */
65 #define	NO_IGN_CHARS	 (1 << 3)	/* Don't ignore bad chars. */
66 	enum intt	  inttype;	/* Input parsers. */
67 	struct man	 *man;
68 	struct man	 *lastman;
69 	struct mdoc	 *mdoc;
70 	struct mdoc	 *lastmdoc;
71 	enum outt	  outtype;	/* Output devices. */
72 	out_mdoc	  outmdoc;
73 	out_man	  	  outman;
74 	out_free	  outfree;
75 	void		 *outdata;
76 };
77 
/* Output back-ends; they are only declared, not defined, in this file. */
extern	void	*ascii_alloc(void);
extern	int	 tree_mdoc(void *arg, const struct mdoc *mdoc);
extern	int	 tree_man(void *arg, const struct man *man);
extern	int	 terminal_mdoc(void *arg, const struct mdoc *mdoc);
extern	int	 terminal_man(void *arg, const struct man *man);
extern	void	 terminal_free(void *arg);
84 
85 static	int		  foptions(int *, char *);
86 static	int		  toptions(enum outt *, char *);
87 static	int		  moptions(enum intt *, char *);
88 static	int		  woptions(int *, char *);
89 static	int		  merr(void *, int, int, const char *);
90 static	int		  manwarn(void *, int, int, const char *);
91 static	int		  mdocwarn(void *, int, int,
92 				enum mdoc_warn, const char *);
93 static	int		  fstdin(struct buf *, struct buf *,
94 				struct curparse *);
95 static	int		  ffile(struct buf *, struct buf *,
96 				const char *, struct curparse *);
97 static	int		  fdesc(struct buf *, struct buf *,
98 				struct curparse *);
99 static	int		  pset(const char *, int, struct curparse *,
100 				struct man **, struct mdoc **);
101 static	struct man	 *man_init(struct curparse *);
102 static	struct mdoc	 *mdoc_init(struct curparse *);
103 __dead	static void	  usage(void);
104 
105 extern	char		 *__progname;
106 
107 
108 int
109 main(int argc, char *argv[])
110 {
111 	int		 c, rc;
112 	struct buf	 ln, blk;
113 	struct curparse	 curp;
114 
115 	bzero(&curp, sizeof(struct curparse));
116 
117 	curp.inttype = INTT_AUTO;
118 	curp.outtype = OUTT_ASCII;
119 
120 	/* LINTED */
121 	while (-1 != (c = getopt(argc, argv, "f:m:W:T:")))
122 		switch (c) {
123 		case ('f'):
124 			if ( ! foptions(&curp.fflags, optarg))
125 				return(0);
126 			break;
127 		case ('m'):
128 			if ( ! moptions(&curp.inttype, optarg))
129 				return(0);
130 			break;
131 		case ('T'):
132 			if ( ! toptions(&curp.outtype, optarg))
133 				return(0);
134 			break;
135 		case ('W'):
136 			if ( ! woptions(&curp.wflags, optarg))
137 				return(0);
138 			break;
139 		default:
140 			usage();
141 			/* NOTREACHED */
142 		}
143 
144 	argc -= optind;
145 	argv += optind;
146 
147 	/* Configure buffers. */
148 
149 	bzero(&ln, sizeof(struct buf));
150 	bzero(&blk, sizeof(struct buf));
151 
152 	rc = 1;
153 
154 	if (NULL == *argv)
155 		if ( ! fstdin(&blk, &ln, &curp))
156 			rc = 0;
157 
158 	while (rc && *argv) {
159 		if ( ! ffile(&blk, &ln, *argv, &curp))
160 			rc = 0;
161 		argv++;
162 		if (*argv && rc) {
163 			if (curp.lastman)
164 				if ( ! man_reset(curp.lastman))
165 					rc = 0;
166 			if (curp.lastmdoc)
167 				if ( ! mdoc_reset(curp.lastmdoc))
168 					rc = 0;
169 			curp.lastman = NULL;
170 			curp.lastmdoc = NULL;
171 		}
172 	}
173 
174 	if (blk.buf)
175 		free(blk.buf);
176 	if (ln.buf)
177 		free(ln.buf);
178 	if (curp.outfree)
179 		(*curp.outfree)(curp.outdata);
180 	if (curp.mdoc)
181 		mdoc_free(curp.mdoc);
182 	if (curp.man)
183 		man_free(curp.man);
184 
185 	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
186 }
187 
188 
189 __dead static void
190 usage(void)
191 {
192 
193 	(void)fprintf(stderr, "usage: %s [-foption...] "
194 			"[-mformat] [-Toutput] [-Werr...]\n",
195 			__progname);
196 	exit(EXIT_FAILURE);
197 }
198 
199 
200 static struct man *
201 man_init(struct curparse *curp)
202 {
203 	int		 pflags;
204 	struct man	*man;
205 	struct man_cb	 mancb;
206 
207 	mancb.man_err = merr;
208 	mancb.man_warn = manwarn;
209 
210 	pflags = MAN_IGN_MACRO;
211 
212 	if (curp->fflags & NO_IGN_MACRO)
213 		pflags &= ~MAN_IGN_MACRO;
214 
215 	if (NULL == (man = man_alloc(curp, pflags, &mancb)))
216 		warnx("memory exhausted");
217 
218 	return(man);
219 }
220 
221 
222 static struct mdoc *
223 mdoc_init(struct curparse *curp)
224 {
225 	int		 pflags;
226 	struct mdoc	*mdoc;
227 	struct mdoc_cb	 mdoccb;
228 
229 	mdoccb.mdoc_msg = NULL;
230 	mdoccb.mdoc_err = merr;
231 	mdoccb.mdoc_warn = mdocwarn;
232 
233 	pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE | MDOC_IGN_CHARS;
234 
235 	if (curp->fflags & IGN_SCOPE)
236 		pflags |= MDOC_IGN_SCOPE;
237 	if (curp->fflags & NO_IGN_ESCAPE)
238 		pflags &= ~MDOC_IGN_ESCAPE;
239 	if (curp->fflags & NO_IGN_MACRO)
240 		pflags &= ~MDOC_IGN_MACRO;
241 	if (curp->fflags & NO_IGN_CHARS)
242 		pflags &= ~MDOC_IGN_CHARS;
243 
244 	if (NULL == (mdoc = mdoc_alloc(curp, pflags, &mdoccb)))
245 		warnx("memory exhausted");
246 
247 	return(mdoc);
248 }
249 
250 
251 static int
252 fstdin(struct buf *blk, struct buf *ln, struct curparse *curp)
253 {
254 
255 	curp->file = "<stdin>";
256 	curp->fd = STDIN_FILENO;
257 	return(fdesc(blk, ln, curp));
258 }
259 
260 
261 static int
262 ffile(struct buf *blk, struct buf *ln,
263 		const char *file, struct curparse *curp)
264 {
265 	int		 c;
266 
267 	curp->file = file;
268 	if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
269 		warn("%s", curp->file);
270 		return(0);
271 	}
272 
273 	c = fdesc(blk, ln, curp);
274 
275 	if (-1 == close(curp->fd))
276 		warn("%s", curp->file);
277 
278 	return(c);
279 }
280 
281 
282 static int
283 fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
284 {
285 	size_t		 sz;
286 	ssize_t		 ssz;
287 	struct stat	 st;
288 	int		 j, i, pos, lnn;
289 	struct man	*man;
290 	struct mdoc	*mdoc;
291 
292 	sz = BUFSIZ;
293 	man = NULL;
294 	mdoc = NULL;
295 
296 	/*
297 	 * Two buffers: ln and buf.  buf is the input buffer optimised
298 	 * here for each file's block size.  ln is a line buffer.  Both
299 	 * growable, hence passed in by ptr-ptr.
300 	 */
301 
302 	if (-1 == fstat(curp->fd, &st))
303 		warnx("%s", curp->file);
304 	else if ((size_t)st.st_blksize > sz)
305 		sz = st.st_blksize;
306 
307 	if (sz > blk->sz) {
308 		blk->buf = realloc(blk->buf, sz);
309 		if (NULL == blk->buf) {
310 			warn("realloc");
311 			return(0);
312 		}
313 		blk->sz = sz;
314 	}
315 
316 	/* Fill buf with file blocksize. */
317 
318 	for (lnn = 0, pos = 0; ; ) {
319 		if (-1 == (ssz = read(curp->fd, blk->buf, sz))) {
320 			warn("%s", curp->file);
321 			return(0);
322 		} else if (0 == ssz)
323 			break;
324 
325 		/* Parse the read block into partial or full lines. */
326 
327 		for (i = 0; i < (int)ssz; i++) {
328 			if (pos >= (int)ln->sz) {
329 				ln->sz += 256; /* Step-size. */
330 				ln->buf = realloc(ln->buf, ln->sz);
331 				if (NULL == ln->buf) {
332 					warn("realloc");
333 					return(0);
334 				}
335 			}
336 
337 			if ('\n' != blk->buf[i]) {
338 				ln->buf[pos++] = blk->buf[i];
339 				continue;
340 			}
341 
342 			/* Check for CPP-escaped newline.  */
343 
344 			if (pos > 0 && '\\' == ln->buf[pos - 1]) {
345 				for (j = pos - 1; j >= 0; j--)
346 					if ('\\' != ln->buf[j])
347 						break;
348 
349 				if ( ! ((pos - j) % 2)) {
350 					pos--;
351 					lnn++;
352 					continue;
353 				}
354 			}
355 
356 			ln->buf[pos] = 0;
357 			lnn++;
358 
359 			/*
360 			 * If no manual parser has been assigned, then
361 			 * try to assign one in pset(), which may do
362 			 * nothing at all.  After this, parse the manual
363 			 * line accordingly.
364 			 */
365 
366 			if ( ! (man || mdoc) && ! pset(ln->buf,
367 						pos, curp, &man, &mdoc))
368 				return(0);
369 
370 			pos = 0;
371 
372 			if (man && ! man_parseln(man, lnn, ln->buf))
373 				return(0);
374 			if (mdoc && ! mdoc_parseln(mdoc, lnn, ln->buf))
375 				return(0);
376 		}
377 	}
378 
379 	/* Note that a parser may not have been assigned, yet. */
380 
381 	if ( ! (man || mdoc)) {
382 		warnx("%s: not a manual", curp->file);
383 		return(0);
384 	}
385 
386 	if (mdoc && ! mdoc_endparse(mdoc))
387 		return(0);
388 	if (man && ! man_endparse(man))
389 		return(0);
390 
391 	/*
392 	 * If an output device hasn't been allocated, see if we should
393 	 * do so now.  Note that not all outtypes have functions, so
394 	 * this switch statement may be superfluous, but it's
395 	 * low-overhead enough not to matter very much.
396 	 */
397 
398 	if ( ! (curp->outman && curp->outmdoc)) {
399 		switch (curp->outtype) {
400 		case (OUTT_TREE):
401 			curp->outman = tree_man;
402 			curp->outmdoc = tree_mdoc;
403 			break;
404 		case (OUTT_LINT):
405 			break;
406 		default:
407 			curp->outdata = ascii_alloc();
408 			curp->outman = terminal_man;
409 			curp->outmdoc = terminal_mdoc;
410 			curp->outfree = terminal_free;
411 			break;
412 		}
413 	}
414 
415 	/* Execute the out device, if it exists. */
416 
417 	if (man && curp->outman)
418 		if ( ! (*curp->outman)(curp->outdata, man))
419 			return(0);
420 	if (mdoc && curp->outmdoc)
421 		if ( ! (*curp->outmdoc)(curp->outdata, mdoc))
422 			return(0);
423 
424 	return(1);
425 }
426 
427 
428 static int
429 pset(const char *buf, int pos, struct curparse *curp,
430 		struct man **man, struct mdoc **mdoc)
431 {
432 
433 	/*
434 	 * Try to intuit which kind of manual parser should be used.  If
435 	 * passed in by command-line (-man, -mdoc), then use that
436 	 * explicitly.  If passed as -mandoc, then try to guess from the
437 	 * line: either skip comments, use -mdoc when finding `.Dt', or
438 	 * default to -man, which is more lenient.
439 	 */
440 
441 	if (pos >= 3 && 0 == memcmp(buf, ".\\\"", 3))
442 		return(1);
443 
444 	switch (curp->inttype) {
445 	case (INTT_MDOC):
446 		if (NULL == curp->mdoc)
447 			curp->mdoc = mdoc_init(curp);
448 		if (NULL == (*mdoc = curp->mdoc))
449 			return(0);
450 		curp->lastmdoc = *mdoc;
451 		return(1);
452 	case (INTT_MAN):
453 		if (NULL == curp->man)
454 			curp->man = man_init(curp);
455 		if (NULL == (*man = curp->man))
456 			return(0);
457 		curp->lastman = *man;
458 		return(1);
459 	default:
460 		break;
461 	}
462 
463 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
464 		if (NULL == curp->mdoc)
465 			curp->mdoc = mdoc_init(curp);
466 		if (NULL == (*mdoc = curp->mdoc))
467 			return(0);
468 		curp->lastmdoc = *mdoc;
469 		return(1);
470 	}
471 
472 	if (NULL == curp->man)
473 		curp->man = man_init(curp);
474 	if (NULL == (*man = curp->man))
475 		return(0);
476 	curp->lastman = *man;
477 	return(1);
478 }
479 
480 
481 static int
482 moptions(enum intt *tflags, char *arg)
483 {
484 
485 	if (0 == strcmp(arg, "doc"))
486 		*tflags = INTT_MDOC;
487 	else if (0 == strcmp(arg, "andoc"))
488 		*tflags = INTT_AUTO;
489 	else if (0 == strcmp(arg, "an"))
490 		*tflags = INTT_MAN;
491 	else {
492 		warnx("bad argument: -m%s", arg);
493 		return(0);
494 	}
495 
496 	return(1);
497 }
498 
499 
500 static int
501 toptions(enum outt *tflags, char *arg)
502 {
503 
504 	if (0 == strcmp(arg, "ascii"))
505 		*tflags = OUTT_ASCII;
506 	else if (0 == strcmp(arg, "lint"))
507 		*tflags = OUTT_LINT;
508 	else if (0 == strcmp(arg, "tree"))
509 		*tflags = OUTT_TREE;
510 	else {
511 		warnx("bad argument: -T%s", arg);
512 		return(0);
513 	}
514 
515 	return(1);
516 }
517 
518 
519 /*
520  * Parse out the options for [-fopt...] setting compiler options.  These
521  * can be comma-delimited or called again.
522  */
523 static int
524 foptions(int *fflags, char *arg)
525 {
526 	char		*v;
527 	char		*toks[6];
528 
529 	toks[0] = "ign-scope";
530 	toks[1] = "no-ign-escape";
531 	toks[2] = "no-ign-macro";
532 	toks[3] = "no-ign-chars";
533 	toks[4] = "strict";
534 	toks[5] = NULL;
535 
536 	while (*arg)
537 		switch (getsubopt(&arg, toks, &v)) {
538 		case (0):
539 			*fflags |= IGN_SCOPE;
540 			break;
541 		case (1):
542 			*fflags |= NO_IGN_ESCAPE;
543 			break;
544 		case (2):
545 			*fflags |= NO_IGN_MACRO;
546 			break;
547 		case (3):
548 			*fflags |= NO_IGN_CHARS;
549 			break;
550 		case (4):
551 			*fflags |= NO_IGN_ESCAPE |
552 			 	   NO_IGN_MACRO | NO_IGN_CHARS;
553 			break;
554 		default:
555 			warnx("bad argument: -f%s", arg);
556 			return(0);
557 		}
558 
559 	return(1);
560 }
561 
562 
563 /*
564  * Parse out the options for [-Werr...], which sets warning modes.
565  * These can be comma-delimited or called again.
566  */
567 static int
568 woptions(int *wflags, char *arg)
569 {
570 	char		*v;
571 	char		*toks[5];
572 
573 	toks[0] = "all";
574 	toks[1] = "compat";
575 	toks[2] = "syntax";
576 	toks[3] = "error";
577 	toks[4] = NULL;
578 
579 	while (*arg)
580 		switch (getsubopt(&arg, toks, &v)) {
581 		case (0):
582 			*wflags |= WARN_WALL;
583 			break;
584 		case (1):
585 			*wflags |= WARN_WCOMPAT;
586 			break;
587 		case (2):
588 			*wflags |= WARN_WSYNTAX;
589 			break;
590 		case (3):
591 			*wflags |= WARN_WERR;
592 			break;
593 		default:
594 			warnx("bad argument: -W%s", arg);
595 			return(0);
596 		}
597 
598 	return(1);
599 }
600 
601 
602 /* ARGSUSED */
603 static int
604 merr(void *arg, int line, int col, const char *msg)
605 {
606 	struct curparse *curp;
607 
608 	curp = (struct curparse *)arg;
609 
610 	warnx("%s:%d: error: %s (column %d)",
611 			curp->file, line, msg, col);
612 	return(0);
613 }
614 
615 
616 static int
617 mdocwarn(void *arg, int line, int col,
618 		enum mdoc_warn type, const char *msg)
619 {
620 	struct curparse *curp;
621 	char		*wtype;
622 
623 	curp = (struct curparse *)arg;
624 	wtype = NULL;
625 
626 	switch (type) {
627 	case (WARN_COMPAT):
628 		wtype = "compat";
629 		if (curp->wflags & WARN_WCOMPAT)
630 			break;
631 		return(1);
632 	case (WARN_SYNTAX):
633 		wtype = "syntax";
634 		if (curp->wflags & WARN_WSYNTAX)
635 			break;
636 		return(1);
637 	}
638 
639 	assert(wtype);
640 	warnx("%s:%d: %s warning: %s (column %d)",
641 			curp->file, line, wtype, msg, col);
642 
643 	if ( ! (curp->wflags & WARN_WERR))
644 		return(1);
645 
646 	warnx("%s: considering warnings as errors",
647 			__progname);
648 	return(0);
649 }
650 
651 
652 static int
653 manwarn(void *arg, int line, int col, const char *msg)
654 {
655 	struct curparse *curp;
656 
657 	curp = (struct curparse *)arg;
658 
659 	if ( ! (curp->wflags & WARN_WSYNTAX))
660 		return(1);
661 
662 	warnx("%s:%d: syntax warning: %s (column %d)",
663 			curp->file, line, msg, col);
664 
665 	if ( ! (curp->wflags & WARN_WERR))
666 		return(1);
667 
668 	warnx("%s: considering warnings as errors",
669 			__progname);
670 	return(0);
671 }
672