xref: /netbsd-src/external/bsd/mdocml/dist/main.c (revision 3816d47b2c42fcd6e549e3407f842a5b1a1d23ad)
1 /*	$Vendor-Id: main.c,v 1.57 2009/11/02 08:29:25 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/stat.h>
18 
19 #include <assert.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 
27 #include "mdoc.h"
28 #include "man.h"
29 #include "main.h"
30 
/*
 * Convert a pointer to plain `void *' by way of uintptr_t.  Used below
 * to hand const-qualified token tables to getsubopt().
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/* FIXME: Intel's compiler?  LLVM?  pcc?  */

/*
 * Anything that is not GCC 2 or newer gets __attribute__() defined
 * away, except when the source is being run through lint.
 */
#if (!defined(__GNUC__) || (__GNUC__ < 2)) && !defined(lint)
# define __attribute__(x)
#endif

/* These two are declared by hand when building on Linux. */
#if defined(__linux__)
extern	size_t	  	  strlcat(char *, const char *, size_t);
extern	int		  getsubopt(char **, char * const *, char **);
#endif
45 
/*
 * Entry points of an output back-end.  In every case the first
 * argument is the back-end's private data (curparse.outdata).
 */
typedef	void		(*out_free)(void *);
typedef	void		(*out_man)(void *, const struct man *);
typedef	void		(*out_mdoc)(void *, const struct mdoc *);
49 
/* A growable heap buffer paired with its allocated size. */
struct	buf {
	char	 	 *buf;	/* Storage (NULL until first grown). */
	size_t		  sz;	/* Bytes allocated at buf. */
};
54 
/* Input parser selection (see moptions() and pset()). */
enum	intt {
	INTT_AUTO = 0,	/* Choose per file from its content. */
	INTT_MDOC = 1,	/* Always use the mdoc parser. */
	INTT_MAN = 2	/* Always use the man parser. */
};
60 
/* Output device selection (see toptions() and fdesc()). */
enum	outt {
	OUTT_ASCII = 0,	/* Terminal output; the default. */
	OUTT_TREE = 1,	/* Parse-tree dump. */
	OUTT_HTML = 2,	/* HTML output. */
	OUTT_LINT = 3	/* Parse only; no output device. */
};
67 
68 struct	curparse {
69 	const char	 *file;		/* Current parse. */
70 	int		  fd;		/* Current parse. */
71 	int		  wflags;
72 #define	WARN_WALL	 (1 << 0)	/* All-warnings mask. */
73 #define	WARN_WERR	 (1 << 2)	/* Warnings->errors. */
74 	int		  fflags;
75 #define	IGN_SCOPE	 (1 << 0) 	/* Ignore scope errors. */
76 #define	NO_IGN_ESCAPE	 (1 << 1) 	/* Don't ignore bad escapes. */
77 #define	NO_IGN_MACRO	 (1 << 2) 	/* Don't ignore bad macros. */
78 #define	NO_IGN_CHARS	 (1 << 3)	/* Don't ignore bad chars. */
79 #define	IGN_ERRORS	 (1 << 4)	/* Ignore failed parse. */
80 	enum intt	  inttype;	/* Input parsers... */
81 	struct man	 *man;
82 	struct man	 *lastman;
83 	struct mdoc	 *mdoc;
84 	struct mdoc	 *lastmdoc;
85 	enum outt	  outtype;	/* Output devices... */
86 	out_mdoc	  outmdoc;
87 	out_man	  	  outman;
88 	out_free	  outfree;
89 	void		 *outdata;
90 	char		  outopts[BUFSIZ];
91 };
92 
/* Command-line option parsers; each returns 0 on a bad argument. */
static	int		  foptions(int *fflags, char *arg);
static	int		  moptions(enum intt *tflags, char *arg);
static	int		  toptions(enum outt *tflags, char *arg);
static	int		  woptions(int *wflags, char *arg);

/* Diagnostic callbacks handed to the parsers. */
static	int		  merr(void *arg, int line, int col,
				const char *msg);
static	int		  mwarn(void *arg, int line, int col,
				const char *msg);

/* Per-file driver and its helpers. */
static	int		  ffile(struct buf *blk, struct buf *ln,
				const char *file, struct curparse *curp);
static	int		  fdesc(struct buf *blk, struct buf *ln,
				struct curparse *curp);
static	int		  pset(const char *buf, int pos,
				struct curparse *curp,
				struct man **man, struct mdoc **mdoc);
static	struct man	 *man_init(struct curparse *curp);
static	struct mdoc	 *mdoc_init(struct curparse *curp);

/* Both of these terminate the process. */
static	void		  version(void) __attribute__((noreturn));
static	void		  usage(void) __attribute__((noreturn));

/* Basename of argv[0], assigned first thing in main(). */
static	const char	 *progname;
111 
112 
113 int
114 main(int argc, char *argv[])
115 {
116 	int		 c, rc;
117 	struct buf	 ln, blk;
118 	struct curparse	 curp;
119 
120 	progname = strrchr(argv[0], '/');
121 	if (progname == NULL)
122 		progname = argv[0];
123 	else
124 		++progname;
125 
126 	memset(&curp, 0, sizeof(struct curparse));
127 
128 	curp.inttype = INTT_AUTO;
129 	curp.outtype = OUTT_ASCII;
130 
131 	/* LINTED */
132 	while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
133 		switch (c) {
134 		case ('f'):
135 			if ( ! foptions(&curp.fflags, optarg))
136 				return(EXIT_FAILURE);
137 			break;
138 		case ('m'):
139 			if ( ! moptions(&curp.inttype, optarg))
140 				return(EXIT_FAILURE);
141 			break;
142 		case ('O'):
143 			(void)strlcat(curp.outopts, optarg, BUFSIZ);
144 			(void)strlcat(curp.outopts, ",", BUFSIZ);
145 			break;
146 		case ('T'):
147 			if ( ! toptions(&curp.outtype, optarg))
148 				return(EXIT_FAILURE);
149 			break;
150 		case ('W'):
151 			if ( ! woptions(&curp.wflags, optarg))
152 				return(EXIT_FAILURE);
153 			break;
154 		case ('V'):
155 			version();
156 			/* NOTREACHED */
157 		default:
158 			usage();
159 			/* NOTREACHED */
160 		}
161 
162 	argc -= optind;
163 	argv += optind;
164 
165 	memset(&ln, 0, sizeof(struct buf));
166 	memset(&blk, 0, sizeof(struct buf));
167 
168 	rc = 1;
169 
170 	if (NULL == *argv) {
171 		curp.file = "<stdin>";
172 		curp.fd = STDIN_FILENO;
173 
174 		c = fdesc(&blk, &ln, &curp);
175 		if ( ! (IGN_ERRORS & curp.fflags))
176 			rc = 1 == c ? 1 : 0;
177 		else
178 			rc = -1 == c ? 0 : 1;
179 	}
180 
181 	while (rc && *argv) {
182 		c = ffile(&blk, &ln, *argv, &curp);
183 		if ( ! (IGN_ERRORS & curp.fflags))
184 			rc = 1 == c ? 1 : 0;
185 		else
186 			rc = -1 == c ? 0 : 1;
187 
188 		argv++;
189 		if (*argv && rc) {
190 			if (curp.lastman)
191 				man_reset(curp.lastman);
192 			if (curp.lastmdoc)
193 				mdoc_reset(curp.lastmdoc);
194 			curp.lastman = NULL;
195 			curp.lastmdoc = NULL;
196 		}
197 	}
198 
199 	if (blk.buf)
200 		free(blk.buf);
201 	if (ln.buf)
202 		free(ln.buf);
203 	if (curp.outfree)
204 		(*curp.outfree)(curp.outdata);
205 	if (curp.mdoc)
206 		mdoc_free(curp.mdoc);
207 	if (curp.man)
208 		man_free(curp.man);
209 
210 	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
211 }
212 
213 
214 static void
215 version(void)
216 {
217 
218 	(void)printf("%s %s\n", progname, VERSION);
219 	exit(EXIT_SUCCESS);
220 }
221 
222 
223 static void
224 usage(void)
225 {
226 
227 	(void)fprintf(stderr, "usage: %s [-V] [-foption...] "
228 			"[-mformat] [-Ooption] [-Toutput] "
229 			"[-Werr...]\n", progname);
230 	exit(EXIT_FAILURE);
231 }
232 
233 
234 static struct man *
235 man_init(struct curparse *curp)
236 {
237 	int		 pflags;
238 	struct man_cb	 mancb;
239 
240 	mancb.man_err = merr;
241 	mancb.man_warn = mwarn;
242 
243 	/* Defaults from mandoc.1. */
244 
245 	pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE | MAN_IGN_CHARS;
246 
247 	if (curp->fflags & NO_IGN_MACRO)
248 		pflags &= ~MAN_IGN_MACRO;
249 	if (curp->fflags & NO_IGN_CHARS)
250 		pflags &= ~MAN_IGN_CHARS;
251 	if (curp->fflags & NO_IGN_ESCAPE)
252 		pflags &= ~MAN_IGN_ESCAPE;
253 
254 	return(man_alloc(curp, pflags, &mancb));
255 }
256 
257 
258 static struct mdoc *
259 mdoc_init(struct curparse *curp)
260 {
261 	int		 pflags;
262 	struct mdoc_cb	 mdoccb;
263 
264 	mdoccb.mdoc_err = merr;
265 	mdoccb.mdoc_warn = mwarn;
266 
267 	/* Defaults from mandoc.1. */
268 
269 	pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE | MDOC_IGN_CHARS;
270 
271 	if (curp->fflags & IGN_SCOPE)
272 		pflags |= MDOC_IGN_SCOPE;
273 	if (curp->fflags & NO_IGN_ESCAPE)
274 		pflags &= ~MDOC_IGN_ESCAPE;
275 	if (curp->fflags & NO_IGN_MACRO)
276 		pflags &= ~MDOC_IGN_MACRO;
277 	if (curp->fflags & NO_IGN_CHARS)
278 		pflags &= ~MDOC_IGN_CHARS;
279 
280 	return(mdoc_alloc(curp, pflags, &mdoccb));
281 }
282 
283 
284 static int
285 ffile(struct buf *blk, struct buf *ln,
286 		const char *file, struct curparse *curp)
287 {
288 	int		 c;
289 
290 	curp->file = file;
291 	if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
292 		perror(curp->file);
293 		return(-1);
294 	}
295 
296 	c = fdesc(blk, ln, curp);
297 
298 	if (-1 == close(curp->fd))
299 		perror(curp->file);
300 
301 	return(c);
302 }
303 
304 
305 static int
306 fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
307 {
308 	size_t		 sz;
309 	ssize_t		 ssz;
310 	struct stat	 st;
311 	int		 j, i, pos, lnn, comment;
312 	struct man	*man;
313 	struct mdoc	*mdoc;
314 
315 	sz = BUFSIZ;
316 	man = NULL;
317 	mdoc = NULL;
318 
319 	/*
320 	 * Two buffers: ln and buf.  buf is the input buffer optimised
321 	 * here for each file's block size.  ln is a line buffer.  Both
322 	 * growable, hence passed in by ptr-ptr.
323 	 */
324 
325 	if (-1 == fstat(curp->fd, &st))
326 		perror(curp->file);
327 	else if ((size_t)st.st_blksize > sz)
328 		sz = st.st_blksize;
329 
330 	if (sz > blk->sz) {
331 		blk->buf = realloc(blk->buf, sz);
332 		if (NULL == blk->buf) {
333 			perror(NULL);
334 			exit(EXIT_FAILURE);
335 		}
336 		blk->sz = sz;
337 	}
338 
339 	/* Fill buf with file blocksize. */
340 
341 	for (lnn = pos = comment = 0; ; ) {
342 		if (-1 == (ssz = read(curp->fd, blk->buf, sz))) {
343 			perror(curp->file);
344 			return(-1);
345 		} else if (0 == ssz)
346 			break;
347 
348 		/* Parse the read block into partial or full lines. */
349 
350 		for (i = 0; i < (int)ssz; i++) {
351 			if (pos >= (int)ln->sz) {
352 				ln->sz += 256; /* Step-size. */
353 				ln->buf = realloc(ln->buf, ln->sz);
354 				if (NULL == ln->buf) {
355 					perror(NULL);
356 					return(EXIT_FAILURE);
357 				}
358 			}
359 
360 			if ('\n' != blk->buf[i]) {
361 				if (comment)
362 					continue;
363 				ln->buf[pos++] = blk->buf[i];
364 
365 				/* Handle in-line `\"' comments. */
366 
367 				if (1 == pos || '\"' != ln->buf[pos - 1])
368 					continue;
369 
370 				for (j = pos - 2; j >= 0; j--)
371 					if ('\\' != ln->buf[j])
372 						break;
373 
374 				if ( ! ((pos - 2 - j) % 2))
375 					continue;
376 
377 				comment = 1;
378 				pos -= 2;
379 				continue;
380 			}
381 
382 			/* Handle escaped `\\n' newlines. */
383 
384 			if (pos > 0 && 0 == comment &&
385 					'\\' == ln->buf[pos - 1]) {
386 				for (j = pos - 1; j >= 0; j--)
387 					if ('\\' != ln->buf[j])
388 						break;
389 				if ( ! ((pos - j) % 2)) {
390 					pos--;
391 					lnn++;
392 					continue;
393 				}
394 			}
395 
396 			ln->buf[pos] = 0;
397 			lnn++;
398 
399 			/* If unset, assign parser in pset(). */
400 
401 			if ( ! (man || mdoc) && ! pset(ln->buf,
402 						pos, curp, &man, &mdoc))
403 				return(-1);
404 
405 			pos = comment = 0;
406 
407 			/* Pass down into parsers. */
408 
409 			if (man && ! man_parseln(man, lnn, ln->buf))
410 				return(0);
411 			if (mdoc && ! mdoc_parseln(mdoc, lnn, ln->buf))
412 				return(0);
413 		}
414 	}
415 
416 	/* NOTE a parser may not have been assigned, yet. */
417 
418 	if ( ! (man || mdoc)) {
419 		fprintf(stderr, "%s: Not a manual\n", curp->file);
420 		return(0);
421 	}
422 
423 	if (mdoc && ! mdoc_endparse(mdoc))
424 		return(0);
425 	if (man && ! man_endparse(man))
426 		return(0);
427 
428 	/* If unset, allocate output dev now (if applicable). */
429 
430 	if ( ! (curp->outman && curp->outmdoc)) {
431 		switch (curp->outtype) {
432 		case (OUTT_HTML):
433 			curp->outdata = html_alloc(curp->outopts);
434 			curp->outman = html_man;
435 			curp->outmdoc = html_mdoc;
436 			curp->outfree = html_free;
437 			break;
438 		case (OUTT_TREE):
439 			curp->outman = tree_man;
440 			curp->outmdoc = tree_mdoc;
441 			break;
442 		case (OUTT_LINT):
443 			break;
444 		default:
445 			curp->outdata = ascii_alloc();
446 			curp->outman = terminal_man;
447 			curp->outmdoc = terminal_mdoc;
448 			curp->outfree = terminal_free;
449 			break;
450 		}
451 	}
452 
453 	/* Execute the out device, if it exists. */
454 
455 	if (man && curp->outman)
456 		(*curp->outman)(curp->outdata, man);
457 	if (mdoc && curp->outmdoc)
458 		(*curp->outmdoc)(curp->outdata, mdoc);
459 
460 	return(1);
461 }
462 
463 
464 static int
465 pset(const char *buf, int pos, struct curparse *curp,
466 		struct man **man, struct mdoc **mdoc)
467 {
468 	int		 i;
469 
470 	/*
471 	 * Try to intuit which kind of manual parser should be used.  If
472 	 * passed in by command-line (-man, -mdoc), then use that
473 	 * explicitly.  If passed as -mandoc, then try to guess from the
474 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
475 	 * default to -man, which is more lenient.
476 	 */
477 
478 	if (buf[0] == '.') {
479 		for (i = 1; buf[i]; i++)
480 			if (' ' != buf[i] && '\t' != buf[i])
481 				break;
482 		if (0 == buf[i])
483 			return(1);
484 	}
485 
486 	switch (curp->inttype) {
487 	case (INTT_MDOC):
488 		if (NULL == curp->mdoc)
489 			curp->mdoc = mdoc_init(curp);
490 		if (NULL == (*mdoc = curp->mdoc))
491 			return(0);
492 		curp->lastmdoc = *mdoc;
493 		return(1);
494 	case (INTT_MAN):
495 		if (NULL == curp->man)
496 			curp->man = man_init(curp);
497 		if (NULL == (*man = curp->man))
498 			return(0);
499 		curp->lastman = *man;
500 		return(1);
501 	default:
502 		break;
503 	}
504 
505 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
506 		if (NULL == curp->mdoc)
507 			curp->mdoc = mdoc_init(curp);
508 		if (NULL == (*mdoc = curp->mdoc))
509 			return(0);
510 		curp->lastmdoc = *mdoc;
511 		return(1);
512 	}
513 
514 	if (NULL == curp->man)
515 		curp->man = man_init(curp);
516 	if (NULL == (*man = curp->man))
517 		return(0);
518 	curp->lastman = *man;
519 	return(1);
520 }
521 
522 
523 static int
524 moptions(enum intt *tflags, char *arg)
525 {
526 
527 	if (0 == strcmp(arg, "doc"))
528 		*tflags = INTT_MDOC;
529 	else if (0 == strcmp(arg, "andoc"))
530 		*tflags = INTT_AUTO;
531 	else if (0 == strcmp(arg, "an"))
532 		*tflags = INTT_MAN;
533 	else {
534 		fprintf(stderr, "%s: Bad argument\n", arg);
535 		return(0);
536 	}
537 
538 	return(1);
539 }
540 
541 
542 static int
543 toptions(enum outt *tflags, char *arg)
544 {
545 
546 	if (0 == strcmp(arg, "ascii"))
547 		*tflags = OUTT_ASCII;
548 	else if (0 == strcmp(arg, "lint"))
549 		*tflags = OUTT_LINT;
550 	else if (0 == strcmp(arg, "tree"))
551 		*tflags = OUTT_TREE;
552 	else if (0 == strcmp(arg, "html"))
553 		*tflags = OUTT_HTML;
554 	else {
555 		fprintf(stderr, "%s: Bad argument\n", arg);
556 		return(0);
557 	}
558 
559 	return(1);
560 }
561 
562 
563 static int
564 foptions(int *fflags, char *arg)
565 {
566 	char		*v, *o;
567 	const char	*toks[8];
568 
569 	toks[0] = "ign-scope";
570 	toks[1] = "no-ign-escape";
571 	toks[2] = "no-ign-macro";
572 	toks[3] = "no-ign-chars";
573 	toks[4] = "ign-errors";
574 	toks[5] = "strict";
575 	toks[6] = "ign-escape";
576 	toks[7] = NULL;
577 
578 	while (*arg) {
579 		o = arg;
580 		switch (getsubopt(&arg, UNCONST(toks), &v)) {
581 		case (0):
582 			*fflags |= IGN_SCOPE;
583 			break;
584 		case (1):
585 			*fflags |= NO_IGN_ESCAPE;
586 			break;
587 		case (2):
588 			*fflags |= NO_IGN_MACRO;
589 			break;
590 		case (3):
591 			*fflags |= NO_IGN_CHARS;
592 			break;
593 		case (4):
594 			*fflags |= IGN_ERRORS;
595 			break;
596 		case (5):
597 			*fflags |= NO_IGN_ESCAPE |
598 			 	   NO_IGN_MACRO | NO_IGN_CHARS;
599 			break;
600 		case (6):
601 			*fflags &= ~NO_IGN_ESCAPE;
602 			break;
603 		default:
604 			fprintf(stderr, "%s: Bad argument\n", o);
605 			return(0);
606 		}
607 	}
608 
609 	return(1);
610 }
611 
612 
613 static int
614 woptions(int *wflags, char *arg)
615 {
616 	char		*v, *o;
617 	const char	*toks[3];
618 
619 	toks[0] = "all";
620 	toks[1] = "error";
621 	toks[2] = NULL;
622 
623 	while (*arg) {
624 		o = arg;
625 		switch (getsubopt(&arg, UNCONST(toks), &v)) {
626 		case (0):
627 			*wflags |= WARN_WALL;
628 			break;
629 		case (1):
630 			*wflags |= WARN_WERR;
631 			break;
632 		default:
633 			fprintf(stderr, "%s: Bad argument\n", o);
634 			return(0);
635 		}
636 	}
637 
638 	return(1);
639 }
640 
641 
642 /* ARGSUSED */
643 static int
644 merr(void *arg, int line, int col, const char *msg)
645 {
646 	struct curparse *curp;
647 
648 	curp = (struct curparse *)arg;
649 
650 	(void)fprintf(stderr, "%s:%d:%d: error: %s\n",
651 			curp->file, line, col + 1, msg);
652 
653 	return(0);
654 }
655 
656 
657 static int
658 mwarn(void *arg, int line, int col, const char *msg)
659 {
660 	struct curparse *curp;
661 
662 	curp = (struct curparse *)arg;
663 
664 	if ( ! (curp->wflags & WARN_WALL))
665 		return(1);
666 
667 	(void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
668 			curp->file, line, col + 1, msg);
669 
670 	if ( ! (curp->wflags & WARN_WERR))
671 		return(1);
672 
673 	return(0);
674 }
675 
676