xref: /netbsd-src/external/bsd/mdocml/dist/main.c (revision bbde328be4e75ea9ad02e9715ea13ca54b797ada)
1 /*	$Vendor-Id: main.c,v 1.60 2010/03/22 20:43:00 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <sys/stat.h>
22 
23 #include <assert.h>
24 #include <fcntl.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "mdoc.h"
32 #include "man.h"
33 #include "main.h"
34 
/*
 * Cast away the const qualifier of a pointer.  The value is routed
 * through uintptr_t, presumably so that compilers do not warn about
 * the discarded qualifier.
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/* FIXME: Intel's compiler?  LLVM?  pcc?  */

/*
 * Let __attribute__() expand to nothing unless the compiler is GCC 2
 * or newer; the macro is left alone when running under lint.
 */
#if (!defined(__GNUC__) || (__GNUC__ < 2)) && !defined(lint)
# define __attribute__(x)
#endif /* (!defined(__GNUC__) || (__GNUC__ < 2)) && !defined(lint) */
44 
/*
 * Output device entry points.  The first argument is always the
 * device's private data (curparse.outdata).
 */

/* Render a parsed mdoc document. */
typedef	void		(*out_mdoc)(void *arg, const struct mdoc *mdoc);
/* Render a parsed man document. */
typedef	void		(*out_man)(void *arg, const struct man *man);
/* Release the device's private data. */
typedef	void		(*out_free)(void *arg);
48 
/*
 * A growable heap buffer.  main() starts both of its buffers zeroed
 * (buf NULL, sz 0), fdesc() enlarges them with realloc(), and main()
 * frees them before exiting.
 *
 * buf: the storage itself.
 * sz:  number of bytes currently allocated for buf.
 */
struct	buf {
	char	 	 *buf;
	size_t		  sz;
};
53 
/* Input parser selection, set with -m (see moptions()). */
enum	intt {
	INTT_AUTO = 0,	/* -mandoc: pset() picks a parser per input. */
	INTT_MDOC = 1,	/* -mdoc: always use the mdoc parser. */
	INTT_MAN = 2	/* -man: always use the man parser. */
};
59 
/* Output device selection, set with -T (see toptions()). */
enum	outt {
	OUTT_ASCII = 0,	/* -Tascii; also what main() starts with. */
	OUTT_TREE = 1,	/* -Ttree */
	OUTT_HTML = 2,	/* -Thtml */
	OUTT_XHTML = 3,	/* -Txhtml */
	OUTT_LINT = 4	/* -Tlint: parse only, no output device. */
};
67 
68 struct	curparse {
69 	const char	 *file;		/* Current parse. */
70 	int		  fd;		/* Current parse. */
71 	int		  wflags;
72 #define	WARN_WALL	 (1 << 0)	/* All-warnings mask. */
73 #define	WARN_WERR	 (1 << 2)	/* Warnings->errors. */
74 	int		  fflags;
75 #define	FL_IGN_SCOPE	 (1 << 0) 	/* Ignore scope errors. */
76 #define	FL_NIGN_ESCAPE	 (1 << 1) 	/* Don't ignore bad escapes. */
77 #define	FL_NIGN_MACRO	 (1 << 2) 	/* Don't ignore bad macros. */
78 #define	FL_NIGN_CHARS	 (1 << 3)	/* Don't ignore bad chars. */
79 #define	FL_IGN_ERRORS	 (1 << 4)	/* Ignore failed parse. */
80 	enum intt	  inttype;	/* Input parsers... */
81 	struct man	 *man;
82 	struct man	 *lastman;
83 	struct mdoc	 *mdoc;
84 	struct mdoc	 *lastmdoc;
85 	enum outt	  outtype;	/* Output devices... */
86 	out_mdoc	  outmdoc;
87 	out_man	  	  outman;
88 	out_free	  outfree;
89 	void		 *outdata;
90 	char		  outopts[BUFSIZ];
91 };
92 
/*
 * The flags switched on together by -fstrict and -Tlint: do not ignore
 * bad escapes, macros or characters.
 *
 * The replacement list is parenthesised so that the macro behaves as a
 * single operand: without the parentheses `~FL_STRICT' would negate
 * only FL_NIGN_ESCAPE and `x & FL_STRICT' would bind `&' to the last
 * flag only.
 */
#define	FL_STRICT	 (FL_NIGN_ESCAPE | \
			  FL_NIGN_MACRO | \
			  FL_NIGN_CHARS)
96 
/* Command-line sub-option parsers: 1 on success, 0 on a bad argument. */
static	int		  foptions(int *fflags, char *arg);
static	int		  toptions(struct curparse *curp, char *arg);
static	int		  moptions(enum intt *tflags, char *arg);
static	int		  woptions(int *wflags, char *arg);

/* Message callbacks handed to the parsers. */
static	int		  merr(void *arg, int line, int col,
				const char *msg);
static	int		  mwarn(void *arg, int line, int col,
				const char *msg);

/* Reading and parsing of one input. */
static	int		  ffile(struct buf *blk, struct buf *ln,
				const char *file, struct curparse *curp);
static	int		  fdesc(struct buf *blk, struct buf *ln,
				struct curparse *curp);
static	int		  pset(const char *buf, int pos,
				struct curparse *curp,
				struct man **man, struct mdoc **mdoc);

/* Parser construction. */
static	struct man	 *man_init(struct curparse *curp);
static	struct mdoc	 *mdoc_init(struct curparse *curp);

/* Both of these print a message and exit. */
static	void		  version(void) __attribute__((noreturn));
static	void		  usage(void) __attribute__((noreturn));

/* Last path component of argv[0]; assigned first thing in main(). */
static	const char	 *progname;
115 
116 
117 int
118 main(int argc, char *argv[])
119 {
120 	int		 c, rc;
121 	struct buf	 ln, blk;
122 	struct curparse	 curp;
123 
124 	progname = strrchr(argv[0], '/');
125 	if (progname == NULL)
126 		progname = argv[0];
127 	else
128 		++progname;
129 
130 	memset(&curp, 0, sizeof(struct curparse));
131 
132 	curp.inttype = INTT_AUTO;
133 	curp.outtype = OUTT_ASCII;
134 
135 	/* LINTED */
136 	while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
137 		switch (c) {
138 		case ('f'):
139 			if ( ! foptions(&curp.fflags, optarg))
140 				return(EXIT_FAILURE);
141 			break;
142 		case ('m'):
143 			if ( ! moptions(&curp.inttype, optarg))
144 				return(EXIT_FAILURE);
145 			break;
146 		case ('O'):
147 			(void)strlcat(curp.outopts, optarg, BUFSIZ);
148 			(void)strlcat(curp.outopts, ",", BUFSIZ);
149 			break;
150 		case ('T'):
151 			if ( ! toptions(&curp, optarg))
152 				return(EXIT_FAILURE);
153 			break;
154 		case ('W'):
155 			if ( ! woptions(&curp.wflags, optarg))
156 				return(EXIT_FAILURE);
157 			break;
158 		case ('V'):
159 			version();
160 			/* NOTREACHED */
161 		default:
162 			usage();
163 			/* NOTREACHED */
164 		}
165 
166 	argc -= optind;
167 	argv += optind;
168 
169 	memset(&ln, 0, sizeof(struct buf));
170 	memset(&blk, 0, sizeof(struct buf));
171 
172 	rc = 1;
173 
174 	if (NULL == *argv) {
175 		curp.file = "<stdin>";
176 		curp.fd = STDIN_FILENO;
177 
178 		c = fdesc(&blk, &ln, &curp);
179 		if ( ! (FL_IGN_ERRORS & curp.fflags))
180 			rc = 1 == c ? 1 : 0;
181 		else
182 			rc = -1 == c ? 0 : 1;
183 	}
184 
185 	while (rc && *argv) {
186 		c = ffile(&blk, &ln, *argv, &curp);
187 		if ( ! (FL_IGN_ERRORS & curp.fflags))
188 			rc = 1 == c ? 1 : 0;
189 		else
190 			rc = -1 == c ? 0 : 1;
191 
192 		argv++;
193 		if (*argv && rc) {
194 			if (curp.lastman)
195 				man_reset(curp.lastman);
196 			if (curp.lastmdoc)
197 				mdoc_reset(curp.lastmdoc);
198 			curp.lastman = NULL;
199 			curp.lastmdoc = NULL;
200 		}
201 	}
202 
203 	if (blk.buf)
204 		free(blk.buf);
205 	if (ln.buf)
206 		free(ln.buf);
207 	if (curp.outfree)
208 		(*curp.outfree)(curp.outdata);
209 	if (curp.mdoc)
210 		mdoc_free(curp.mdoc);
211 	if (curp.man)
212 		man_free(curp.man);
213 
214 	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
215 }
216 
217 
218 static void
219 version(void)
220 {
221 
222 	(void)printf("%s %s\n", progname, VERSION);
223 	exit(EXIT_SUCCESS);
224 }
225 
226 
227 static void
228 usage(void)
229 {
230 
231 	(void)fprintf(stderr, "usage: %s [-V] [-foption...] "
232 			"[-mformat] [-Ooption] [-Toutput] "
233 			"[-Werr...]\n", progname);
234 	exit(EXIT_FAILURE);
235 }
236 
237 
238 static struct man *
239 man_init(struct curparse *curp)
240 {
241 	int		 pflags;
242 	struct man_cb	 mancb;
243 
244 	mancb.man_err = merr;
245 	mancb.man_warn = mwarn;
246 
247 	/* Defaults from mandoc.1. */
248 
249 	pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE | MAN_IGN_CHARS;
250 
251 	if (curp->fflags & FL_NIGN_MACRO)
252 		pflags &= ~MAN_IGN_MACRO;
253 	if (curp->fflags & FL_NIGN_CHARS)
254 		pflags &= ~MAN_IGN_CHARS;
255 	if (curp->fflags & FL_NIGN_ESCAPE)
256 		pflags &= ~MAN_IGN_ESCAPE;
257 
258 	return(man_alloc(curp, pflags, &mancb));
259 }
260 
261 
262 static struct mdoc *
263 mdoc_init(struct curparse *curp)
264 {
265 	int		 pflags;
266 	struct mdoc_cb	 mdoccb;
267 
268 	mdoccb.mdoc_err = merr;
269 	mdoccb.mdoc_warn = mwarn;
270 
271 	/* Defaults from mandoc.1. */
272 
273 	pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE | MDOC_IGN_CHARS;
274 
275 	if (curp->fflags & FL_IGN_SCOPE)
276 		pflags |= MDOC_IGN_SCOPE;
277 	if (curp->fflags & FL_NIGN_ESCAPE)
278 		pflags &= ~MDOC_IGN_ESCAPE;
279 	if (curp->fflags & FL_NIGN_MACRO)
280 		pflags &= ~MDOC_IGN_MACRO;
281 	if (curp->fflags & FL_NIGN_CHARS)
282 		pflags &= ~MDOC_IGN_CHARS;
283 
284 	return(mdoc_alloc(curp, pflags, &mdoccb));
285 }
286 
287 
288 static int
289 ffile(struct buf *blk, struct buf *ln,
290 		const char *file, struct curparse *curp)
291 {
292 	int		 c;
293 
294 	curp->file = file;
295 	if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
296 		perror(curp->file);
297 		return(-1);
298 	}
299 
300 	c = fdesc(blk, ln, curp);
301 
302 	if (-1 == close(curp->fd))
303 		perror(curp->file);
304 
305 	return(c);
306 }
307 
308 
309 static int
310 fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
311 {
312 	size_t		 sz;
313 	ssize_t		 ssz;
314 	struct stat	 st;
315 	int		 j, i, pos, lnn, comment;
316 	struct man	*man;
317 	struct mdoc	*mdoc;
318 
319 	sz = BUFSIZ;
320 	man = NULL;
321 	mdoc = NULL;
322 
323 	/*
324 	 * Two buffers: ln and buf.  buf is the input buffer optimised
325 	 * here for each file's block size.  ln is a line buffer.  Both
326 	 * growable, hence passed in by ptr-ptr.
327 	 */
328 
329 	if (-1 == fstat(curp->fd, &st))
330 		perror(curp->file);
331 	else if ((size_t)st.st_blksize > sz)
332 		sz = st.st_blksize;
333 
334 	if (sz > blk->sz) {
335 		blk->buf = realloc(blk->buf, sz);
336 		if (NULL == blk->buf) {
337 			perror(NULL);
338 			exit(EXIT_FAILURE);
339 		}
340 		blk->sz = sz;
341 	}
342 
343 	/* Fill buf with file blocksize. */
344 
345 	for (lnn = pos = comment = 0; ; ) {
346 		if (-1 == (ssz = read(curp->fd, blk->buf, sz))) {
347 			perror(curp->file);
348 			return(-1);
349 		} else if (0 == ssz)
350 			break;
351 
352 		/* Parse the read block into partial or full lines. */
353 
354 		for (i = 0; i < (int)ssz; i++) {
355 			if (pos >= (int)ln->sz) {
356 				ln->sz += 256; /* Step-size. */
357 				ln->buf = realloc(ln->buf, ln->sz);
358 				if (NULL == ln->buf) {
359 					perror(NULL);
360 					return(EXIT_FAILURE);
361 				}
362 			}
363 
364 			if ('\n' != blk->buf[i]) {
365 				if (comment)
366 					continue;
367 				ln->buf[pos++] = blk->buf[i];
368 
369 				/* Handle in-line `\"' comments. */
370 
371 				if (1 == pos || '\"' != ln->buf[pos - 1])
372 					continue;
373 
374 				for (j = pos - 2; j >= 0; j--)
375 					if ('\\' != ln->buf[j])
376 						break;
377 
378 				if ( ! ((pos - 2 - j) % 2))
379 					continue;
380 
381 				comment = 1;
382 				pos -= 2;
383 				continue;
384 			}
385 
386 			/* Handle escaped `\\n' newlines. */
387 
388 			if (pos > 0 && 0 == comment &&
389 					'\\' == ln->buf[pos - 1]) {
390 				for (j = pos - 1; j >= 0; j--)
391 					if ('\\' != ln->buf[j])
392 						break;
393 				if ( ! ((pos - j) % 2)) {
394 					pos--;
395 					lnn++;
396 					continue;
397 				}
398 			}
399 
400 			ln->buf[pos] = 0;
401 			lnn++;
402 
403 			/* If unset, assign parser in pset(). */
404 
405 			if ( ! (man || mdoc) && ! pset(ln->buf,
406 						pos, curp, &man, &mdoc))
407 				return(-1);
408 
409 			pos = comment = 0;
410 
411 			/* Pass down into parsers. */
412 
413 			if (man && ! man_parseln(man, lnn, ln->buf))
414 				return(0);
415 			if (mdoc && ! mdoc_parseln(mdoc, lnn, ln->buf))
416 				return(0);
417 		}
418 	}
419 
420 	/* NOTE a parser may not have been assigned, yet. */
421 
422 	if ( ! (man || mdoc)) {
423 		fprintf(stderr, "%s: Not a manual\n", curp->file);
424 		return(0);
425 	}
426 
427 	if (mdoc && ! mdoc_endparse(mdoc))
428 		return(0);
429 	if (man && ! man_endparse(man))
430 		return(0);
431 
432 	/* If unset, allocate output dev now (if applicable). */
433 
434 	if ( ! (curp->outman && curp->outmdoc)) {
435 		switch (curp->outtype) {
436 		case (OUTT_XHTML):
437 			curp->outdata = xhtml_alloc(curp->outopts);
438 			curp->outman = html_man;
439 			curp->outmdoc = html_mdoc;
440 			curp->outfree = html_free;
441 			break;
442 		case (OUTT_HTML):
443 			curp->outdata = html_alloc(curp->outopts);
444 			curp->outman = html_man;
445 			curp->outmdoc = html_mdoc;
446 			curp->outfree = html_free;
447 			break;
448 		case (OUTT_TREE):
449 			curp->outman = tree_man;
450 			curp->outmdoc = tree_mdoc;
451 			break;
452 		case (OUTT_LINT):
453 			break;
454 		default:
455 			curp->outdata = ascii_alloc();
456 			curp->outman = terminal_man;
457 			curp->outmdoc = terminal_mdoc;
458 			curp->outfree = terminal_free;
459 			break;
460 		}
461 	}
462 
463 	/* Execute the out device, if it exists. */
464 
465 	if (man && curp->outman)
466 		(*curp->outman)(curp->outdata, man);
467 	if (mdoc && curp->outmdoc)
468 		(*curp->outmdoc)(curp->outdata, mdoc);
469 
470 	return(1);
471 }
472 
473 
474 static int
475 pset(const char *buf, int pos, struct curparse *curp,
476 		struct man **man, struct mdoc **mdoc)
477 {
478 	int		 i;
479 
480 	/*
481 	 * Try to intuit which kind of manual parser should be used.  If
482 	 * passed in by command-line (-man, -mdoc), then use that
483 	 * explicitly.  If passed as -mandoc, then try to guess from the
484 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
485 	 * default to -man, which is more lenient.
486 	 */
487 
488 	if (buf[0] == '.') {
489 		for (i = 1; buf[i]; i++)
490 			if (' ' != buf[i] && '\t' != buf[i])
491 				break;
492 		if (0 == buf[i])
493 			return(1);
494 	}
495 
496 	switch (curp->inttype) {
497 	case (INTT_MDOC):
498 		if (NULL == curp->mdoc)
499 			curp->mdoc = mdoc_init(curp);
500 		if (NULL == (*mdoc = curp->mdoc))
501 			return(0);
502 		curp->lastmdoc = *mdoc;
503 		return(1);
504 	case (INTT_MAN):
505 		if (NULL == curp->man)
506 			curp->man = man_init(curp);
507 		if (NULL == (*man = curp->man))
508 			return(0);
509 		curp->lastman = *man;
510 		return(1);
511 	default:
512 		break;
513 	}
514 
515 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
516 		if (NULL == curp->mdoc)
517 			curp->mdoc = mdoc_init(curp);
518 		if (NULL == (*mdoc = curp->mdoc))
519 			return(0);
520 		curp->lastmdoc = *mdoc;
521 		return(1);
522 	}
523 
524 	if (NULL == curp->man)
525 		curp->man = man_init(curp);
526 	if (NULL == (*man = curp->man))
527 		return(0);
528 	curp->lastman = *man;
529 	return(1);
530 }
531 
532 
533 static int
534 moptions(enum intt *tflags, char *arg)
535 {
536 
537 	if (0 == strcmp(arg, "doc"))
538 		*tflags = INTT_MDOC;
539 	else if (0 == strcmp(arg, "andoc"))
540 		*tflags = INTT_AUTO;
541 	else if (0 == strcmp(arg, "an"))
542 		*tflags = INTT_MAN;
543 	else {
544 		fprintf(stderr, "%s: Bad argument\n", arg);
545 		return(0);
546 	}
547 
548 	return(1);
549 }
550 
551 
552 static int
553 toptions(struct curparse *curp, char *arg)
554 {
555 
556 	if (0 == strcmp(arg, "ascii"))
557 		curp->outtype = OUTT_ASCII;
558 	else if (0 == strcmp(arg, "lint")) {
559 		curp->outtype = OUTT_LINT;
560 		curp->wflags |= WARN_WALL;
561 		curp->fflags |= FL_STRICT;
562 	}
563 	else if (0 == strcmp(arg, "tree"))
564 		curp->outtype = OUTT_TREE;
565 	else if (0 == strcmp(arg, "html"))
566 		curp->outtype = OUTT_HTML;
567 	else if (0 == strcmp(arg, "xhtml"))
568 		curp->outtype = OUTT_XHTML;
569 	else {
570 		fprintf(stderr, "%s: Bad argument\n", arg);
571 		return(0);
572 	}
573 
574 	return(1);
575 }
576 
577 
578 static int
579 foptions(int *fflags, char *arg)
580 {
581 	char		*v, *o;
582 	const char	*toks[8];
583 
584 	toks[0] = "ign-scope";
585 	toks[1] = "no-ign-escape";
586 	toks[2] = "no-ign-macro";
587 	toks[3] = "no-ign-chars";
588 	toks[4] = "ign-errors";
589 	toks[5] = "strict";
590 	toks[6] = "ign-escape";
591 	toks[7] = NULL;
592 
593 	while (*arg) {
594 		o = arg;
595 		switch (getsubopt(&arg, UNCONST(toks), &v)) {
596 		case (0):
597 			*fflags |= FL_IGN_SCOPE;
598 			break;
599 		case (1):
600 			*fflags |= FL_NIGN_ESCAPE;
601 			break;
602 		case (2):
603 			*fflags |= FL_NIGN_MACRO;
604 			break;
605 		case (3):
606 			*fflags |= FL_NIGN_CHARS;
607 			break;
608 		case (4):
609 			*fflags |= FL_IGN_ERRORS;
610 			break;
611 		case (5):
612 			*fflags |= FL_STRICT;
613 			break;
614 		case (6):
615 			*fflags &= ~FL_NIGN_ESCAPE;
616 			break;
617 		default:
618 			fprintf(stderr, "%s: Bad argument\n", o);
619 			return(0);
620 		}
621 	}
622 
623 	return(1);
624 }
625 
626 
627 static int
628 woptions(int *wflags, char *arg)
629 {
630 	char		*v, *o;
631 	const char	*toks[3];
632 
633 	toks[0] = "all";
634 	toks[1] = "error";
635 	toks[2] = NULL;
636 
637 	while (*arg) {
638 		o = arg;
639 		switch (getsubopt(&arg, UNCONST(toks), &v)) {
640 		case (0):
641 			*wflags |= WARN_WALL;
642 			break;
643 		case (1):
644 			*wflags |= WARN_WERR;
645 			break;
646 		default:
647 			fprintf(stderr, "%s: Bad argument\n", o);
648 			return(0);
649 		}
650 	}
651 
652 	return(1);
653 }
654 
655 
656 /* ARGSUSED */
657 static int
658 merr(void *arg, int line, int col, const char *msg)
659 {
660 	struct curparse *curp;
661 
662 	curp = (struct curparse *)arg;
663 
664 	(void)fprintf(stderr, "%s:%d:%d: error: %s\n",
665 			curp->file, line, col + 1, msg);
666 
667 	return(0);
668 }
669 
670 
671 static int
672 mwarn(void *arg, int line, int col, const char *msg)
673 {
674 	struct curparse *curp;
675 
676 	curp = (struct curparse *)arg;
677 
678 	if ( ! (curp->wflags & WARN_WALL))
679 		return(1);
680 
681 	(void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
682 			curp->file, line, col + 1, msg);
683 
684 	if ( ! (curp->wflags & WARN_WERR))
685 		return(1);
686 
687 	return(0);
688 }
689 
690