xref: /dflybsd-src/contrib/mdocml/main.c (revision 16e9ff28733d8bd9941b9770d79be966ba221f5f)
1 /*	$Id: main.c,v 1.135 2011/01/04 15:02:00 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24 
25 #include <assert.h>
26 #include <ctype.h>
27 #include <fcntl.h>
28 #include <stdio.h>
29 #include <stdint.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "mandoc.h"
35 #include "main.h"
36 #include "mdoc.h"
37 #include "man.h"
38 #include "roff.h"
39 
/*
 * Fall back to 0 where the platform has no MAP_FILE, so that OR-ing
 * it into the mmap() flags is harmless.
 */
#if !defined(MAP_FILE)
# define MAP_FILE	0
#endif

/* Upper bound for curparse.reparse_count, checked in parsebuf(). */
#define	REPARSE_LIMIT	1000

/* Strip the const qualifier from a pointer without a direct cast. */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/*
 * Compilers other than GCC >= 2 get an empty __attribute__(), unless
 * lint is running.
 * FIXME: Intel's compiler?  LLVM?  pcc?
 */
#if (!defined(__GNUC__) || (__GNUC__ < 2)) && !defined(lint)
# define __attribute__(x)
#endif
54 
/*
 * Output back-end entry points.  The first argument of each is the
 * opaque back-end state kept in curparse.outdata.
 */
typedef	void	(*out_mdoc)(void *arg, const struct mdoc *mdoc);
typedef	void	(*out_man)(void *arg, const struct man *man);
typedef	void	(*out_free)(void *arg);
58 
/* A block of bytes together with its size. */
struct	buf {
	char		 *buf;	/* start of the data */
	size_t		  sz;	/* size of the data in bytes */
};
63 
/* Input language, as selected with -m (see moptions()). */
enum	intt {
	INTT_AUTO,	/* -mandoc: guess from the input (see pset()) */
	INTT_MDOC,	/* -mdoc */
	INTT_MAN	/* -man */
};
69 
/* Output format, as selected with -T (see toptions()). */
enum	outt {
	OUTT_ASCII = 0,	/* -Tascii */
	OUTT_TREE,	/* -Ttree */
	OUTT_HTML,	/* -Thtml */
	OUTT_XHTML,	/* -Txhtml */
	OUTT_LINT,	/* -Tlint: no output device is set up */
	OUTT_PS,	/* -Tps */
	OUTT_PDF	/* -Tpdf */
};
79 
80 struct	curparse {
81 	const char	 *file;		/* Current parse. */
82 	int		  fd;		/* Current parse. */
83 	int		  line;		/* Line number in the file. */
84 	enum mandoclevel  wlevel;	/* Ignore messages below this. */
85 	int		  wstop;	/* Stop after a file with a warning. */
86 	enum intt	  inttype;	/* which parser to use */
87 	struct man	 *pman;		/* persistent man parser */
88 	struct mdoc	 *pmdoc;	/* persistent mdoc parser */
89 	struct man	 *man;		/* man parser */
90 	struct mdoc	 *mdoc;		/* mdoc parser */
91 	struct roff	 *roff;		/* roff parser (!NULL) */
92 	struct regset	  regs;		/* roff registers */
93 	int		  reparse_count; /* finite interpolation stack */
94 	enum outt	  outtype; 	/* which output to use */
95 	out_mdoc	  outmdoc;	/* mdoc output ptr */
96 	out_man	  	  outman;	/* man output ptr */
97 	out_free	  outfree;	/* free output ptr */
98 	void		 *outdata;	/* data for output */
99 	char		  outopts[BUFSIZ]; /* buf of output opts */
100 };
101 
102 static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
103 	"SUCCESS",
104 	"RESERVED",
105 	"WARNING",
106 	"ERROR",
107 	"FATAL",
108 	"BADARG",
109 	"SYSERR"
110 };
111 
112 static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
113 	MANDOCERR_OK,
114 	MANDOCERR_WARNING,
115 	MANDOCERR_WARNING,
116 	MANDOCERR_ERROR,
117 	MANDOCERR_FATAL,
118 	MANDOCERR_MAX,
119 	MANDOCERR_MAX
120 };
121 
122 static	const char * const	mandocerrs[MANDOCERR_MAX] = {
123 	"ok",
124 
125 	"generic warning",
126 
127 	/* related to the prologue */
128 	"no title in document",
129 	"document title should be all caps",
130 	"unknown manual section",
131 	"cannot parse date argument",
132 	"prologue macros out of order",
133 	"duplicate prologue macro",
134 	"macro not allowed in prologue",
135 	"macro not allowed in body",
136 
137 	/* related to document structure */
138 	".so is fragile, better use ln(1)",
139 	"NAME section must come first",
140 	"bad NAME section contents",
141 	"manual name not yet set",
142 	"sections out of conventional order",
143 	"duplicate section name",
144 	"section not in conventional manual section",
145 
146 	/* related to macros and nesting */
147 	"skipping obsolete macro",
148 	"skipping paragraph macro",
149 	"blocks badly nested",
150 	"child violates parent syntax",
151 	"nested displays are not portable",
152 	"already in literal mode",
153 
154 	/* related to missing macro arguments */
155 	"skipping empty macro",
156 	"argument count wrong",
157 	"missing display type",
158 	"list type must come first",
159 	"tag lists require a width argument",
160 	"missing font type",
161 
162 	/* related to bad macro arguments */
163 	"skipping argument",
164 	"duplicate argument",
165 	"duplicate display type",
166 	"duplicate list type",
167 	"unknown AT&T UNIX version",
168 	"bad Boolean value",
169 	"unknown font",
170 	"unknown standard specifier",
171 	"bad width argument",
172 
173 	/* related to plain text */
174 	"blank line in non-literal context",
175 	"tab in non-literal context",
176 	"end of line whitespace",
177 	"bad comment style",
178 	"unknown escape sequence",
179 	"unterminated quoted string",
180 
181 	/* related to tables */
182 	"extra data cells",
183 
184 	"generic error",
185 
186 	/* related to tables */
187 	"bad table syntax",
188 	"bad table option",
189 	"bad table layout",
190 	"no table layout cells specified",
191 	"no table data cells specified",
192 	"ignore data in cell",
193 	"data block still open",
194 
195 	"input stack limit exceeded, infinite loop?",
196 	"skipping bad character",
197 	"skipping text before the first section header",
198 	"skipping unknown macro",
199 	"NOT IMPLEMENTED: skipping request",
200 	"line scope broken",
201 	"argument count wrong",
202 	"skipping end of block that is not open",
203 	"missing end of block",
204 	"scope open on exit",
205 	"uname(3) system call failed",
206 	"macro requires line argument(s)",
207 	"macro requires body argument(s)",
208 	"macro requires argument(s)",
209 	"missing list type",
210 	"line argument(s) will be lost",
211 	"body argument(s) will be lost",
212 
213 	"generic fatal error",
214 
215 	"column syntax is inconsistent",
216 	"NOT IMPLEMENTED: .Bd -file",
217 	"line scope broken, syntax violated",
218 	"argument count wrong, violates syntax",
219 	"child violates parent syntax",
220 	"argument count wrong, violates syntax",
221 	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
222 	"no document body",
223 	"no document prologue",
224 	"static buffer exhausted",
225 };
226 
/* Forward declarations, in alphabetical order. */
static	void	 fdesc(struct curparse *curp);
static	void	 ffile(const char *file, struct curparse *curp);
static	int	 mmsg(enum mandocerr t, void *arg,
			int ln, int col, const char *msg);
static	int	 moptions(enum intt *tflags, char *arg);
static	void	 parsebuf(struct curparse *curp,
			struct buf blk, int start);
static	void	 pdesc(struct curparse *curp);
static	int	 pfile(const char *file, struct curparse *curp);
static	void	 pset(const char *buf, int pos,
			struct curparse *curp);
static	int	 toptions(struct curparse *curp, char *arg);
static	void	 usage(void) __attribute__((noreturn));
static	void	 version(void) __attribute__((noreturn));
static	int	 woptions(struct curparse *curp, char *arg);
240 
241 static	const char	 *progname;
242 static	enum mandoclevel  file_status = MANDOCLEVEL_OK;
243 static	enum mandoclevel  exit_status = MANDOCLEVEL_OK;
244 
245 int
246 main(int argc, char *argv[])
247 {
248 	int		 c;
249 	struct curparse	 curp;
250 
251 	progname = strrchr(argv[0], '/');
252 	if (progname == NULL)
253 		progname = argv[0];
254 	else
255 		++progname;
256 
257 	memset(&curp, 0, sizeof(struct curparse));
258 
259 	curp.inttype = INTT_AUTO;
260 	curp.outtype = OUTT_ASCII;
261 	curp.wlevel  = MANDOCLEVEL_FATAL;
262 
263 	/* LINTED */
264 	while (-1 != (c = getopt(argc, argv, "m:O:T:VW:")))
265 		switch (c) {
266 		case ('m'):
267 			if ( ! moptions(&curp.inttype, optarg))
268 				return((int)MANDOCLEVEL_BADARG);
269 			break;
270 		case ('O'):
271 			(void)strlcat(curp.outopts, optarg, BUFSIZ);
272 			(void)strlcat(curp.outopts, ",", BUFSIZ);
273 			break;
274 		case ('T'):
275 			if ( ! toptions(&curp, optarg))
276 				return((int)MANDOCLEVEL_BADARG);
277 			break;
278 		case ('W'):
279 			if ( ! woptions(&curp, optarg))
280 				return((int)MANDOCLEVEL_BADARG);
281 			break;
282 		case ('V'):
283 			version();
284 			/* NOTREACHED */
285 		default:
286 			usage();
287 			/* NOTREACHED */
288 		}
289 
290 	argc -= optind;
291 	argv += optind;
292 
293 	if (NULL == *argv) {
294 		curp.file = "<stdin>";
295 		curp.fd = STDIN_FILENO;
296 
297 		fdesc(&curp);
298 	}
299 
300 	while (*argv) {
301 		ffile(*argv, &curp);
302 		if (MANDOCLEVEL_OK != exit_status && curp.wstop)
303 			break;
304 		++argv;
305 	}
306 
307 	if (curp.outfree)
308 		(*curp.outfree)(curp.outdata);
309 	if (curp.pmdoc)
310 		mdoc_free(curp.pmdoc);
311 	if (curp.pman)
312 		man_free(curp.pman);
313 	if (curp.roff)
314 		roff_free(curp.roff);
315 
316 	return((int)exit_status);
317 }
318 
319 
320 static void
321 version(void)
322 {
323 
324 	(void)printf("%s %s\n", progname, VERSION);
325 	exit((int)MANDOCLEVEL_OK);
326 }
327 
328 
329 static void
330 usage(void)
331 {
332 
333 	(void)fprintf(stderr, "usage: %s "
334 			"[-V] "
335 			"[-foption] "
336 			"[-mformat] "
337 			"[-Ooption] "
338 			"[-Toutput] "
339 			"[-Werr] "
340 			"[file...]\n",
341 			progname);
342 
343 	exit((int)MANDOCLEVEL_BADARG);
344 }
345 
346 static void
347 ffile(const char *file, struct curparse *curp)
348 {
349 
350 	/*
351 	 * Called once per input file.  Get the file ready for reading,
352 	 * pass it through to the parser-driver, then close it out.
353 	 * XXX: don't do anything special as this is only called for
354 	 * files; stdin goes directly to fdesc().
355 	 */
356 
357 	curp->file = file;
358 
359 	if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
360 		perror(curp->file);
361 		exit_status = MANDOCLEVEL_SYSERR;
362 		return;
363 	}
364 
365 	fdesc(curp);
366 
367 	if (-1 == close(curp->fd))
368 		perror(curp->file);
369 }
370 
371 static int
372 pfile(const char *file, struct curparse *curp)
373 {
374 	const char	*savefile;
375 	int		 fd, savefd;
376 
377 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
378 		perror(file);
379 		file_status = MANDOCLEVEL_SYSERR;
380 		return(0);
381 	}
382 
383 	savefile = curp->file;
384 	savefd = curp->fd;
385 
386 	curp->file = file;
387 	curp->fd = fd;
388 
389 	pdesc(curp);
390 
391 	curp->file = savefile;
392 	curp->fd = savefd;
393 
394 	if (-1 == close(fd))
395 		perror(file);
396 
397 	return(MANDOCLEVEL_FATAL > file_status ? 1 : 0);
398 }
399 
400 
401 static void
402 resize_buf(struct buf *buf, size_t initial)
403 {
404 
405 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
406 	buf->buf = realloc(buf->buf, buf->sz);
407 	if (NULL == buf->buf) {
408 		perror(NULL);
409 		exit((int)MANDOCLEVEL_SYSERR);
410 	}
411 }
412 
413 
414 static int
415 read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
416 {
417 	struct stat	 st;
418 	size_t		 off;
419 	ssize_t		 ssz;
420 
421 	if (-1 == fstat(curp->fd, &st)) {
422 		perror(curp->file);
423 		return(0);
424 	}
425 
426 	/*
427 	 * If we're a regular file, try just reading in the whole entry
428 	 * via mmap().  This is faster than reading it into blocks, and
429 	 * since each file is only a few bytes to begin with, I'm not
430 	 * concerned that this is going to tank any machines.
431 	 */
432 
433 	if (S_ISREG(st.st_mode)) {
434 		if (st.st_size >= (1U << 31)) {
435 			fprintf(stderr, "%s: input too large\n",
436 					curp->file);
437 			return(0);
438 		}
439 		*with_mmap = 1;
440 		fb->sz = (size_t)st.st_size;
441 		fb->buf = mmap(NULL, fb->sz, PROT_READ,
442 				MAP_FILE|MAP_SHARED, curp->fd, 0);
443 		if (fb->buf != MAP_FAILED)
444 			return(1);
445 	}
446 
447 	/*
448 	 * If this isn't a regular file (like, say, stdin), then we must
449 	 * go the old way and just read things in bit by bit.
450 	 */
451 
452 	*with_mmap = 0;
453 	off = 0;
454 	fb->sz = 0;
455 	fb->buf = NULL;
456 	for (;;) {
457 		if (off == fb->sz) {
458 			if (fb->sz == (1U << 31)) {
459 				fprintf(stderr, "%s: input too large\n",
460 						curp->file);
461 				break;
462 			}
463 			resize_buf(fb, 65536);
464 		}
465 		ssz = read(curp->fd, fb->buf + (int)off, fb->sz - off);
466 		if (ssz == 0) {
467 			fb->sz = off;
468 			return(1);
469 		}
470 		if (ssz == -1) {
471 			perror(curp->file);
472 			break;
473 		}
474 		off += (size_t)ssz;
475 	}
476 
477 	free(fb->buf);
478 	fb->buf = NULL;
479 	return(0);
480 }
481 
482 
483 static void
484 fdesc(struct curparse *curp)
485 {
486 
487 	/*
488 	 * Called once per file with an opened file descriptor.  All
489 	 * pre-file-parse operations (whether stdin or a file) should go
490 	 * here.
491 	 *
492 	 * This calls down into the nested parser, which drills down and
493 	 * fully parses a file and all its dependences (i.e., `so').  It
494 	 * then runs the cleanup validators and pushes to output.
495 	 */
496 
497 	/* Zero the parse type. */
498 
499 	curp->mdoc = NULL;
500 	curp->man = NULL;
501 	file_status = MANDOCLEVEL_OK;
502 
503 	/* Make sure the mandotory roff parser is initialised. */
504 
505 	if (NULL == curp->roff) {
506 		curp->roff = roff_alloc(&curp->regs, curp, mmsg);
507 		assert(curp->roff);
508 	}
509 
510 	/* Fully parse the file. */
511 
512 	pdesc(curp);
513 
514 	if (MANDOCLEVEL_FATAL <= file_status)
515 		goto cleanup;
516 
517 	/* NOTE a parser may not have been assigned, yet. */
518 
519 	if ( ! (curp->man || curp->mdoc)) {
520 		fprintf(stderr, "%s: Not a manual\n", curp->file);
521 		file_status = MANDOCLEVEL_FATAL;
522 		goto cleanup;
523 	}
524 
525 	/* Clean up the parse routine ASTs. */
526 
527 	if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
528 		assert(MANDOCLEVEL_FATAL <= file_status);
529 		goto cleanup;
530 	}
531 
532 	if (curp->man && ! man_endparse(curp->man)) {
533 		assert(MANDOCLEVEL_FATAL <= file_status);
534 		goto cleanup;
535 	}
536 
537 	assert(curp->roff);
538 	roff_endparse(curp->roff);
539 
540 	/*
541 	 * With -Wstop and warnings or errors of at least
542 	 * the requested level, do not produce output.
543 	 */
544 
545 	if (MANDOCLEVEL_OK != file_status && curp->wstop)
546 		goto cleanup;
547 
548 	/* If unset, allocate output dev now (if applicable). */
549 
550 	if ( ! (curp->outman && curp->outmdoc)) {
551 		switch (curp->outtype) {
552 		case (OUTT_XHTML):
553 			curp->outdata = xhtml_alloc(curp->outopts);
554 			break;
555 		case (OUTT_HTML):
556 			curp->outdata = html_alloc(curp->outopts);
557 			break;
558 		case (OUTT_ASCII):
559 			curp->outdata = ascii_alloc(curp->outopts);
560 			curp->outfree = ascii_free;
561 			break;
562 		case (OUTT_PDF):
563 			curp->outdata = pdf_alloc(curp->outopts);
564 			curp->outfree = pspdf_free;
565 			break;
566 		case (OUTT_PS):
567 			curp->outdata = ps_alloc(curp->outopts);
568 			curp->outfree = pspdf_free;
569 			break;
570 		default:
571 			break;
572 		}
573 
574 		switch (curp->outtype) {
575 		case (OUTT_HTML):
576 			/* FALLTHROUGH */
577 		case (OUTT_XHTML):
578 			curp->outman = html_man;
579 			curp->outmdoc = html_mdoc;
580 			curp->outfree = html_free;
581 			break;
582 		case (OUTT_TREE):
583 			curp->outman = tree_man;
584 			curp->outmdoc = tree_mdoc;
585 			break;
586 		case (OUTT_PDF):
587 			/* FALLTHROUGH */
588 		case (OUTT_ASCII):
589 			/* FALLTHROUGH */
590 		case (OUTT_PS):
591 			curp->outman = terminal_man;
592 			curp->outmdoc = terminal_mdoc;
593 			break;
594 		default:
595 			break;
596 		}
597 	}
598 
599 	/* Execute the out device, if it exists. */
600 
601 	if (curp->man && curp->outman)
602 		(*curp->outman)(curp->outdata, curp->man);
603 	if (curp->mdoc && curp->outmdoc)
604 		(*curp->outmdoc)(curp->outdata, curp->mdoc);
605 
606  cleanup:
607 
608 	memset(&curp->regs, 0, sizeof(struct regset));
609 
610 	/* Reset the current-parse compilers. */
611 
612 	if (curp->mdoc)
613 		mdoc_reset(curp->mdoc);
614 	if (curp->man)
615 		man_reset(curp->man);
616 
617 	assert(curp->roff);
618 	roff_reset(curp->roff);
619 
620 	if (exit_status < file_status)
621 		exit_status = file_status;
622 
623 	return;
624 }
625 
626 static void
627 pdesc(struct curparse *curp)
628 {
629 	struct buf	 blk;
630 	int		 with_mmap;
631 
632 	/*
633 	 * Run for each opened file; may be called more than once for
634 	 * each full parse sequence if the opened file is nested (i.e.,
635 	 * from `so').  Simply sucks in the whole file and moves into
636 	 * the parse phase for the file.
637 	 */
638 
639 	if ( ! read_whole_file(curp, &blk, &with_mmap)) {
640 		file_status = MANDOCLEVEL_SYSERR;
641 		return;
642 	}
643 
644 	/* Line number is per-file. */
645 
646 	curp->line = 1;
647 
648 	parsebuf(curp, blk, 1);
649 
650 	if (with_mmap)
651 		munmap(blk.buf, blk.sz);
652 	else
653 		free(blk.buf);
654 }
655 
656 static void
657 parsebuf(struct curparse *curp, struct buf blk, int start)
658 {
659 	struct buf	 ln;
660 	enum rofferr	 rr;
661 	int		 i, of, rc;
662 	int		 pos; /* byte number in the ln buffer */
663 	int		 lnn; /* line number in the real file */
664 	unsigned char	 c;
665 
666 	/*
667 	 * Main parse routine for an opened file.  This is called for
668 	 * each opened file and simply loops around the full input file,
669 	 * possibly nesting (i.e., with `so').
670 	 */
671 
672 	memset(&ln, 0, sizeof(struct buf));
673 
674 	lnn = curp->line;
675 	pos = 0;
676 
677 	for (i = 0; i < (int)blk.sz; ) {
678 		if (0 == pos && '\0' == blk.buf[i])
679 			break;
680 
681 		if (start) {
682 			curp->line = lnn;
683 			curp->reparse_count = 0;
684 		}
685 
686 		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
687 			if ('\n' == blk.buf[i]) {
688 				++i;
689 				++lnn;
690 				break;
691 			}
692 
693 			/*
694 			 * Warn about bogus characters.  If you're using
695 			 * non-ASCII encoding, you're screwing your
696 			 * readers.  Since I'd rather this not happen,
697 			 * I'll be helpful and drop these characters so
698 			 * we don't display gibberish.  Note to manual
699 			 * writers: use special characters.
700 			 */
701 
702 			c = (unsigned char) blk.buf[i];
703 
704 			if ( ! (isascii(c) &&
705 					(isgraph(c) || isblank(c)))) {
706 				mmsg(MANDOCERR_BADCHAR, curp,
707 				    curp->line, pos, "ignoring byte");
708 				i++;
709 				continue;
710 			}
711 
712 			/* Trailing backslash = a plain char. */
713 
714 			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
715 				if (pos >= (int)ln.sz)
716 					resize_buf(&ln, 256);
717 				ln.buf[pos++] = blk.buf[i++];
718 				continue;
719 			}
720 
721 			/* Found escape & at least one other char. */
722 
723 			if ('\n' == blk.buf[i + 1]) {
724 				i += 2;
725 				/* Escaped newlines are skipped over */
726 				++lnn;
727 				continue;
728 			}
729 
730 			if ('"' == blk.buf[i + 1]) {
731 				i += 2;
732 				/* Comment, skip to end of line */
733 				for (; i < (int)blk.sz; ++i) {
734 					if ('\n' == blk.buf[i]) {
735 						++i;
736 						++lnn;
737 						break;
738 					}
739 				}
740 
741 				/* Backout trailing whitespaces */
742 				for (; pos > 0; --pos) {
743 					if (ln.buf[pos - 1] != ' ')
744 						break;
745 					if (pos > 2 && ln.buf[pos - 2] == '\\')
746 						break;
747 				}
748 				break;
749 			}
750 
751 			/* Some other escape sequence, copy & cont. */
752 
753 			if (pos + 1 >= (int)ln.sz)
754 				resize_buf(&ln, 256);
755 
756 			ln.buf[pos++] = blk.buf[i++];
757 			ln.buf[pos++] = blk.buf[i++];
758 		}
759 
760  		if (pos >= (int)ln.sz)
761 			resize_buf(&ln, 256);
762 
763 		ln.buf[pos] = '\0';
764 
765 		/*
766 		 * A significant amount of complexity is contained by
767 		 * the roff preprocessor.  It's line-oriented but can be
768 		 * expressed on one line, so we need at times to
769 		 * readjust our starting point and re-run it.  The roff
770 		 * preprocessor can also readjust the buffers with new
771 		 * data, so we pass them in wholesale.
772 		 */
773 
774 		of = 0;
775 
776 rerun:
777 		rr = roff_parseln
778 			(curp->roff, curp->line,
779 			 &ln.buf, &ln.sz, of, &of);
780 
781 		switch (rr) {
782 		case (ROFF_REPARSE):
783 			if (REPARSE_LIMIT >= ++curp->reparse_count)
784 				parsebuf(curp, ln, 0);
785 			else
786 				mmsg(MANDOCERR_ROFFLOOP, curp,
787 				    curp->line, pos, NULL);
788 			pos = 0;
789 			continue;
790 		case (ROFF_APPEND):
791 			pos = strlen(ln.buf);
792 			continue;
793 		case (ROFF_RERUN):
794 			goto rerun;
795 		case (ROFF_IGN):
796 			pos = 0;
797 			continue;
798 		case (ROFF_ERR):
799 			assert(MANDOCLEVEL_FATAL <= file_status);
800 			break;
801 		case (ROFF_SO):
802 			if (pfile(ln.buf + of, curp)) {
803 				pos = 0;
804 				continue;
805 			} else
806 				break;
807 		default:
808 			break;
809 		}
810 
811 		/*
812 		 * If we encounter errors in the recursive parsebuf()
813 		 * call, make sure we don't continue parsing.
814 		 */
815 
816 		if (MANDOCLEVEL_FATAL <= file_status)
817 			break;
818 
819 		/*
820 		 * If input parsers have not been allocated, do so now.
821 		 * We keep these instanced betwen parsers, but set them
822 		 * locally per parse routine since we can use different
823 		 * parsers with each one.
824 		 */
825 
826 		if ( ! (curp->man || curp->mdoc))
827 			pset(ln.buf + of, pos - of, curp);
828 
829 		/*
830 		 * Lastly, push down into the parsers themselves.  One
831 		 * of these will have already been set in the pset()
832 		 * routine.
833 		 * If libroff returns ROFF_TBL, then add it to the
834 		 * currently open parse.  Since we only get here if
835 		 * there does exist data (see tbl_data.c), we're
836 		 * guaranteed that something's been allocated.
837 		 */
838 
839 		if (ROFF_TBL == rr) {
840 			assert(curp->man || curp->mdoc);
841 			if (curp->man)
842 				man_addspan(curp->man, roff_span(curp->roff));
843 			else
844 				mdoc_addspan(curp->mdoc, roff_span(curp->roff));
845 
846 		} else if (curp->man || curp->mdoc) {
847 			rc = curp->man ?
848 				man_parseln(curp->man,
849 					curp->line, ln.buf, of) :
850 				mdoc_parseln(curp->mdoc,
851 					curp->line, ln.buf, of);
852 
853 			if ( ! rc) {
854 				assert(MANDOCLEVEL_FATAL <= file_status);
855 				break;
856 			}
857 		}
858 
859 		/* Temporary buffers typically are not full. */
860 
861 		if (0 == start && '\0' == blk.buf[i])
862 			break;
863 
864 		/* Start the next input line. */
865 
866 		pos = 0;
867 	}
868 
869 	free(ln.buf);
870 }
871 
872 static void
873 pset(const char *buf, int pos, struct curparse *curp)
874 {
875 	int		 i;
876 
877 	/*
878 	 * Try to intuit which kind of manual parser should be used.  If
879 	 * passed in by command-line (-man, -mdoc), then use that
880 	 * explicitly.  If passed as -mandoc, then try to guess from the
881 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
882 	 * default to -man, which is more lenient.
883 	 *
884 	 * Separate out pmdoc/pman from mdoc/man: the first persists
885 	 * through all parsers, while the latter is used per-parse.
886 	 */
887 
888 	if ('.' == buf[0] || '\'' == buf[0]) {
889 		for (i = 1; buf[i]; i++)
890 			if (' ' != buf[i] && '\t' != buf[i])
891 				break;
892 		if ('\0' == buf[i])
893 			return;
894 	}
895 
896 	switch (curp->inttype) {
897 	case (INTT_MDOC):
898 		if (NULL == curp->pmdoc)
899 			curp->pmdoc = mdoc_alloc
900 				(&curp->regs, curp, mmsg);
901 		assert(curp->pmdoc);
902 		curp->mdoc = curp->pmdoc;
903 		return;
904 	case (INTT_MAN):
905 		if (NULL == curp->pman)
906 			curp->pman = man_alloc
907 				(&curp->regs, curp, mmsg);
908 		assert(curp->pman);
909 		curp->man = curp->pman;
910 		return;
911 	default:
912 		break;
913 	}
914 
915 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
916 		if (NULL == curp->pmdoc)
917 			curp->pmdoc = mdoc_alloc
918 				(&curp->regs, curp, mmsg);
919 		assert(curp->pmdoc);
920 		curp->mdoc = curp->pmdoc;
921 		return;
922 	}
923 
924 	if (NULL == curp->pman)
925 		curp->pman = man_alloc(&curp->regs, curp, mmsg);
926 	assert(curp->pman);
927 	curp->man = curp->pman;
928 }
929 
930 static int
931 moptions(enum intt *tflags, char *arg)
932 {
933 
934 	if (0 == strcmp(arg, "doc"))
935 		*tflags = INTT_MDOC;
936 	else if (0 == strcmp(arg, "andoc"))
937 		*tflags = INTT_AUTO;
938 	else if (0 == strcmp(arg, "an"))
939 		*tflags = INTT_MAN;
940 	else {
941 		fprintf(stderr, "%s: Bad argument\n", arg);
942 		return(0);
943 	}
944 
945 	return(1);
946 }
947 
948 static int
949 toptions(struct curparse *curp, char *arg)
950 {
951 
952 	if (0 == strcmp(arg, "ascii"))
953 		curp->outtype = OUTT_ASCII;
954 	else if (0 == strcmp(arg, "lint")) {
955 		curp->outtype = OUTT_LINT;
956 		curp->wlevel  = MANDOCLEVEL_WARNING;
957 	}
958 	else if (0 == strcmp(arg, "tree"))
959 		curp->outtype = OUTT_TREE;
960 	else if (0 == strcmp(arg, "html"))
961 		curp->outtype = OUTT_HTML;
962 	else if (0 == strcmp(arg, "xhtml"))
963 		curp->outtype = OUTT_XHTML;
964 	else if (0 == strcmp(arg, "ps"))
965 		curp->outtype = OUTT_PS;
966 	else if (0 == strcmp(arg, "pdf"))
967 		curp->outtype = OUTT_PDF;
968 	else {
969 		fprintf(stderr, "%s: Bad argument\n", arg);
970 		return(0);
971 	}
972 
973 	return(1);
974 }
975 
976 static int
977 woptions(struct curparse *curp, char *arg)
978 {
979 	char		*v, *o;
980 	const char	*toks[6];
981 
982 	toks[0] = "stop";
983 	toks[1] = "all";
984 	toks[2] = "warning";
985 	toks[3] = "error";
986 	toks[4] = "fatal";
987 	toks[5] = NULL;
988 
989 	while (*arg) {
990 		o = arg;
991 		switch (getsubopt(&arg, UNCONST(toks), &v)) {
992 		case (0):
993 			curp->wstop = 1;
994 			break;
995 		case (1):
996 			/* FALLTHROUGH */
997 		case (2):
998 			curp->wlevel = MANDOCLEVEL_WARNING;
999 			break;
1000 		case (3):
1001 			curp->wlevel = MANDOCLEVEL_ERROR;
1002 			break;
1003 		case (4):
1004 			curp->wlevel = MANDOCLEVEL_FATAL;
1005 			break;
1006 		default:
1007 			fprintf(stderr, "-W%s: Bad argument\n", o);
1008 			return(0);
1009 		}
1010 	}
1011 
1012 	return(1);
1013 }
1014 
1015 static int
1016 mmsg(enum mandocerr t, void *arg, int ln, int col, const char *msg)
1017 {
1018 	struct curparse *cp;
1019 	enum mandoclevel level;
1020 
1021 	level = MANDOCLEVEL_FATAL;
1022 	while (t < mandoclimits[level])
1023 		/* LINTED */
1024 		level--;
1025 
1026 	cp = (struct curparse *)arg;
1027 	if (level < cp->wlevel)
1028 		return(1);
1029 
1030 	fprintf(stderr, "%s:%d:%d: %s: %s",
1031 	    cp->file, ln, col + 1, mandoclevels[level], mandocerrs[t]);
1032 	if (msg)
1033 		fprintf(stderr, ": %s", msg);
1034 	fputc('\n', stderr);
1035 
1036 	if (file_status < level)
1037 		file_status = level;
1038 
1039 	return(level < MANDOCLEVEL_FATAL);
1040 }
1041