xref: /openbsd-src/usr.bin/mandoc/roff.c (revision be38755c412cc72cb8d40f51ea70c9893196afff)
1 /*	$Id: roff.c,v 1.42 2011/09/18 15:54:48 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <assert.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "mandoc.h"
24 #include "libroff.h"
25 #include "libmandoc.h"
26 
/*
 * Maximum number of nested if-else conditionals: the capacity of the
 * rstack[] array in struct roff, which holds one entry per `ie' whose
 * matching `el' has not been seen yet.
 */
#define	RSTACK_MAX	128
29 
/*
 * Tokens for the roff requests handled in this file.  The values are
 * used as indices into the roffs[] table, so both must list the
 * requests in the same order.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_, /* the `T&' request */
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock, /* block close, `..' */
	ROFF_ccond, /* conditional close, `\}' */
	ROFF_USERDEF, /* user-defined macro; not in the hash table */
	ROFF_MAX /* number of tokens; also returned for "not found" */
};
64 
/*
 * Result of evaluating a conditional.  The conditional handlers let
 * the body through (ROFF_CONT) for ALLOW and discard it (ROFF_IGN)
 * for DENY.
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
69 
/*
 * A single register entity.  If "set" is zero, the value of the
 * register should be the default one, which is per-register.
 * Registers are assumed to be unsigned ints for now.
 * One of these exists per enum regs value in the regs[] array of
 * struct roff.
 */
struct	reg {
	int		 set; /* whether set or not */
	unsigned int	 u; /* unsigned integer */
};
79 
/*
 * An incredibly-simple string buffer: a pointer together with the
 * cached length of the string it points to.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
87 
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used for the strtab and xmbtab lists in struct roff; whole lists
 * are released with roff_freestr().
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
96 
/*
 * State of one roff preprocessor instance.  Allocated by roff_alloc(),
 * cleared for reuse by roff_reset() and released by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	struct roffnode	*last; /* leaf of stack */
	enum roffrule	 rstack[RSTACK_MAX]; /* negated `ie' results for `el' */
	int		 rstackpos; /* position in rstack; -1 when empty */
	struct reg	 regs[REG__MAX];
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
114 
/*
 * One entry of the stack of open scopes (block definitions and
 * conditionals).  Pushed by roffnode_push() and released, together
 * with "name" and "end", by roffnode_pop().
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty (negative) */
	enum roffrule	 rule; /* current evaluation rule */
};
125 
/*
 * Common parameter list of all request handlers, and the matching
 * function pointer type stored in struct roffmac.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
		 	 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
136 
/*
 * Description of one request: its name, its handlers, and the link
 * used to chain it into a bucket of the lookup table (see
 * roffhash_init()).  "text" and "sub" are only called for requests
 * that open a scope; roff_parseln() asserts that they are set.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next;
};
146 
/*
 * A predefined string: roff_alloc() and roff_reset() enter every
 * entry of the predefs[] table with roff_setstr().
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Expands to one struct predef initializer, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
154 
/*
 * Forward declarations of the file-local helpers and of the request
 * handlers referenced from the roffs[] table.
 */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char *);
static	void		 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
197 
/*
 * Lookup table for request names, see roffhash_find().
 * Names are hashed on their first character, which must lie in the
 * range ASCII_LO..ASCII_HI; there is one bucket per such character
 * and each bucket heads a chain linked through roffmac.next.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
205 
/*
 * Table of the built-in requests, indexed by enum rofft:
 * roffhash_find() turns a table position back into a token and
 * roff_parseln() indexes the table with a token, so the entries must
 * stay in the same order as the enumeration.  The last entry
 * (ROFF_USERDEF) has no name and is not entered into the hash table.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
239 
/*
 * Array of injected predefined strings.  The initializers come from
 * "predefs.in" by way of the PREDEF() macro.
 * NOTE(review): PREDEFS_MAX presumably has to equal the number of
 * PREDEF() lines in that file -- confirm when editing either one.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
245 
/*
 * Map a request name to its hash bucket: the offset of its first
 * character from ASCII_LO.  The caller must have checked that the
 * character lies in ASCII_LO..ASCII_HI.  See roffhash_find().
 * The argument is parenthesized so that any pointer expression can be
 * passed safely.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
248 
249 static void
250 roffhash_init(void)
251 {
252 	struct roffmac	 *n;
253 	int		  buc, i;
254 
255 	for (i = 0; i < (int)ROFF_USERDEF; i++) {
256 		assert(roffs[i].name[0] >= ASCII_LO);
257 		assert(roffs[i].name[0] <= ASCII_HI);
258 
259 		buc = ROFF_HASH(roffs[i].name);
260 
261 		if (NULL != (n = hash[buc])) {
262 			for ( ; n->next; n = n->next)
263 				/* Do nothing. */ ;
264 			n->next = &roffs[i];
265 		} else
266 			hash[buc] = &roffs[i];
267 	}
268 }
269 
270 /*
271  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
272  * the nil-terminated string name could be found.
273  */
274 static enum rofft
275 roffhash_find(const char *p, size_t s)
276 {
277 	int		 buc;
278 	struct roffmac	*n;
279 
280 	/*
281 	 * libroff has an extremely simple hashtable, for the time
282 	 * being, which simply keys on the first character, which must
283 	 * be printable, then walks a chain.  It works well enough until
284 	 * optimised.
285 	 */
286 
287 	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
288 		return(ROFF_MAX);
289 
290 	buc = ROFF_HASH(p);
291 
292 	if (NULL == (n = hash[buc]))
293 		return(ROFF_MAX);
294 	for ( ; n; n = n->next)
295 		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
296 			return((enum rofft)(n - roffs));
297 
298 	return(ROFF_MAX);
299 }
300 
301 
302 /*
303  * Pop the current node off of the stack of roff instructions currently
304  * pending.
305  */
306 static void
307 roffnode_pop(struct roff *r)
308 {
309 	struct roffnode	*p;
310 
311 	assert(r->last);
312 	p = r->last;
313 
314 	r->last = r->last->parent;
315 	free(p->name);
316 	free(p->end);
317 	free(p);
318 }
319 
320 
321 /*
322  * Push a roff node onto the instruction stack.  This must later be
323  * removed with roffnode_pop().
324  */
325 static void
326 roffnode_push(struct roff *r, enum rofft tok, const char *name,
327 		int line, int col)
328 {
329 	struct roffnode	*p;
330 
331 	p = mandoc_calloc(1, sizeof(struct roffnode));
332 	p->tok = tok;
333 	if (name)
334 		p->name = mandoc_strdup(name);
335 	p->parent = r->last;
336 	p->line = line;
337 	p->col = col;
338 	p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
339 
340 	r->last = p;
341 }
342 
343 
344 static void
345 roff_free1(struct roff *r)
346 {
347 	struct tbl_node	*t;
348 	struct eqn_node	*e;
349 	int		 i;
350 
351 	while (NULL != (t = r->first_tbl)) {
352 		r->first_tbl = t->next;
353 		tbl_free(t);
354 	}
355 
356 	r->first_tbl = r->last_tbl = r->tbl = NULL;
357 
358 	while (NULL != (e = r->first_eqn)) {
359 		r->first_eqn = e->next;
360 		eqn_free(e);
361 	}
362 
363 	r->first_eqn = r->last_eqn = r->eqn = NULL;
364 
365 	while (r->last)
366 		roffnode_pop(r);
367 
368 	roff_freestr(r->strtab);
369 	roff_freestr(r->xmbtab);
370 
371 	r->strtab = r->xmbtab = NULL;
372 
373 	if (r->xtab)
374 		for (i = 0; i < 128; i++)
375 			free(r->xtab[i].p);
376 
377 	free(r->xtab);
378 	r->xtab = NULL;
379 }
380 
381 void
382 roff_reset(struct roff *r)
383 {
384 	int		 i;
385 
386 	roff_free1(r);
387 
388 	memset(&r->regs, 0, sizeof(struct reg) * REG__MAX);
389 
390 	for (i = 0; i < PREDEFS_MAX; i++)
391 		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
392 }
393 
394 
/*
 * Destroy a parser state: release all data it owns, then the
 * structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
402 
403 
404 struct roff *
405 roff_alloc(struct mparse *parse)
406 {
407 	struct roff	*r;
408 	int		 i;
409 
410 	r = mandoc_calloc(1, sizeof(struct roff));
411 	r->parse = parse;
412 	r->rstackpos = -1;
413 
414 	roffhash_init();
415 
416 	for (i = 0; i < PREDEFS_MAX; i++)
417 		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
418 
419 	return(r);
420 }
421 
422 /*
423  * Pre-filter each and every line for reserved words (one beginning with
424  * `\*', e.g., `\*(ab').  These must be handled before the actual line
425  * is processed.
426  * This also checks the syntax of regular escapes.
427  */
428 static void
429 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
430 {
431 	enum mandoc_esc	 esc;
432 	const char	*stesc;	/* start of an escape sequence ('\\') */
433 	const char	*stnam;	/* start of the name, after "[(*" */
434 	const char	*cp;	/* end of the name, e.g. before ']' */
435 	const char	*res;	/* the string to be substituted */
436 	int		 i, maxl;
437 	size_t		 nsz;
438 	char		*n;
439 
440 again:
441 	cp = *bufp + pos;
442 	while (NULL != (cp = strchr(cp, '\\'))) {
443 		stesc = cp++;
444 
445 		/*
446 		 * The second character must be an asterisk.
447 		 * If it isn't, skip it anyway:  It is escaped,
448 		 * so it can't start another escape sequence.
449 		 */
450 
451 		if ('\0' == *cp)
452 			return;
453 
454 		if ('*' != *cp) {
455 			res = cp;
456 			esc = mandoc_escape(&cp, NULL, NULL);
457 			if (ESCAPE_ERROR != esc)
458 				continue;
459 			cp = res;
460 			mandoc_msg
461 				(MANDOCERR_BADESCAPE, r->parse,
462 				 ln, (int)(stesc - *bufp), NULL);
463 			return;
464 		}
465 
466 		cp++;
467 
468 		/*
469 		 * The third character decides the length
470 		 * of the name of the string.
471 		 * Save a pointer to the name.
472 		 */
473 
474 		switch (*cp) {
475 		case ('\0'):
476 			return;
477 		case ('('):
478 			cp++;
479 			maxl = 2;
480 			break;
481 		case ('['):
482 			cp++;
483 			maxl = 0;
484 			break;
485 		default:
486 			maxl = 1;
487 			break;
488 		}
489 		stnam = cp;
490 
491 		/* Advance to the end of the name. */
492 
493 		for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
494 			if ('\0' == *cp) {
495 				mandoc_msg
496 					(MANDOCERR_BADESCAPE,
497 					 r->parse, ln,
498 					 (int)(stesc - *bufp), NULL);
499 				return;
500 			}
501 			if (0 == maxl && ']' == *cp)
502 				break;
503 		}
504 
505 		/*
506 		 * Retrieve the replacement string; if it is
507 		 * undefined, resume searching for escapes.
508 		 */
509 
510 		res = roff_getstrn(r, stnam, (size_t)i);
511 
512 		if (NULL == res) {
513 			mandoc_msg
514 				(MANDOCERR_BADESCAPE, r->parse,
515 				 ln, (int)(stesc - *bufp), NULL);
516 			res = "";
517 		}
518 
519 		/* Replace the escape sequence by the string. */
520 
521 		pos = stesc - *bufp;
522 
523 		nsz = *szp + strlen(res) + 1;
524 		n = mandoc_malloc(nsz);
525 
526 		strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
527 		strlcat(n, res, nsz);
528 		strlcat(n, cp + (maxl ? 0 : 1), nsz);
529 
530 		free(*bufp);
531 
532 		*bufp = n;
533 		*szp = nsz;
534 		goto again;
535 	}
536 }
537 
538 /*
539  * Process text streams: convert all breakable hyphens into ASCII_HYPH.
540  */
541 static enum rofferr
542 roff_parsetext(char *p)
543 {
544 	char		 l, r;
545 	size_t		 sz;
546 	const char	*start;
547 	enum mandoc_esc	 esc;
548 
549 	start = p;
550 
551 	while ('\0' != *p) {
552 		sz = strcspn(p, "-\\");
553 		p += sz;
554 
555 		if ('\0' == *p)
556 			break;
557 
558 		if ('\\' == *p) {
559 			/* Skip over escapes. */
560 			p++;
561 			esc = mandoc_escape
562 				((const char **)&p, NULL, NULL);
563 			if (ESCAPE_ERROR == esc)
564 				break;
565 			continue;
566 		} else if (p == start) {
567 			p++;
568 			continue;
569 		}
570 
571 		l = *(p - 1);
572 		r = *(p + 1);
573 		if ('\\' != l &&
574 				'\t' != r && '\t' != l &&
575 				' ' != r && ' ' != l &&
576 				'-' != r && '-' != l &&
577 				! isdigit((unsigned char)l) &&
578 				! isdigit((unsigned char)r))
579 			*p = ASCII_HYPH;
580 		p++;
581 	}
582 
583 	return(ROFF_CONT);
584 }
585 
586 enum rofferr
587 roff_parseln(struct roff *r, int ln, char **bufp,
588 		size_t *szp, int pos, int *offs)
589 {
590 	enum rofft	 t;
591 	enum rofferr	 e;
592 	int		 ppos, ctl;
593 
594 	/*
595 	 * Run the reserved-word filter only if we have some reserved
596 	 * words to fill in.
597 	 */
598 
599 	roff_res(r, bufp, szp, ln, pos);
600 
601 	ppos = pos;
602 	ctl = mandoc_getcontrol(*bufp, &pos);
603 
604 	/*
605 	 * First, if a scope is open and we're not a macro, pass the
606 	 * text through the macro's filter.  If a scope isn't open and
607 	 * we're not a macro, just let it through.
608 	 * Finally, if there's an equation scope open, divert it into it
609 	 * no matter our state.
610 	 */
611 
612 	if (r->last && ! ctl) {
613 		t = r->last->tok;
614 		assert(roffs[t].text);
615 		e = (*roffs[t].text)
616 			(r, t, bufp, szp, ln, pos, pos, offs);
617 		assert(ROFF_IGN == e || ROFF_CONT == e);
618 		if (ROFF_CONT != e)
619 			return(e);
620 		if (r->eqn)
621 			return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
622 		if (r->tbl)
623 			return(tbl_read(r->tbl, ln, *bufp, pos));
624 		return(roff_parsetext(*bufp + pos));
625 	} else if ( ! ctl) {
626 		if (r->eqn)
627 			return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
628 		if (r->tbl)
629 			return(tbl_read(r->tbl, ln, *bufp, pos));
630 		return(roff_parsetext(*bufp + pos));
631 	} else if (r->eqn)
632 		return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
633 
634 	/*
635 	 * If a scope is open, go to the child handler for that macro,
636 	 * as it may want to preprocess before doing anything with it.
637 	 * Don't do so if an equation is open.
638 	 */
639 
640 	if (r->last) {
641 		t = r->last->tok;
642 		assert(roffs[t].sub);
643 		return((*roffs[t].sub)
644 				(r, t, bufp, szp,
645 				 ln, ppos, pos, offs));
646 	}
647 
648 	/*
649 	 * Lastly, as we've no scope open, try to look up and execute
650 	 * the new macro.  If no macro is found, simply return and let
651 	 * the compilers handle it.
652 	 */
653 
654 	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
655 		return(ROFF_CONT);
656 
657 	assert(roffs[t].proc);
658 	return((*roffs[t].proc)
659 			(r, t, bufp, szp,
660 			 ln, ppos, pos, offs));
661 }
662 
663 
664 void
665 roff_endparse(struct roff *r)
666 {
667 
668 	if (r->last)
669 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
670 				r->last->line, r->last->col, NULL);
671 
672 	if (r->eqn) {
673 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
674 				r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
675 		eqn_end(&r->eqn);
676 	}
677 
678 	if (r->tbl) {
679 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
680 				r->tbl->line, r->tbl->pos, NULL);
681 		tbl_end(&r->tbl);
682 	}
683 }
684 
685 /*
686  * Parse a roff node's type from the input buffer.  This must be in the
687  * form of ".foo xxx" in the usual way.
688  */
689 static enum rofft
690 roff_parse(struct roff *r, const char *buf, int *pos)
691 {
692 	const char	*mac;
693 	size_t		 maclen;
694 	enum rofft	 t;
695 
696 	if ('\0' == buf[*pos] || '"' == buf[*pos] ||
697 			'\t' == buf[*pos] || ' ' == buf[*pos])
698 		return(ROFF_MAX);
699 
700 	/*
701 	 * We stop the macro parse at an escape, tab, space, or nil.
702 	 * However, `\}' is also a valid macro, so make sure we don't
703 	 * clobber it by seeing the `\' as the end of token.
704 	 */
705 
706 	mac = buf + *pos;
707 	maclen = strcspn(mac + 1, " \\\t\0") + 1;
708 
709 	t = (r->current_string = roff_getstrn(r, mac, maclen))
710 	    ? ROFF_USERDEF : roffhash_find(mac, maclen);
711 
712 	*pos += (int)maclen;
713 
714 	while (buf[*pos] && ' ' == buf[*pos])
715 		(*pos)++;
716 
717 	return(t);
718 }
719 
720 /* ARGSUSED */
721 static enum rofferr
722 roff_cblock(ROFF_ARGS)
723 {
724 
725 	/*
726 	 * A block-close `..' should only be invoked as a child of an
727 	 * ignore macro, otherwise raise a warning and just ignore it.
728 	 */
729 
730 	if (NULL == r->last) {
731 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
732 		return(ROFF_IGN);
733 	}
734 
735 	switch (r->last->tok) {
736 	case (ROFF_am):
737 		/* FALLTHROUGH */
738 	case (ROFF_ami):
739 		/* FALLTHROUGH */
740 	case (ROFF_am1):
741 		/* FALLTHROUGH */
742 	case (ROFF_de):
743 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
744 		/* FALLTHROUGH */
745 	case (ROFF_dei):
746 		/* FALLTHROUGH */
747 	case (ROFF_ig):
748 		break;
749 	default:
750 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
751 		return(ROFF_IGN);
752 	}
753 
754 	if ((*bufp)[pos])
755 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
756 
757 	roffnode_pop(r);
758 	roffnode_cleanscope(r);
759 	return(ROFF_IGN);
760 
761 }
762 
763 
764 static void
765 roffnode_cleanscope(struct roff *r)
766 {
767 
768 	while (r->last) {
769 		if (--r->last->endspan < 0)
770 			break;
771 		roffnode_pop(r);
772 	}
773 }
774 
775 
776 /* ARGSUSED */
777 static enum rofferr
778 roff_ccond(ROFF_ARGS)
779 {
780 
781 	if (NULL == r->last) {
782 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
783 		return(ROFF_IGN);
784 	}
785 
786 	switch (r->last->tok) {
787 	case (ROFF_el):
788 		/* FALLTHROUGH */
789 	case (ROFF_ie):
790 		/* FALLTHROUGH */
791 	case (ROFF_if):
792 		break;
793 	default:
794 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
795 		return(ROFF_IGN);
796 	}
797 
798 	if (r->last->endspan > -1) {
799 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
800 		return(ROFF_IGN);
801 	}
802 
803 	if ((*bufp)[pos])
804 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
805 
806 	roffnode_pop(r);
807 	roffnode_cleanscope(r);
808 	return(ROFF_IGN);
809 }
810 
811 
812 /* ARGSUSED */
813 static enum rofferr
814 roff_block(ROFF_ARGS)
815 {
816 	int		sv;
817 	size_t		sz;
818 	char		*name;
819 
820 	name = NULL;
821 
822 	if (ROFF_ig != tok) {
823 		if ('\0' == (*bufp)[pos]) {
824 			mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
825 			return(ROFF_IGN);
826 		}
827 
828 		/*
829 		 * Re-write `de1', since we don't really care about
830 		 * groff's strange compatibility mode, into `de'.
831 		 */
832 
833 		if (ROFF_de1 == tok)
834 			tok = ROFF_de;
835 		if (ROFF_de == tok)
836 			name = *bufp + pos;
837 		else
838 			mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
839 			    roffs[tok].name);
840 
841 		while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
842 			pos++;
843 
844 		while (isspace((unsigned char)(*bufp)[pos]))
845 			(*bufp)[pos++] = '\0';
846 	}
847 
848 	roffnode_push(r, tok, name, ln, ppos);
849 
850 	/*
851 	 * At the beginning of a `de' macro, clear the existing string
852 	 * with the same name, if there is one.  New content will be
853 	 * added from roff_block_text() in multiline mode.
854 	 */
855 
856 	if (ROFF_de == tok)
857 		roff_setstr(r, name, "", 0);
858 
859 	if ('\0' == (*bufp)[pos])
860 		return(ROFF_IGN);
861 
862 	/* If present, process the custom end-of-line marker. */
863 
864 	sv = pos;
865 	while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
866 		pos++;
867 
868 	/*
869 	 * Note: groff does NOT like escape characters in the input.
870 	 * Instead of detecting this, we're just going to let it fly and
871 	 * to hell with it.
872 	 */
873 
874 	assert(pos > sv);
875 	sz = (size_t)(pos - sv);
876 
877 	if (1 == sz && '.' == (*bufp)[sv])
878 		return(ROFF_IGN);
879 
880 	r->last->end = mandoc_malloc(sz + 1);
881 
882 	memcpy(r->last->end, *bufp + sv, sz);
883 	r->last->end[(int)sz] = '\0';
884 
885 	if ((*bufp)[pos])
886 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
887 
888 	return(ROFF_IGN);
889 }
890 
891 
892 /* ARGSUSED */
893 static enum rofferr
894 roff_block_sub(ROFF_ARGS)
895 {
896 	enum rofft	t;
897 	int		i, j;
898 
899 	/*
900 	 * First check whether a custom macro exists at this level.  If
901 	 * it does, then check against it.  This is some of groff's
902 	 * stranger behaviours.  If we encountered a custom end-scope
903 	 * tag and that tag also happens to be a "real" macro, then we
904 	 * need to try interpreting it again as a real macro.  If it's
905 	 * not, then return ignore.  Else continue.
906 	 */
907 
908 	if (r->last->end) {
909 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
910 			if ((*bufp)[i] != r->last->end[j])
911 				break;
912 
913 		if ('\0' == r->last->end[j] &&
914 				('\0' == (*bufp)[i] ||
915 				 ' ' == (*bufp)[i] ||
916 				 '\t' == (*bufp)[i])) {
917 			roffnode_pop(r);
918 			roffnode_cleanscope(r);
919 
920 			while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
921 				i++;
922 
923 			pos = i;
924 			if (ROFF_MAX != roff_parse(r, *bufp, &pos))
925 				return(ROFF_RERUN);
926 			return(ROFF_IGN);
927 		}
928 	}
929 
930 	/*
931 	 * If we have no custom end-query or lookup failed, then try
932 	 * pulling it out of the hashtable.
933 	 */
934 
935 	t = roff_parse(r, *bufp, &pos);
936 
937 	/*
938 	 * Macros other than block-end are only significant
939 	 * in `de' blocks; elsewhere, simply throw them away.
940 	 */
941 	if (ROFF_cblock != t) {
942 		if (ROFF_de == tok)
943 			roff_setstr(r, r->last->name, *bufp + ppos, 1);
944 		return(ROFF_IGN);
945 	}
946 
947 	assert(roffs[t].proc);
948 	return((*roffs[t].proc)(r, t, bufp, szp,
949 				ln, ppos, pos, offs));
950 }
951 
952 
953 /* ARGSUSED */
954 static enum rofferr
955 roff_block_text(ROFF_ARGS)
956 {
957 
958 	if (ROFF_de == tok)
959 		roff_setstr(r, r->last->name, *bufp + pos, 1);
960 
961 	return(ROFF_IGN);
962 }
963 
964 
965 /* ARGSUSED */
966 static enum rofferr
967 roff_cond_sub(ROFF_ARGS)
968 {
969 	enum rofft	 t;
970 	enum roffrule	 rr;
971 	char		*ep;
972 
973 	rr = r->last->rule;
974 	roffnode_cleanscope(r);
975 
976 	/*
977 	 * If the macro is unknown, first check if it contains a closing
978 	 * delimiter `\}'.  If it does, close out our scope and return
979 	 * the currently-scoped rule (ignore or continue).  Else, drop
980 	 * into the currently-scoped rule.
981 	 */
982 
983 	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
984 		ep = &(*bufp)[pos];
985 		for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
986 			ep++;
987 			if ('}' != *ep)
988 				continue;
989 
990 			/*
991 			 * Make the \} go away.
992 			 * This is a little haphazard, as it's not quite
993 			 * clear how nroff does this.
994 			 * If we're at the end of line, then just chop
995 			 * off the \} and resize the buffer.
996 			 * If we aren't, then conver it to spaces.
997 			 */
998 
999 			if ('\0' == *(ep + 1)) {
1000 				*--ep = '\0';
1001 				*szp -= 2;
1002 			} else
1003 				*(ep - 1) = *ep = ' ';
1004 
1005 			roff_ccond(r, ROFF_ccond, bufp, szp,
1006 					ln, pos, pos + 2, offs);
1007 			break;
1008 		}
1009 		return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1010 	}
1011 
1012 	/*
1013 	 * A denied conditional must evaluate its children if and only
1014 	 * if they're either structurally required (such as loops and
1015 	 * conditionals) or a closing macro.
1016 	 */
1017 
1018 	if (ROFFRULE_DENY == rr)
1019 		if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
1020 			if (ROFF_ccond != t)
1021 				return(ROFF_IGN);
1022 
1023 	assert(roffs[t].proc);
1024 	return((*roffs[t].proc)(r, t, bufp, szp,
1025 				ln, ppos, pos, offs));
1026 }
1027 
1028 /* ARGSUSED */
1029 static enum rofferr
1030 roff_cond_text(ROFF_ARGS)
1031 {
1032 	char		*ep;
1033 	enum roffrule	 rr;
1034 
1035 	rr = r->last->rule;
1036 	roffnode_cleanscope(r);
1037 
1038 	ep = &(*bufp)[pos];
1039 	for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1040 		ep++;
1041 		if ('}' != *ep)
1042 			continue;
1043 		*ep = '&';
1044 		roff_ccond(r, ROFF_ccond, bufp, szp,
1045 				ln, pos, pos + 2, offs);
1046 	}
1047 	return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1048 }
1049 
1050 static enum roffrule
1051 roff_evalcond(const char *v, int *pos)
1052 {
1053 
1054 	switch (v[*pos]) {
1055 	case ('n'):
1056 		(*pos)++;
1057 		return(ROFFRULE_ALLOW);
1058 	case ('e'):
1059 		/* FALLTHROUGH */
1060 	case ('o'):
1061 		/* FALLTHROUGH */
1062 	case ('t'):
1063 		(*pos)++;
1064 		return(ROFFRULE_DENY);
1065 	default:
1066 		break;
1067 	}
1068 
1069 	while (v[*pos] && ' ' != v[*pos])
1070 		(*pos)++;
1071 	return(ROFFRULE_DENY);
1072 }
1073 
1074 /* ARGSUSED */
1075 static enum rofferr
1076 roff_line_ignore(ROFF_ARGS)
1077 {
1078 
1079 	if (ROFF_it == tok)
1080 		mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
1081 
1082 	return(ROFF_IGN);
1083 }
1084 
1085 /* ARGSUSED */
1086 static enum rofferr
1087 roff_cond(ROFF_ARGS)
1088 {
1089 	int		 sv;
1090 	enum roffrule	 rule;
1091 
1092 	/*
1093 	 * An `.el' has no conditional body: it will consume the value
1094 	 * of the current rstack entry set in prior `ie' calls or
1095 	 * defaults to DENY.
1096 	 *
1097 	 * If we're not an `el', however, then evaluate the conditional.
1098 	 */
1099 
1100 	rule = ROFF_el == tok ?
1101 		(r->rstackpos < 0 ?
1102 		 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1103 		roff_evalcond(*bufp, &pos);
1104 
1105 	sv = pos;
1106 	while (' ' == (*bufp)[pos])
1107 		pos++;
1108 
1109 	/*
1110 	 * Roff is weird.  If we have just white-space after the
1111 	 * conditional, it's considered the BODY and we exit without
1112 	 * really doing anything.  Warn about this.  It's probably
1113 	 * wrong.
1114 	 */
1115 
1116 	if ('\0' == (*bufp)[pos] && sv != pos) {
1117 		mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1118 		return(ROFF_IGN);
1119 	}
1120 
1121 	roffnode_push(r, tok, NULL, ln, ppos);
1122 
1123 	r->last->rule = rule;
1124 
1125 	/*
1126 	 * An if-else will put the NEGATION of the current evaluated
1127 	 * conditional into the stack of rules.
1128 	 */
1129 
1130 	if (ROFF_ie == tok) {
1131 		if (r->rstackpos == RSTACK_MAX - 1) {
1132 			mandoc_msg(MANDOCERR_MEM,
1133 				r->parse, ln, ppos, NULL);
1134 			return(ROFF_ERR);
1135 		}
1136 		r->rstack[++r->rstackpos] =
1137 			ROFFRULE_DENY == r->last->rule ?
1138 			ROFFRULE_ALLOW : ROFFRULE_DENY;
1139 	}
1140 
1141 	/* If the parent has false as its rule, then so do we. */
1142 
1143 	if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1144 		r->last->rule = ROFFRULE_DENY;
1145 
1146 	/*
1147 	 * Determine scope.  If we're invoked with "\{" trailing the
1148 	 * conditional, then we're in a multiline scope.  Else our scope
1149 	 * expires on the next line.
1150 	 */
1151 
1152 	r->last->endspan = 1;
1153 
1154 	if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1155 		r->last->endspan = -1;
1156 		pos += 2;
1157 	}
1158 
1159 	/*
1160 	 * If there are no arguments on the line, the next-line scope is
1161 	 * assumed.
1162 	 */
1163 
1164 	if ('\0' == (*bufp)[pos])
1165 		return(ROFF_IGN);
1166 
1167 	/* Otherwise re-run the roff parser after recalculating. */
1168 
1169 	*offs = pos;
1170 	return(ROFF_RERUN);
1171 }
1172 
1173 
1174 /* ARGSUSED */
1175 static enum rofferr
1176 roff_ds(ROFF_ARGS)
1177 {
1178 	char		*name, *string;
1179 
1180 	/*
1181 	 * A symbol is named by the first word following the macro
1182 	 * invocation up to a space.  Its value is anything after the
1183 	 * name's trailing whitespace and optional double-quote.  Thus,
1184 	 *
1185 	 *  [.ds foo "bar  "     ]
1186 	 *
1187 	 * will have `bar  "     ' as its value.
1188 	 */
1189 
1190 	string = *bufp + pos;
1191 	name = roff_getname(r, &string, ln, pos);
1192 	if ('\0' == *name)
1193 		return(ROFF_IGN);
1194 
1195 	/* Read past initial double-quote. */
1196 	if ('"' == *string)
1197 		string++;
1198 
1199 	/* The rest is the value. */
1200 	roff_setstr(r, name, string, 0);
1201 	return(ROFF_IGN);
1202 }
1203 
1204 int
1205 roff_regisset(const struct roff *r, enum regs reg)
1206 {
1207 
1208 	return(r->regs[(int)reg].set);
1209 }
1210 
1211 unsigned int
1212 roff_regget(const struct roff *r, enum regs reg)
1213 {
1214 
1215 	return(r->regs[(int)reg].u);
1216 }
1217 
1218 void
1219 roff_regunset(struct roff *r, enum regs reg)
1220 {
1221 
1222 	r->regs[(int)reg].set = 0;
1223 }
1224 
1225 /* ARGSUSED */
1226 static enum rofferr
1227 roff_nr(ROFF_ARGS)
1228 {
1229 	const char	*key;
1230 	char		*val;
1231 	int		 iv;
1232 
1233 	val = *bufp + pos;
1234 	key = roff_getname(r, &val, ln, pos);
1235 
1236 	if (0 == strcmp(key, "nS")) {
1237 		r->regs[(int)REG_nS].set = 1;
1238 		if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0)
1239 			r->regs[(int)REG_nS].u = (unsigned)iv;
1240 		else
1241 			r->regs[(int)REG_nS].u = 0u;
1242 	}
1243 
1244 	return(ROFF_IGN);
1245 }
1246 
1247 /* ARGSUSED */
1248 static enum rofferr
1249 roff_rm(ROFF_ARGS)
1250 {
1251 	const char	 *name;
1252 	char		 *cp;
1253 
1254 	cp = *bufp + pos;
1255 	while ('\0' != *cp) {
1256 		name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1257 		if ('\0' != *name)
1258 			roff_setstr(r, name, NULL, 0);
1259 	}
1260 	return(ROFF_IGN);
1261 }
1262 
1263 /* ARGSUSED */
1264 static enum rofferr
1265 roff_TE(ROFF_ARGS)
1266 {
1267 
1268 	if (NULL == r->tbl)
1269 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1270 	else
1271 		tbl_end(&r->tbl);
1272 
1273 	return(ROFF_IGN);
1274 }
1275 
1276 /* ARGSUSED */
1277 static enum rofferr
1278 roff_T_(ROFF_ARGS)
1279 {
1280 
1281 	if (NULL == r->tbl)
1282 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1283 	else
1284 		tbl_restart(ppos, ln, r->tbl);
1285 
1286 	return(ROFF_IGN);
1287 }
1288 
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any.  Returns 1 if an equation was open
 * and eqn_end() reported ROFF_EQN, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (r->eqn == NULL)
		return(0);

	return(eqn_end(&r->eqn) == ROFF_EQN ? 1 : 0);
}
#endif
1297 
1298 static void
1299 roff_openeqn(struct roff *r, const char *name, int line,
1300 		int offs, const char *buf)
1301 {
1302 	struct eqn_node *e;
1303 	int		 poff;
1304 
1305 	assert(NULL == r->eqn);
1306 	e = eqn_alloc(name, offs, line, r->parse);
1307 
1308 	if (r->last_eqn)
1309 		r->last_eqn->next = e;
1310 	else
1311 		r->first_eqn = r->last_eqn = e;
1312 
1313 	r->eqn = r->last_eqn = e;
1314 
1315 	if (buf) {
1316 		poff = 0;
1317 		eqn_read(&r->eqn, line, buf, offs, &poff);
1318 	}
1319 }
1320 
1321 /* ARGSUSED */
1322 static enum rofferr
1323 roff_EQ(ROFF_ARGS)
1324 {
1325 
1326 	roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1327 	return(ROFF_IGN);
1328 }
1329 
1330 /* ARGSUSED */
1331 static enum rofferr
1332 roff_EN(ROFF_ARGS)
1333 {
1334 
1335 	mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1336 	return(ROFF_IGN);
1337 }
1338 
1339 /* ARGSUSED */
1340 static enum rofferr
1341 roff_TS(ROFF_ARGS)
1342 {
1343 	struct tbl_node	*t;
1344 
1345 	if (r->tbl) {
1346 		mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1347 		tbl_end(&r->tbl);
1348 	}
1349 
1350 	t = tbl_alloc(ppos, ln, r->parse);
1351 
1352 	if (r->last_tbl)
1353 		r->last_tbl->next = t;
1354 	else
1355 		r->first_tbl = r->last_tbl = t;
1356 
1357 	r->tbl = r->last_tbl = t;
1358 	return(ROFF_IGN);
1359 }
1360 
1361 /* ARGSUSED */
1362 static enum rofferr
1363 roff_tr(ROFF_ARGS)
1364 {
1365 	const char	*p, *first, *second;
1366 	size_t		 fsz, ssz;
1367 	enum mandoc_esc	 esc;
1368 
1369 	p = *bufp + pos;
1370 
1371 	if ('\0' == *p) {
1372 		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1373 		return(ROFF_IGN);
1374 	}
1375 
1376 	while ('\0' != *p) {
1377 		fsz = ssz = 1;
1378 
1379 		first = p++;
1380 		if ('\\' == *first) {
1381 			esc = mandoc_escape(&p, NULL, NULL);
1382 			if (ESCAPE_ERROR == esc) {
1383 				mandoc_msg
1384 					(MANDOCERR_BADESCAPE, r->parse,
1385 					 ln, (int)(p - *bufp), NULL);
1386 				return(ROFF_IGN);
1387 			}
1388 			fsz = (size_t)(p - first);
1389 		}
1390 
1391 		second = p++;
1392 		if ('\\' == *second) {
1393 			esc = mandoc_escape(&p, NULL, NULL);
1394 			if (ESCAPE_ERROR == esc) {
1395 				mandoc_msg
1396 					(MANDOCERR_BADESCAPE, r->parse,
1397 					 ln, (int)(p - *bufp), NULL);
1398 				return(ROFF_IGN);
1399 			}
1400 			ssz = (size_t)(p - second);
1401 		} else if ('\0' == *second) {
1402 			mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1403 					ln, (int)(p - *bufp), NULL);
1404 			second = " ";
1405 			p--;
1406 		}
1407 
1408 		if (fsz > 1) {
1409 			roff_setstrn(&r->xmbtab, first,
1410 					fsz, second, ssz, 0);
1411 			continue;
1412 		}
1413 
1414 		if (NULL == r->xtab)
1415 			r->xtab = mandoc_calloc
1416 				(128, sizeof(struct roffstr));
1417 
1418 		free(r->xtab[(int)*first].p);
1419 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1420 		r->xtab[(int)*first].sz = ssz;
1421 	}
1422 
1423 	return(ROFF_IGN);
1424 }
1425 
1426 /* ARGSUSED */
1427 static enum rofferr
1428 roff_so(ROFF_ARGS)
1429 {
1430 	char *name;
1431 
1432 	mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1433 
1434 	/*
1435 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1436 	 * opening anything that's not in our cwd or anything beneath
1437 	 * it.  Thus, explicitly disallow traversing up the file-system
1438 	 * or using absolute paths.
1439 	 */
1440 
1441 	name = *bufp + pos;
1442 	if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1443 		mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1444 		return(ROFF_ERR);
1445 	}
1446 
1447 	*offs = pos;
1448 	return(ROFF_SO);
1449 }
1450 
1451 /* ARGSUSED */
1452 static enum rofferr
1453 roff_userdef(ROFF_ARGS)
1454 {
1455 	const char	 *arg[9];
1456 	char		 *cp, *n1, *n2;
1457 	int		  i;
1458 
1459 	/*
1460 	 * Collect pointers to macro argument strings
1461 	 * and null-terminate them.
1462 	 */
1463 	cp = *bufp + pos;
1464 	for (i = 0; i < 9; i++)
1465 		arg[i] = '\0' == *cp ? "" :
1466 		    mandoc_getarg(r->parse, &cp, ln, &pos);
1467 
1468 	/*
1469 	 * Expand macro arguments.
1470 	 */
1471 	*szp = 0;
1472 	n1 = cp = mandoc_strdup(r->current_string);
1473 	while (NULL != (cp = strstr(cp, "\\$"))) {
1474 		i = cp[2] - '1';
1475 		if (0 > i || 8 < i) {
1476 			/* Not an argument invocation. */
1477 			cp += 2;
1478 			continue;
1479 		}
1480 
1481 		*szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1482 		n2 = mandoc_malloc(*szp);
1483 
1484 		strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1485 		strlcat(n2, arg[i], *szp);
1486 		strlcat(n2, cp + 3, *szp);
1487 
1488 		cp = n2 + (cp - n1);
1489 		free(n1);
1490 		n1 = n2;
1491 	}
1492 
1493 	/*
1494 	 * Replace the macro invocation
1495 	 * by the expanded macro.
1496 	 */
1497 	free(*bufp);
1498 	*bufp = n1;
1499 	if (0 == *szp)
1500 		*szp = strlen(*bufp) + 1;
1501 
1502 	return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1503 	   ROFF_REPARSE : ROFF_APPEND);
1504 }
1505 
1506 static char *
1507 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1508 {
1509 	char	 *name, *cp;
1510 
1511 	name = *cpp;
1512 	if ('\0' == *name)
1513 		return(name);
1514 
1515 	/* Read until end of name. */
1516 	for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1517 		if ('\\' != *cp)
1518 			continue;
1519 		cp++;
1520 		if ('\\' == *cp)
1521 			continue;
1522 		mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1523 		*cp = '\0';
1524 		name = cp;
1525 	}
1526 
1527 	/* Nil-terminate name. */
1528 	if ('\0' != *cp)
1529 		*(cp++) = '\0';
1530 
1531 	/* Read past spaces. */
1532 	while (' ' == *cp)
1533 		cp++;
1534 
1535 	*cpp = cp;
1536 	return(name);
1537 }
1538 
1539 /*
1540  * Store *string into the user-defined string called *name.
1541  * In multiline mode, append to an existing entry and append '\n';
1542  * else replace the existing entry, if there is one.
1543  * To clear an existing entry, call with (*r, *name, NULL, 0).
1544  */
1545 static void
1546 roff_setstr(struct roff *r, const char *name, const char *string,
1547 	int multiline)
1548 {
1549 
1550 	roff_setstrn(&r->strtab, name, strlen(name), string,
1551 			string ? strlen(string) : 0, multiline);
1552 }
1553 
1554 static void
1555 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1556 		const char *string, size_t stringsz, int multiline)
1557 {
1558 	struct roffkv	*n;
1559 	char		*c;
1560 	int		 i;
1561 	size_t		 oldch, newch;
1562 
1563 	/* Search for an existing string with the same name. */
1564 	n = *r;
1565 
1566 	while (n && strcmp(name, n->key.p))
1567 		n = n->next;
1568 
1569 	if (NULL == n) {
1570 		/* Create a new string table entry. */
1571 		n = mandoc_malloc(sizeof(struct roffkv));
1572 		n->key.p = mandoc_strndup(name, namesz);
1573 		n->key.sz = namesz;
1574 		n->val.p = NULL;
1575 		n->val.sz = 0;
1576 		n->next = *r;
1577 		*r = n;
1578 	} else if (0 == multiline) {
1579 		/* In multiline mode, append; else replace. */
1580 		free(n->val.p);
1581 		n->val.p = NULL;
1582 		n->val.sz = 0;
1583 	}
1584 
1585 	if (NULL == string)
1586 		return;
1587 
1588 	/*
1589 	 * One additional byte for the '\n' in multiline mode,
1590 	 * and one for the terminating '\0'.
1591 	 */
1592 	newch = stringsz + (multiline ? 2u : 1u);
1593 
1594 	if (NULL == n->val.p) {
1595 		n->val.p = mandoc_malloc(newch);
1596 		*n->val.p = '\0';
1597 		oldch = 0;
1598 	} else {
1599 		oldch = n->val.sz;
1600 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1601 	}
1602 
1603 	/* Skip existing content in the destination buffer. */
1604 	c = n->val.p + (int)oldch;
1605 
1606 	/* Append new content to the destination buffer. */
1607 	i = 0;
1608 	while (i < (int)stringsz) {
1609 		/*
1610 		 * Rudimentary roff copy mode:
1611 		 * Handle escaped backslashes.
1612 		 */
1613 		if ('\\' == string[i] && '\\' == string[i + 1])
1614 			i++;
1615 		*c++ = string[i++];
1616 	}
1617 
1618 	/* Append terminating bytes. */
1619 	if (multiline)
1620 		*c++ = '\n';
1621 
1622 	*c = '\0';
1623 	n->val.sz = (int)(c - n->val.p);
1624 }
1625 
1626 static const char *
1627 roff_getstrn(const struct roff *r, const char *name, size_t len)
1628 {
1629 	const struct roffkv *n;
1630 
1631 	for (n = r->strtab; n; n = n->next)
1632 		if (0 == strncmp(name, n->key.p, len) &&
1633 				'\0' == n->key.p[(int)len])
1634 			return(n->val.p);
1635 
1636 	return(NULL);
1637 }
1638 
1639 static void
1640 roff_freestr(struct roffkv *r)
1641 {
1642 	struct roffkv	 *n, *nn;
1643 
1644 	for (n = r; n; n = nn) {
1645 		free(n->key.p);
1646 		free(n->val.p);
1647 		nn = n->next;
1648 		free(n);
1649 	}
1650 }
1651 
1652 const struct tbl_span *
1653 roff_span(const struct roff *r)
1654 {
1655 
1656 	return(r->tbl ? tbl_span(r->tbl) : NULL);
1657 }
1658 
1659 const struct eqn *
1660 roff_eqn(const struct roff *r)
1661 {
1662 
1663 	return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1664 }
1665 
1666 /*
1667  * Duplicate an input string, making the appropriate character
1668  * conversations (as stipulated by `tr') along the way.
1669  * Returns a heap-allocated string with all the replacements made.
1670  */
1671 char *
1672 roff_strdup(const struct roff *r, const char *p)
1673 {
1674 	const struct roffkv *cp;
1675 	char		*res;
1676 	const char	*pp;
1677 	size_t		 ssz, sz;
1678 	enum mandoc_esc	 esc;
1679 
1680 	if (NULL == r->xmbtab && NULL == r->xtab)
1681 		return(mandoc_strdup(p));
1682 	else if ('\0' == *p)
1683 		return(mandoc_strdup(""));
1684 
1685 	/*
1686 	 * Step through each character looking for term matches
1687 	 * (remember that a `tr' can be invoked with an escape, which is
1688 	 * a glyph but the escape is multi-character).
1689 	 * We only do this if the character hash has been initialised
1690 	 * and the string is >0 length.
1691 	 */
1692 
1693 	res = NULL;
1694 	ssz = 0;
1695 
1696 	while ('\0' != *p) {
1697 		if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1698 			sz = r->xtab[(int)*p].sz;
1699 			res = mandoc_realloc(res, ssz + sz + 1);
1700 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1701 			ssz += sz;
1702 			p++;
1703 			continue;
1704 		} else if ('\\' != *p) {
1705 			res = mandoc_realloc(res, ssz + 2);
1706 			res[ssz++] = *p++;
1707 			continue;
1708 		}
1709 
1710 		/* Search for term matches. */
1711 		for (cp = r->xmbtab; cp; cp = cp->next)
1712 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
1713 				break;
1714 
1715 		if (NULL != cp) {
1716 			/*
1717 			 * A match has been found.
1718 			 * Append the match to the array and move
1719 			 * forward by its keysize.
1720 			 */
1721 			res = mandoc_realloc
1722 				(res, ssz + cp->val.sz + 1);
1723 			memcpy(res + ssz, cp->val.p, cp->val.sz);
1724 			ssz += cp->val.sz;
1725 			p += (int)cp->key.sz;
1726 			continue;
1727 		}
1728 
1729 		/*
1730 		 * Handle escapes carefully: we need to copy
1731 		 * over just the escape itself, or else we might
1732 		 * do replacements within the escape itself.
1733 		 * Make sure to pass along the bogus string.
1734 		 */
1735 		pp = p++;
1736 		esc = mandoc_escape(&p, NULL, NULL);
1737 		if (ESCAPE_ERROR == esc) {
1738 			sz = strlen(pp);
1739 			res = mandoc_realloc(res, ssz + sz + 1);
1740 			memcpy(res + ssz, pp, sz);
1741 			break;
1742 		}
1743 		/*
1744 		 * We bail out on bad escapes.
1745 		 * No need to warn: we already did so when
1746 		 * roff_res() was called.
1747 		 */
1748 		sz = (int)(p - pp);
1749 		res = mandoc_realloc(res, ssz + sz + 1);
1750 		memcpy(res + ssz, pp, sz);
1751 		ssz += sz;
1752 	}
1753 
1754 	res[(int)ssz] = '\0';
1755 	return(res);
1756 }
1757