xref: /openbsd-src/usr.bin/mandoc/roff.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$Id: roff.c,v 1.48 2012/07/07 18:27:36 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <assert.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "mandoc.h"
24 #include "libroff.h"
25 #include "libmandoc.h"
26 
27 /* Maximum number of nested if-else conditionals. */
28 #define	RSTACK_MAX	128
29 
30 /* Maximum number of string expansions per line, to break infinite loops. */
31 #define	EXPAND_LIMIT	1000
32 
/*
 * Identifiers for the roff requests and macros handled in this file.
 * The values double as indices into the roffs[] table, which lists its
 * entries in exactly this order, so the two must be kept in sync.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_cc,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock, /* block close, named "." in roffs[] */
	ROFF_ccond, /* conditional close, named "\}" in roffs[] */
	ROFF_USERDEF, /* name found in the user-defined string table */
	ROFF_MAX /* table size; also returned for "no such token" */
};
70 
/*
 * Result of evaluating a conditional.  The conditional handlers below
 * return ROFF_IGN for lines in a DENY scope and ROFF_CONT otherwise.
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
75 
/*
 * A single register entity.  If "set" is zero, the value of the
 * register should be the default one, which is per-register.
 * Registers are assumed to be unsigned ints for now.
 * roff_reset() zeroes the whole register array, which marks every
 * register as unset again.
 */
struct	reg {
	int		 set; /* whether set or not */
	unsigned int	 u; /* unsigned integer */
};
85 
/*
 * An incredibly-simple string buffer: a heap-allocated C string
 * together with its cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
93 
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps two such lists: strtab and xmbtab.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
102 
/*
 * State of one roff parser instance.  Created by roff_alloc(), cleared
 * for reuse by roff_reset() and released by roff_free().
 */
struct	roff {
	enum mparset	 parsetype; /* requested parse type */
	struct mparse	*parse; /* parse point */
	struct roffnode	*last; /* leaf of stack */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack */
	struct reg	 regs[REG__MAX];
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
122 
/*
 * One open scope (block or conditional) on the stack rooted at
 * roff->last.  Pushed by roffnode_push(), removed by roffnode_pop().
 * For conditionals, roff_cond() sets endspan to -1 for a brace-delimited
 * scope, 1 for the rest of the line and 2 for the following line;
 * roffnode_cleanscope() decrements it and pops the node when it
 * reaches zero.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
133 
/*
 * Common parameter list shared by all request handlers, and the
 * function pointer type built from it.  Handlers may replace *bufp
 * and update *szp and *offs.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
		 	 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
144 
/*
 * Description of one built-in request: its name and the handlers
 * invoked by roff_parseln().  "proc" runs when the request itself is
 * seen; "text" and "sub" run for text lines and control lines read
 * while a scope opened by this request is the innermost one.
 * "next" chains entries that share a bucket in the hash table.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next;
};
154 
/*
 * A predefined string: roff_alloc() and roff_reset() install "str"
 * under "name" with roff_setstr().
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};
159 
/*
 * Expands to one struct predef initializer, including the trailing
 * comma; presumably used by the entries of predefs.in.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
162 
163 static	enum rofft	 roffhash_find(const char *, size_t);
164 static	void		 roffhash_init(void);
165 static	void		 roffnode_cleanscope(struct roff *);
166 static	void		 roffnode_pop(struct roff *);
167 static	void		 roffnode_push(struct roff *, enum rofft,
168 				const char *, int, int);
169 static	enum rofferr	 roff_block(ROFF_ARGS);
170 static	enum rofferr	 roff_block_text(ROFF_ARGS);
171 static	enum rofferr	 roff_block_sub(ROFF_ARGS);
172 static	enum rofferr	 roff_cblock(ROFF_ARGS);
173 static	enum rofferr	 roff_cc(ROFF_ARGS);
174 static	enum rofferr	 roff_ccond(ROFF_ARGS);
175 static	enum rofferr	 roff_cond(ROFF_ARGS);
176 static	enum rofferr	 roff_cond_text(ROFF_ARGS);
177 static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
178 static	enum rofferr	 roff_ds(ROFF_ARGS);
179 static	enum roffrule	 roff_evalcond(const char *, int *);
180 static	void		 roff_free1(struct roff *);
181 static	void		 roff_freestr(struct roffkv *);
182 static	char		*roff_getname(struct roff *, char **, int, int);
183 static	const char	*roff_getstrn(const struct roff *,
184 				const char *, size_t);
185 static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
186 static	enum rofferr	 roff_nr(ROFF_ARGS);
187 static	void		 roff_openeqn(struct roff *, const char *,
188 				int, int, const char *);
189 static	enum rofft	 roff_parse(struct roff *, const char *, int *);
190 static	enum rofferr	 roff_parsetext(char *);
191 static	enum rofferr	 roff_res(struct roff *,
192 				char **, size_t *, int, int);
193 static	enum rofferr	 roff_rm(ROFF_ARGS);
194 static	void		 roff_setstr(struct roff *,
195 				const char *, const char *, int);
196 static	void		 roff_setstrn(struct roffkv **, const char *,
197 				size_t, const char *, size_t, int);
198 static	enum rofferr	 roff_so(ROFF_ARGS);
199 static	enum rofferr	 roff_tr(ROFF_ARGS);
200 static	enum rofferr	 roff_Dd(ROFF_ARGS);
201 static	enum rofferr	 roff_TH(ROFF_ARGS);
202 static	enum rofferr	 roff_TE(ROFF_ARGS);
203 static	enum rofferr	 roff_TS(ROFF_ARGS);
204 static	enum rofferr	 roff_EQ(ROFF_ARGS);
205 static	enum rofferr	 roff_EN(ROFF_ARGS);
206 static	enum rofferr	 roff_T_(ROFF_ARGS);
207 static	enum rofferr	 roff_userdef(ROFF_ARGS);
208 
/*
 * See roffhash_find().
 * The hash table has one bucket per printable, non-blank ASCII
 * character (33 through 126); a request is filed under the first
 * character of its name.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
216 
/*
 * Table of built-in requests, indexed by enum rofft; the order of the
 * entries must match the order of the enumerators.  The final entry
 * (ROFF_USERDEF) has no name and is never entered into the hash table.
 * The "next" members are filled in by roffhash_init().
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
253 
/*
 * NULL-terminated list of reserved names; judging by the identifier,
 * these are the macro names of the mdoc language.  The list has
 * external linkage and is not referenced in the visible part of this
 * file.
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Ds", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
	"Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
275 
/*
 * NULL-terminated list of reserved names; judging by the identifier,
 * these are the macro names of the man language.  The list has
 * external linkage and is not referenced in the visible part of this
 * file.
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
	"EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
	"LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
	NULL
};
284 
/*
 * Array of injected predefined strings.
 * The initializers are pulled in from predefs.in.  roff_alloc() and
 * roff_reset() walk all PREDEFS_MAX slots, so the constant presumably
 * has to match the number of entries in that file -- verify when
 * editing predefs.in.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
290 
/*
 * See roffhash_find().
 * Map the first character of the name "p" to its bucket index.  The
 * caller must have checked that the character lies in the range
 * ASCII_LO to ASCII_HI.  The argument is parenthesised so that any
 * pointer expression can be passed safely.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
293 
294 static void
295 roffhash_init(void)
296 {
297 	struct roffmac	 *n;
298 	int		  buc, i;
299 
300 	for (i = 0; i < (int)ROFF_USERDEF; i++) {
301 		assert(roffs[i].name[0] >= ASCII_LO);
302 		assert(roffs[i].name[0] <= ASCII_HI);
303 
304 		buc = ROFF_HASH(roffs[i].name);
305 
306 		if (NULL != (n = hash[buc])) {
307 			for ( ; n->next; n = n->next)
308 				/* Do nothing. */ ;
309 			n->next = &roffs[i];
310 		} else
311 			hash[buc] = &roffs[i];
312 	}
313 }
314 
315 /*
316  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
317  * the nil-terminated string name could be found.
318  */
319 static enum rofft
320 roffhash_find(const char *p, size_t s)
321 {
322 	int		 buc;
323 	struct roffmac	*n;
324 
325 	/*
326 	 * libroff has an extremely simple hashtable, for the time
327 	 * being, which simply keys on the first character, which must
328 	 * be printable, then walks a chain.  It works well enough until
329 	 * optimised.
330 	 */
331 
332 	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
333 		return(ROFF_MAX);
334 
335 	buc = ROFF_HASH(p);
336 
337 	if (NULL == (n = hash[buc]))
338 		return(ROFF_MAX);
339 	for ( ; n; n = n->next)
340 		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
341 			return((enum rofft)(n - roffs));
342 
343 	return(ROFF_MAX);
344 }
345 
346 
347 /*
348  * Pop the current node off of the stack of roff instructions currently
349  * pending.
350  */
351 static void
352 roffnode_pop(struct roff *r)
353 {
354 	struct roffnode	*p;
355 
356 	assert(r->last);
357 	p = r->last;
358 
359 	r->last = r->last->parent;
360 	free(p->name);
361 	free(p->end);
362 	free(p);
363 }
364 
365 
366 /*
367  * Push a roff node onto the instruction stack.  This must later be
368  * removed with roffnode_pop().
369  */
370 static void
371 roffnode_push(struct roff *r, enum rofft tok, const char *name,
372 		int line, int col)
373 {
374 	struct roffnode	*p;
375 
376 	p = mandoc_calloc(1, sizeof(struct roffnode));
377 	p->tok = tok;
378 	if (name)
379 		p->name = mandoc_strdup(name);
380 	p->parent = r->last;
381 	p->line = line;
382 	p->col = col;
383 	p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
384 
385 	r->last = p;
386 }
387 
388 
389 static void
390 roff_free1(struct roff *r)
391 {
392 	struct tbl_node	*t;
393 	struct eqn_node	*e;
394 	int		 i;
395 
396 	while (NULL != (t = r->first_tbl)) {
397 		r->first_tbl = t->next;
398 		tbl_free(t);
399 	}
400 
401 	r->first_tbl = r->last_tbl = r->tbl = NULL;
402 
403 	while (NULL != (e = r->first_eqn)) {
404 		r->first_eqn = e->next;
405 		eqn_free(e);
406 	}
407 
408 	r->first_eqn = r->last_eqn = r->eqn = NULL;
409 
410 	while (r->last)
411 		roffnode_pop(r);
412 
413 	roff_freestr(r->strtab);
414 	roff_freestr(r->xmbtab);
415 
416 	r->strtab = r->xmbtab = NULL;
417 
418 	if (r->xtab)
419 		for (i = 0; i < 128; i++)
420 			free(r->xtab[i].p);
421 
422 	free(r->xtab);
423 	r->xtab = NULL;
424 }
425 
426 void
427 roff_reset(struct roff *r)
428 {
429 	int		 i;
430 
431 	roff_free1(r);
432 
433 	r->control = 0;
434 	memset(&r->regs, 0, sizeof(struct reg) * REG__MAX);
435 
436 	for (i = 0; i < PREDEFS_MAX; i++)
437 		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
438 }
439 
440 
/*
 * Destroy a parser created by roff_alloc(): release its contents
 * first, then the structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
448 
449 
450 struct roff *
451 roff_alloc(enum mparset type, struct mparse *parse)
452 {
453 	struct roff	*r;
454 	int		 i;
455 
456 	r = mandoc_calloc(1, sizeof(struct roff));
457 	r->parsetype = type;
458 	r->parse = parse;
459 	r->rstackpos = -1;
460 
461 	roffhash_init();
462 
463 	for (i = 0; i < PREDEFS_MAX; i++)
464 		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
465 
466 	return(r);
467 }
468 
/*
 * Pre-filter each and every line for references to user-defined
 * strings (`\*x', `\*(xx', `\*[name]') and replace them by the value
 * of the string.  These must be handled before the actual line
 * is processed.
 * This also checks the syntax of regular escapes.
 *
 * Each replacement allocates a new buffer, frees the old one and
 * updates *bufp and *szp; scanning then restarts at the position of
 * the replaced reference, so that references contained in the
 * replacement text are expanded as well.
 *
 * Returns ROFF_CONT in general, including when scanning is abandoned
 * at a malformed escape or at an incomplete reference.  Returns
 * ROFF_IGN when more than EXPAND_LIMIT replacements were made on this
 * line.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	enum mandoc_esc	 esc;
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	int		 i, maxl, expand_count;
	size_t		 nsz;
	char		*n;

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		if ('\0' == *cp)
			return(ROFF_CONT);

		if ('*' != *cp) {
			/*
			 * Not a string reference: let mandoc_escape()
			 * validate the escape and step over it.
			 * A malformed escape ends the scan of this line.
			 */
			res = cp;
			esc = mandoc_escape(&cp, NULL, NULL);
			if (ESCAPE_ERROR != esc)
				continue;
			cp = res;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string.
		 * Save a pointer to the name.
		 * A maxl of zero means "up to the closing bracket".
		 */

		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE,
					 r->parse, ln,
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, report that and substitute the
		 * empty string.
		 */

		res = roff_getstrn(r, stnam, (size_t)i);

		if (NULL == res) {
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		n = mandoc_malloc(nsz);

		/*
		 * Copy the text before the escape, then the value,
		 * then the text after the name; in the bracketed
		 * form, cp still points to the ']' to be skipped.
		 */

		strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(n, res, nsz);
		strlcat(n, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = n;
		*szp = nsz;

		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}
	return(ROFF_CONT);
}
593 
594 /*
595  * Process text streams: convert all breakable hyphens into ASCII_HYPH.
596  */
597 static enum rofferr
598 roff_parsetext(char *p)
599 {
600 	size_t		 sz;
601 	const char	*start;
602 	enum mandoc_esc	 esc;
603 
604 	start = p;
605 
606 	while ('\0' != *p) {
607 		sz = strcspn(p, "-\\");
608 		p += sz;
609 
610 		if ('\0' == *p)
611 			break;
612 
613 		if ('\\' == *p) {
614 			/* Skip over escapes. */
615 			p++;
616 			esc = mandoc_escape
617 				((const char **)&p, NULL, NULL);
618 			if (ESCAPE_ERROR == esc)
619 				break;
620 			continue;
621 		} else if (p == start) {
622 			p++;
623 			continue;
624 		}
625 
626 		if (isalpha((unsigned char)p[-1]) &&
627 		    isalpha((unsigned char)p[1]))
628 			*p = ASCII_HYPH;
629 		p++;
630 	}
631 
632 	return(ROFF_CONT);
633 }
634 
635 enum rofferr
636 roff_parseln(struct roff *r, int ln, char **bufp,
637 		size_t *szp, int pos, int *offs)
638 {
639 	enum rofft	 t;
640 	enum rofferr	 e;
641 	int		 ppos, ctl;
642 
643 	/*
644 	 * Run the reserved-word filter only if we have some reserved
645 	 * words to fill in.
646 	 */
647 
648 	e = roff_res(r, bufp, szp, ln, pos);
649 	if (ROFF_IGN == e)
650 		return(e);
651 	assert(ROFF_CONT == e);
652 
653 	ppos = pos;
654 	ctl = roff_getcontrol(r, *bufp, &pos);
655 
656 	/*
657 	 * First, if a scope is open and we're not a macro, pass the
658 	 * text through the macro's filter.  If a scope isn't open and
659 	 * we're not a macro, just let it through.
660 	 * Finally, if there's an equation scope open, divert it into it
661 	 * no matter our state.
662 	 */
663 
664 	if (r->last && ! ctl) {
665 		t = r->last->tok;
666 		assert(roffs[t].text);
667 		e = (*roffs[t].text)
668 			(r, t, bufp, szp, ln, pos, pos, offs);
669 		assert(ROFF_IGN == e || ROFF_CONT == e);
670 		if (ROFF_CONT != e)
671 			return(e);
672 		if (r->eqn)
673 			return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
674 		if (r->tbl)
675 			return(tbl_read(r->tbl, ln, *bufp, pos));
676 		return(roff_parsetext(*bufp + pos));
677 	} else if ( ! ctl) {
678 		if (r->eqn)
679 			return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
680 		if (r->tbl)
681 			return(tbl_read(r->tbl, ln, *bufp, pos));
682 		return(roff_parsetext(*bufp + pos));
683 	} else if (r->eqn)
684 		return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
685 
686 	/*
687 	 * If a scope is open, go to the child handler for that macro,
688 	 * as it may want to preprocess before doing anything with it.
689 	 * Don't do so if an equation is open.
690 	 */
691 
692 	if (r->last) {
693 		t = r->last->tok;
694 		assert(roffs[t].sub);
695 		return((*roffs[t].sub)
696 				(r, t, bufp, szp,
697 				 ln, ppos, pos, offs));
698 	}
699 
700 	/*
701 	 * Lastly, as we've no scope open, try to look up and execute
702 	 * the new macro.  If no macro is found, simply return and let
703 	 * the compilers handle it.
704 	 */
705 
706 	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
707 		return(ROFF_CONT);
708 
709 	assert(roffs[t].proc);
710 	return((*roffs[t].proc)
711 			(r, t, bufp, szp,
712 			 ln, ppos, pos, offs));
713 }
714 
715 
716 void
717 roff_endparse(struct roff *r)
718 {
719 
720 	if (r->last)
721 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
722 				r->last->line, r->last->col, NULL);
723 
724 	if (r->eqn) {
725 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
726 				r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
727 		eqn_end(&r->eqn);
728 	}
729 
730 	if (r->tbl) {
731 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
732 				r->tbl->line, r->tbl->pos, NULL);
733 		tbl_end(&r->tbl);
734 	}
735 }
736 
737 /*
738  * Parse a roff node's type from the input buffer.  This must be in the
739  * form of ".foo xxx" in the usual way.
740  */
741 static enum rofft
742 roff_parse(struct roff *r, const char *buf, int *pos)
743 {
744 	const char	*mac;
745 	size_t		 maclen;
746 	enum rofft	 t;
747 
748 	if ('\0' == buf[*pos] || '"' == buf[*pos] ||
749 			'\t' == buf[*pos] || ' ' == buf[*pos])
750 		return(ROFF_MAX);
751 
752 	/*
753 	 * We stop the macro parse at an escape, tab, space, or nil.
754 	 * However, `\}' is also a valid macro, so make sure we don't
755 	 * clobber it by seeing the `\' as the end of token.
756 	 */
757 
758 	mac = buf + *pos;
759 	maclen = strcspn(mac + 1, " \\\t\0") + 1;
760 
761 	t = (r->current_string = roff_getstrn(r, mac, maclen))
762 	    ? ROFF_USERDEF : roffhash_find(mac, maclen);
763 
764 	*pos += (int)maclen;
765 
766 	while (buf[*pos] && ' ' == buf[*pos])
767 		(*pos)++;
768 
769 	return(t);
770 }
771 
772 /* ARGSUSED */
773 static enum rofferr
774 roff_cblock(ROFF_ARGS)
775 {
776 
777 	/*
778 	 * A block-close `..' should only be invoked as a child of an
779 	 * ignore macro, otherwise raise a warning and just ignore it.
780 	 */
781 
782 	if (NULL == r->last) {
783 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
784 		return(ROFF_IGN);
785 	}
786 
787 	switch (r->last->tok) {
788 	case (ROFF_am):
789 		/* FALLTHROUGH */
790 	case (ROFF_ami):
791 		/* FALLTHROUGH */
792 	case (ROFF_am1):
793 		/* FALLTHROUGH */
794 	case (ROFF_de):
795 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
796 		/* FALLTHROUGH */
797 	case (ROFF_dei):
798 		/* FALLTHROUGH */
799 	case (ROFF_ig):
800 		break;
801 	default:
802 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
803 		return(ROFF_IGN);
804 	}
805 
806 	if ((*bufp)[pos])
807 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
808 
809 	roffnode_pop(r);
810 	roffnode_cleanscope(r);
811 	return(ROFF_IGN);
812 
813 }
814 
815 
816 static void
817 roffnode_cleanscope(struct roff *r)
818 {
819 
820 	while (r->last) {
821 		if (--r->last->endspan != 0)
822 			break;
823 		roffnode_pop(r);
824 	}
825 }
826 
827 
828 /* ARGSUSED */
829 static enum rofferr
830 roff_ccond(ROFF_ARGS)
831 {
832 
833 	if (NULL == r->last) {
834 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
835 		return(ROFF_IGN);
836 	}
837 
838 	switch (r->last->tok) {
839 	case (ROFF_el):
840 		/* FALLTHROUGH */
841 	case (ROFF_ie):
842 		/* FALLTHROUGH */
843 	case (ROFF_if):
844 		break;
845 	default:
846 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
847 		return(ROFF_IGN);
848 	}
849 
850 	if (r->last->endspan > -1) {
851 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
852 		return(ROFF_IGN);
853 	}
854 
855 	if ((*bufp)[pos])
856 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
857 
858 	roffnode_pop(r);
859 	roffnode_cleanscope(r);
860 	return(ROFF_IGN);
861 }
862 
863 
864 /* ARGSUSED */
865 static enum rofferr
866 roff_block(ROFF_ARGS)
867 {
868 	int		sv;
869 	size_t		sz;
870 	char		*name;
871 
872 	name = NULL;
873 
874 	if (ROFF_ig != tok) {
875 		if ('\0' == (*bufp)[pos]) {
876 			mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
877 			return(ROFF_IGN);
878 		}
879 
880 		/*
881 		 * Re-write `de1', since we don't really care about
882 		 * groff's strange compatibility mode, into `de'.
883 		 */
884 
885 		if (ROFF_de1 == tok)
886 			tok = ROFF_de;
887 		if (ROFF_de == tok)
888 			name = *bufp + pos;
889 		else
890 			mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
891 			    roffs[tok].name);
892 
893 		while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
894 			pos++;
895 
896 		while (isspace((unsigned char)(*bufp)[pos]))
897 			(*bufp)[pos++] = '\0';
898 	}
899 
900 	roffnode_push(r, tok, name, ln, ppos);
901 
902 	/*
903 	 * At the beginning of a `de' macro, clear the existing string
904 	 * with the same name, if there is one.  New content will be
905 	 * added from roff_block_text() in multiline mode.
906 	 */
907 
908 	if (ROFF_de == tok)
909 		roff_setstr(r, name, "", 0);
910 
911 	if ('\0' == (*bufp)[pos])
912 		return(ROFF_IGN);
913 
914 	/* If present, process the custom end-of-line marker. */
915 
916 	sv = pos;
917 	while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
918 		pos++;
919 
920 	/*
921 	 * Note: groff does NOT like escape characters in the input.
922 	 * Instead of detecting this, we're just going to let it fly and
923 	 * to hell with it.
924 	 */
925 
926 	assert(pos > sv);
927 	sz = (size_t)(pos - sv);
928 
929 	if (1 == sz && '.' == (*bufp)[sv])
930 		return(ROFF_IGN);
931 
932 	r->last->end = mandoc_malloc(sz + 1);
933 
934 	memcpy(r->last->end, *bufp + sv, sz);
935 	r->last->end[(int)sz] = '\0';
936 
937 	if ((*bufp)[pos])
938 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
939 
940 	return(ROFF_IGN);
941 }
942 
943 
944 /* ARGSUSED */
945 static enum rofferr
946 roff_block_sub(ROFF_ARGS)
947 {
948 	enum rofft	t;
949 	int		i, j;
950 
951 	/*
952 	 * First check whether a custom macro exists at this level.  If
953 	 * it does, then check against it.  This is some of groff's
954 	 * stranger behaviours.  If we encountered a custom end-scope
955 	 * tag and that tag also happens to be a "real" macro, then we
956 	 * need to try interpreting it again as a real macro.  If it's
957 	 * not, then return ignore.  Else continue.
958 	 */
959 
960 	if (r->last->end) {
961 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
962 			if ((*bufp)[i] != r->last->end[j])
963 				break;
964 
965 		if ('\0' == r->last->end[j] &&
966 				('\0' == (*bufp)[i] ||
967 				 ' ' == (*bufp)[i] ||
968 				 '\t' == (*bufp)[i])) {
969 			roffnode_pop(r);
970 			roffnode_cleanscope(r);
971 
972 			while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
973 				i++;
974 
975 			pos = i;
976 			if (ROFF_MAX != roff_parse(r, *bufp, &pos))
977 				return(ROFF_RERUN);
978 			return(ROFF_IGN);
979 		}
980 	}
981 
982 	/*
983 	 * If we have no custom end-query or lookup failed, then try
984 	 * pulling it out of the hashtable.
985 	 */
986 
987 	t = roff_parse(r, *bufp, &pos);
988 
989 	/*
990 	 * Macros other than block-end are only significant
991 	 * in `de' blocks; elsewhere, simply throw them away.
992 	 */
993 	if (ROFF_cblock != t) {
994 		if (ROFF_de == tok)
995 			roff_setstr(r, r->last->name, *bufp + ppos, 1);
996 		return(ROFF_IGN);
997 	}
998 
999 	assert(roffs[t].proc);
1000 	return((*roffs[t].proc)(r, t, bufp, szp,
1001 				ln, ppos, pos, offs));
1002 }
1003 
1004 
1005 /* ARGSUSED */
1006 static enum rofferr
1007 roff_block_text(ROFF_ARGS)
1008 {
1009 
1010 	if (ROFF_de == tok)
1011 		roff_setstr(r, r->last->name, *bufp + pos, 1);
1012 
1013 	return(ROFF_IGN);
1014 }
1015 
1016 
1017 /* ARGSUSED */
1018 static enum rofferr
1019 roff_cond_sub(ROFF_ARGS)
1020 {
1021 	enum rofft	 t;
1022 	enum roffrule	 rr;
1023 	char		*ep;
1024 
1025 	rr = r->last->rule;
1026 	roffnode_cleanscope(r);
1027 
1028 	/*
1029 	 * If the macro is unknown, first check if it contains a closing
1030 	 * delimiter `\}'.  If it does, close out our scope and return
1031 	 * the currently-scoped rule (ignore or continue).  Else, drop
1032 	 * into the currently-scoped rule.
1033 	 */
1034 
1035 	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
1036 		ep = &(*bufp)[pos];
1037 		for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1038 			ep++;
1039 			if ('}' != *ep)
1040 				continue;
1041 
1042 			/*
1043 			 * Make the \} go away.
1044 			 * This is a little haphazard, as it's not quite
1045 			 * clear how nroff does this.
1046 			 * If we're at the end of line, then just chop
1047 			 * off the \} and resize the buffer.
1048 			 * If we aren't, then conver it to spaces.
1049 			 */
1050 
1051 			if ('\0' == *(ep + 1)) {
1052 				*--ep = '\0';
1053 				*szp -= 2;
1054 			} else
1055 				*(ep - 1) = *ep = ' ';
1056 
1057 			roff_ccond(r, ROFF_ccond, bufp, szp,
1058 					ln, pos, pos + 2, offs);
1059 			break;
1060 		}
1061 		return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1062 	}
1063 
1064 	/*
1065 	 * A denied conditional must evaluate its children if and only
1066 	 * if they're either structurally required (such as loops and
1067 	 * conditionals) or a closing macro.
1068 	 */
1069 
1070 	if (ROFFRULE_DENY == rr)
1071 		if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
1072 			if (ROFF_ccond != t)
1073 				return(ROFF_IGN);
1074 
1075 	assert(roffs[t].proc);
1076 	return((*roffs[t].proc)(r, t, bufp, szp,
1077 				ln, ppos, pos, offs));
1078 }
1079 
1080 /* ARGSUSED */
1081 static enum rofferr
1082 roff_cond_text(ROFF_ARGS)
1083 {
1084 	char		*ep;
1085 	enum roffrule	 rr;
1086 
1087 	rr = r->last->rule;
1088 	roffnode_cleanscope(r);
1089 
1090 	ep = &(*bufp)[pos];
1091 	for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1092 		ep++;
1093 		if ('}' != *ep)
1094 			continue;
1095 		*ep = '&';
1096 		roff_ccond(r, ROFF_ccond, bufp, szp,
1097 				ln, pos, pos + 2, offs);
1098 	}
1099 	return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1100 }
1101 
1102 static enum roffrule
1103 roff_evalcond(const char *v, int *pos)
1104 {
1105 
1106 	switch (v[*pos]) {
1107 	case ('n'):
1108 		(*pos)++;
1109 		return(ROFFRULE_ALLOW);
1110 	case ('e'):
1111 		/* FALLTHROUGH */
1112 	case ('o'):
1113 		/* FALLTHROUGH */
1114 	case ('t'):
1115 		(*pos)++;
1116 		return(ROFFRULE_DENY);
1117 	default:
1118 		break;
1119 	}
1120 
1121 	while (v[*pos] && ' ' != v[*pos])
1122 		(*pos)++;
1123 	return(ROFFRULE_DENY);
1124 }
1125 
1126 /* ARGSUSED */
1127 static enum rofferr
1128 roff_line_ignore(ROFF_ARGS)
1129 {
1130 
1131 	if (ROFF_it == tok)
1132 		mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
1133 
1134 	return(ROFF_IGN);
1135 }
1136 
1137 /* ARGSUSED */
1138 static enum rofferr
1139 roff_cond(ROFF_ARGS)
1140 {
1141 
1142 	roffnode_push(r, tok, NULL, ln, ppos);
1143 
1144 	/*
1145 	 * An `.el' has no conditional body: it will consume the value
1146 	 * of the current rstack entry set in prior `ie' calls or
1147 	 * defaults to DENY.
1148 	 *
1149 	 * If we're not an `el', however, then evaluate the conditional.
1150 	 */
1151 
1152 	r->last->rule = ROFF_el == tok ?
1153 		(r->rstackpos < 0 ?
1154 		 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1155 		roff_evalcond(*bufp, &pos);
1156 
1157 	/*
1158 	 * An if-else will put the NEGATION of the current evaluated
1159 	 * conditional into the stack of rules.
1160 	 */
1161 
1162 	if (ROFF_ie == tok) {
1163 		if (r->rstackpos == RSTACK_MAX - 1) {
1164 			mandoc_msg(MANDOCERR_MEM,
1165 				r->parse, ln, ppos, NULL);
1166 			return(ROFF_ERR);
1167 		}
1168 		r->rstack[++r->rstackpos] =
1169 			ROFFRULE_DENY == r->last->rule ?
1170 			ROFFRULE_ALLOW : ROFFRULE_DENY;
1171 	}
1172 
1173 	/* If the parent has false as its rule, then so do we. */
1174 
1175 	if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1176 		r->last->rule = ROFFRULE_DENY;
1177 
1178 	/*
1179 	 * Determine scope.
1180 	 * If there is nothing on the line after the conditional,
1181 	 * not even whitespace, use next-line scope.
1182 	 */
1183 
1184 	if ('\0' == (*bufp)[pos]) {
1185 		r->last->endspan = 2;
1186 		goto out;
1187 	}
1188 
1189 	while (' ' == (*bufp)[pos])
1190 		pos++;
1191 
1192 	/* An opening brace requests multiline scope. */
1193 
1194 	if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1195 		r->last->endspan = -1;
1196 		pos += 2;
1197 		goto out;
1198 	}
1199 
1200 	/*
1201 	 * Anything else following the conditional causes
1202 	 * single-line scope.  Warn if the scope contains
1203 	 * nothing but trailing whitespace.
1204 	 */
1205 
1206 	if ('\0' == (*bufp)[pos])
1207 		mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1208 
1209 	r->last->endspan = 1;
1210 
1211 out:
1212 	*offs = pos;
1213 	return(ROFF_RERUN);
1214 }
1215 
1216 
1217 /* ARGSUSED */
1218 static enum rofferr
1219 roff_ds(ROFF_ARGS)
1220 {
1221 	char		*name, *string;
1222 
1223 	/*
1224 	 * A symbol is named by the first word following the macro
1225 	 * invocation up to a space.  Its value is anything after the
1226 	 * name's trailing whitespace and optional double-quote.  Thus,
1227 	 *
1228 	 *  [.ds foo "bar  "     ]
1229 	 *
1230 	 * will have `bar  "     ' as its value.
1231 	 */
1232 
1233 	string = *bufp + pos;
1234 	name = roff_getname(r, &string, ln, pos);
1235 	if ('\0' == *name)
1236 		return(ROFF_IGN);
1237 
1238 	/* Read past initial double-quote. */
1239 	if ('"' == *string)
1240 		string++;
1241 
1242 	/* The rest is the value. */
1243 	roff_setstr(r, name, string, 0);
1244 	return(ROFF_IGN);
1245 }
1246 
1247 int
1248 roff_regisset(const struct roff *r, enum regs reg)
1249 {
1250 
1251 	return(r->regs[(int)reg].set);
1252 }
1253 
1254 unsigned int
1255 roff_regget(const struct roff *r, enum regs reg)
1256 {
1257 
1258 	return(r->regs[(int)reg].u);
1259 }
1260 
1261 void
1262 roff_regunset(struct roff *r, enum regs reg)
1263 {
1264 
1265 	r->regs[(int)reg].set = 0;
1266 }
1267 
1268 /* ARGSUSED */
1269 static enum rofferr
1270 roff_nr(ROFF_ARGS)
1271 {
1272 	const char	*key;
1273 	char		*val;
1274 	int		 iv;
1275 
1276 	val = *bufp + pos;
1277 	key = roff_getname(r, &val, ln, pos);
1278 
1279 	if (0 == strcmp(key, "nS")) {
1280 		r->regs[(int)REG_nS].set = 1;
1281 		if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0)
1282 			r->regs[(int)REG_nS].u = (unsigned)iv;
1283 		else
1284 			r->regs[(int)REG_nS].u = 0u;
1285 	}
1286 
1287 	return(ROFF_IGN);
1288 }
1289 
1290 /* ARGSUSED */
1291 static enum rofferr
1292 roff_rm(ROFF_ARGS)
1293 {
1294 	const char	 *name;
1295 	char		 *cp;
1296 
1297 	cp = *bufp + pos;
1298 	while ('\0' != *cp) {
1299 		name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1300 		if ('\0' != *name)
1301 			roff_setstr(r, name, NULL, 0);
1302 	}
1303 	return(ROFF_IGN);
1304 }
1305 
1306 /* ARGSUSED */
1307 static enum rofferr
1308 roff_Dd(ROFF_ARGS)
1309 {
1310 	const char *const	*cp;
1311 
1312 	if (MPARSE_MDOC != r->parsetype)
1313 		for (cp = __mdoc_reserved; *cp; cp++)
1314 			roff_setstr(r, *cp, NULL, 0);
1315 
1316 	return(ROFF_CONT);
1317 }
1318 
1319 /* ARGSUSED */
1320 static enum rofferr
1321 roff_TH(ROFF_ARGS)
1322 {
1323 	const char *const	*cp;
1324 
1325 	if (MPARSE_MDOC != r->parsetype)
1326 		for (cp = __man_reserved; *cp; cp++)
1327 			roff_setstr(r, *cp, NULL, 0);
1328 
1329 	return(ROFF_CONT);
1330 }
1331 
1332 /* ARGSUSED */
1333 static enum rofferr
1334 roff_TE(ROFF_ARGS)
1335 {
1336 
1337 	if (NULL == r->tbl)
1338 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1339 	else
1340 		tbl_end(&r->tbl);
1341 
1342 	return(ROFF_IGN);
1343 }
1344 
1345 /* ARGSUSED */
1346 static enum rofferr
1347 roff_T_(ROFF_ARGS)
1348 {
1349 
1350 	if (NULL == r->tbl)
1351 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1352 	else
1353 		tbl_restart(ppos, ln, r->tbl);
1354 
1355 	return(ROFF_IGN);
1356 }
1357 
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any; return 1 if there was one and
 * eqn_end() returned ROFF_EQN, or 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return(0);

	return(ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0);
}
#endif
1366 
1367 static void
1368 roff_openeqn(struct roff *r, const char *name, int line,
1369 		int offs, const char *buf)
1370 {
1371 	struct eqn_node *e;
1372 	int		 poff;
1373 
1374 	assert(NULL == r->eqn);
1375 	e = eqn_alloc(name, offs, line, r->parse);
1376 
1377 	if (r->last_eqn)
1378 		r->last_eqn->next = e;
1379 	else
1380 		r->first_eqn = r->last_eqn = e;
1381 
1382 	r->eqn = r->last_eqn = e;
1383 
1384 	if (buf) {
1385 		poff = 0;
1386 		eqn_read(&r->eqn, line, buf, offs, &poff);
1387 	}
1388 }
1389 
1390 /* ARGSUSED */
1391 static enum rofferr
1392 roff_EQ(ROFF_ARGS)
1393 {
1394 
1395 	roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1396 	return(ROFF_IGN);
1397 }
1398 
1399 /* ARGSUSED */
1400 static enum rofferr
1401 roff_EN(ROFF_ARGS)
1402 {
1403 
1404 	mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1405 	return(ROFF_IGN);
1406 }
1407 
1408 /* ARGSUSED */
1409 static enum rofferr
1410 roff_TS(ROFF_ARGS)
1411 {
1412 	struct tbl_node	*t;
1413 
1414 	if (r->tbl) {
1415 		mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1416 		tbl_end(&r->tbl);
1417 	}
1418 
1419 	t = tbl_alloc(ppos, ln, r->parse);
1420 
1421 	if (r->last_tbl)
1422 		r->last_tbl->next = t;
1423 	else
1424 		r->first_tbl = r->last_tbl = t;
1425 
1426 	r->tbl = r->last_tbl = t;
1427 	return(ROFF_IGN);
1428 }
1429 
1430 /* ARGSUSED */
1431 static enum rofferr
1432 roff_cc(ROFF_ARGS)
1433 {
1434 	const char	*p;
1435 
1436 	p = *bufp + pos;
1437 
1438 	if ('\0' == *p || '.' == (r->control = *p++))
1439 		r->control = 0;
1440 
1441 	if ('\0' != *p)
1442 		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1443 
1444 	return(ROFF_IGN);
1445 }
1446 
1447 /* ARGSUSED */
1448 static enum rofferr
1449 roff_tr(ROFF_ARGS)
1450 {
1451 	const char	*p, *first, *second;
1452 	size_t		 fsz, ssz;
1453 	enum mandoc_esc	 esc;
1454 
1455 	p = *bufp + pos;
1456 
1457 	if ('\0' == *p) {
1458 		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1459 		return(ROFF_IGN);
1460 	}
1461 
1462 	while ('\0' != *p) {
1463 		fsz = ssz = 1;
1464 
1465 		first = p++;
1466 		if ('\\' == *first) {
1467 			esc = mandoc_escape(&p, NULL, NULL);
1468 			if (ESCAPE_ERROR == esc) {
1469 				mandoc_msg
1470 					(MANDOCERR_BADESCAPE, r->parse,
1471 					 ln, (int)(p - *bufp), NULL);
1472 				return(ROFF_IGN);
1473 			}
1474 			fsz = (size_t)(p - first);
1475 		}
1476 
1477 		second = p++;
1478 		if ('\\' == *second) {
1479 			esc = mandoc_escape(&p, NULL, NULL);
1480 			if (ESCAPE_ERROR == esc) {
1481 				mandoc_msg
1482 					(MANDOCERR_BADESCAPE, r->parse,
1483 					 ln, (int)(p - *bufp), NULL);
1484 				return(ROFF_IGN);
1485 			}
1486 			ssz = (size_t)(p - second);
1487 		} else if ('\0' == *second) {
1488 			mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1489 					ln, (int)(p - *bufp), NULL);
1490 			second = " ";
1491 			p--;
1492 		}
1493 
1494 		if (fsz > 1) {
1495 			roff_setstrn(&r->xmbtab, first,
1496 					fsz, second, ssz, 0);
1497 			continue;
1498 		}
1499 
1500 		if (NULL == r->xtab)
1501 			r->xtab = mandoc_calloc
1502 				(128, sizeof(struct roffstr));
1503 
1504 		free(r->xtab[(int)*first].p);
1505 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1506 		r->xtab[(int)*first].sz = ssz;
1507 	}
1508 
1509 	return(ROFF_IGN);
1510 }
1511 
1512 /* ARGSUSED */
1513 static enum rofferr
1514 roff_so(ROFF_ARGS)
1515 {
1516 	char *name;
1517 
1518 	mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1519 
1520 	/*
1521 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1522 	 * opening anything that's not in our cwd or anything beneath
1523 	 * it.  Thus, explicitly disallow traversing up the file-system
1524 	 * or using absolute paths.
1525 	 */
1526 
1527 	name = *bufp + pos;
1528 	if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1529 		mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1530 		return(ROFF_ERR);
1531 	}
1532 
1533 	*offs = pos;
1534 	return(ROFF_SO);
1535 }
1536 
1537 /* ARGSUSED */
1538 static enum rofferr
1539 roff_userdef(ROFF_ARGS)
1540 {
1541 	const char	 *arg[9];
1542 	char		 *cp, *n1, *n2;
1543 	int		  i;
1544 
1545 	/*
1546 	 * Collect pointers to macro argument strings
1547 	 * and null-terminate them.
1548 	 */
1549 	cp = *bufp + pos;
1550 	for (i = 0; i < 9; i++)
1551 		arg[i] = '\0' == *cp ? "" :
1552 		    mandoc_getarg(r->parse, &cp, ln, &pos);
1553 
1554 	/*
1555 	 * Expand macro arguments.
1556 	 */
1557 	*szp = 0;
1558 	n1 = cp = mandoc_strdup(r->current_string);
1559 	while (NULL != (cp = strstr(cp, "\\$"))) {
1560 		i = cp[2] - '1';
1561 		if (0 > i || 8 < i) {
1562 			/* Not an argument invocation. */
1563 			cp += 2;
1564 			continue;
1565 		}
1566 
1567 		*szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1568 		n2 = mandoc_malloc(*szp);
1569 
1570 		strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1571 		strlcat(n2, arg[i], *szp);
1572 		strlcat(n2, cp + 3, *szp);
1573 
1574 		cp = n2 + (cp - n1);
1575 		free(n1);
1576 		n1 = n2;
1577 	}
1578 
1579 	/*
1580 	 * Replace the macro invocation
1581 	 * by the expanded macro.
1582 	 */
1583 	free(*bufp);
1584 	*bufp = n1;
1585 	if (0 == *szp)
1586 		*szp = strlen(*bufp) + 1;
1587 
1588 	return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1589 	   ROFF_REPARSE : ROFF_APPEND);
1590 }
1591 
1592 static char *
1593 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1594 {
1595 	char	 *name, *cp;
1596 
1597 	name = *cpp;
1598 	if ('\0' == *name)
1599 		return(name);
1600 
1601 	/* Read until end of name. */
1602 	for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1603 		if ('\\' != *cp)
1604 			continue;
1605 		cp++;
1606 		if ('\\' == *cp)
1607 			continue;
1608 		mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1609 		*cp = '\0';
1610 		name = cp;
1611 	}
1612 
1613 	/* Nil-terminate name. */
1614 	if ('\0' != *cp)
1615 		*(cp++) = '\0';
1616 
1617 	/* Read past spaces. */
1618 	while (' ' == *cp)
1619 		cp++;
1620 
1621 	*cpp = cp;
1622 	return(name);
1623 }
1624 
1625 /*
1626  * Store *string into the user-defined string called *name.
1627  * In multiline mode, append to an existing entry and append '\n';
1628  * else replace the existing entry, if there is one.
1629  * To clear an existing entry, call with (*r, *name, NULL, 0).
1630  */
1631 static void
1632 roff_setstr(struct roff *r, const char *name, const char *string,
1633 	int multiline)
1634 {
1635 
1636 	roff_setstrn(&r->strtab, name, strlen(name), string,
1637 			string ? strlen(string) : 0, multiline);
1638 }
1639 
1640 static void
1641 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1642 		const char *string, size_t stringsz, int multiline)
1643 {
1644 	struct roffkv	*n;
1645 	char		*c;
1646 	int		 i;
1647 	size_t		 oldch, newch;
1648 
1649 	/* Search for an existing string with the same name. */
1650 	n = *r;
1651 
1652 	while (n && strcmp(name, n->key.p))
1653 		n = n->next;
1654 
1655 	if (NULL == n) {
1656 		/* Create a new string table entry. */
1657 		n = mandoc_malloc(sizeof(struct roffkv));
1658 		n->key.p = mandoc_strndup(name, namesz);
1659 		n->key.sz = namesz;
1660 		n->val.p = NULL;
1661 		n->val.sz = 0;
1662 		n->next = *r;
1663 		*r = n;
1664 	} else if (0 == multiline) {
1665 		/* In multiline mode, append; else replace. */
1666 		free(n->val.p);
1667 		n->val.p = NULL;
1668 		n->val.sz = 0;
1669 	}
1670 
1671 	if (NULL == string)
1672 		return;
1673 
1674 	/*
1675 	 * One additional byte for the '\n' in multiline mode,
1676 	 * and one for the terminating '\0'.
1677 	 */
1678 	newch = stringsz + (multiline ? 2u : 1u);
1679 
1680 	if (NULL == n->val.p) {
1681 		n->val.p = mandoc_malloc(newch);
1682 		*n->val.p = '\0';
1683 		oldch = 0;
1684 	} else {
1685 		oldch = n->val.sz;
1686 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1687 	}
1688 
1689 	/* Skip existing content in the destination buffer. */
1690 	c = n->val.p + (int)oldch;
1691 
1692 	/* Append new content to the destination buffer. */
1693 	i = 0;
1694 	while (i < (int)stringsz) {
1695 		/*
1696 		 * Rudimentary roff copy mode:
1697 		 * Handle escaped backslashes.
1698 		 */
1699 		if ('\\' == string[i] && '\\' == string[i + 1])
1700 			i++;
1701 		*c++ = string[i++];
1702 	}
1703 
1704 	/* Append terminating bytes. */
1705 	if (multiline)
1706 		*c++ = '\n';
1707 
1708 	*c = '\0';
1709 	n->val.sz = (int)(c - n->val.p);
1710 }
1711 
1712 static const char *
1713 roff_getstrn(const struct roff *r, const char *name, size_t len)
1714 {
1715 	const struct roffkv *n;
1716 
1717 	for (n = r->strtab; n; n = n->next)
1718 		if (0 == strncmp(name, n->key.p, len) &&
1719 				'\0' == n->key.p[(int)len])
1720 			return(n->val.p);
1721 
1722 	return(NULL);
1723 }
1724 
1725 static void
1726 roff_freestr(struct roffkv *r)
1727 {
1728 	struct roffkv	 *n, *nn;
1729 
1730 	for (n = r; n; n = nn) {
1731 		free(n->key.p);
1732 		free(n->val.p);
1733 		nn = n->next;
1734 		free(n);
1735 	}
1736 }
1737 
1738 const struct tbl_span *
1739 roff_span(const struct roff *r)
1740 {
1741 
1742 	return(r->tbl ? tbl_span(r->tbl) : NULL);
1743 }
1744 
1745 const struct eqn *
1746 roff_eqn(const struct roff *r)
1747 {
1748 
1749 	return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1750 }
1751 
1752 /*
1753  * Duplicate an input string, making the appropriate character
1754  * conversations (as stipulated by `tr') along the way.
1755  * Returns a heap-allocated string with all the replacements made.
1756  */
1757 char *
1758 roff_strdup(const struct roff *r, const char *p)
1759 {
1760 	const struct roffkv *cp;
1761 	char		*res;
1762 	const char	*pp;
1763 	size_t		 ssz, sz;
1764 	enum mandoc_esc	 esc;
1765 
1766 	if (NULL == r->xmbtab && NULL == r->xtab)
1767 		return(mandoc_strdup(p));
1768 	else if ('\0' == *p)
1769 		return(mandoc_strdup(""));
1770 
1771 	/*
1772 	 * Step through each character looking for term matches
1773 	 * (remember that a `tr' can be invoked with an escape, which is
1774 	 * a glyph but the escape is multi-character).
1775 	 * We only do this if the character hash has been initialised
1776 	 * and the string is >0 length.
1777 	 */
1778 
1779 	res = NULL;
1780 	ssz = 0;
1781 
1782 	while ('\0' != *p) {
1783 		if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1784 			sz = r->xtab[(int)*p].sz;
1785 			res = mandoc_realloc(res, ssz + sz + 1);
1786 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1787 			ssz += sz;
1788 			p++;
1789 			continue;
1790 		} else if ('\\' != *p) {
1791 			res = mandoc_realloc(res, ssz + 2);
1792 			res[ssz++] = *p++;
1793 			continue;
1794 		}
1795 
1796 		/* Search for term matches. */
1797 		for (cp = r->xmbtab; cp; cp = cp->next)
1798 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
1799 				break;
1800 
1801 		if (NULL != cp) {
1802 			/*
1803 			 * A match has been found.
1804 			 * Append the match to the array and move
1805 			 * forward by its keysize.
1806 			 */
1807 			res = mandoc_realloc
1808 				(res, ssz + cp->val.sz + 1);
1809 			memcpy(res + ssz, cp->val.p, cp->val.sz);
1810 			ssz += cp->val.sz;
1811 			p += (int)cp->key.sz;
1812 			continue;
1813 		}
1814 
1815 		/*
1816 		 * Handle escapes carefully: we need to copy
1817 		 * over just the escape itself, or else we might
1818 		 * do replacements within the escape itself.
1819 		 * Make sure to pass along the bogus string.
1820 		 */
1821 		pp = p++;
1822 		esc = mandoc_escape(&p, NULL, NULL);
1823 		if (ESCAPE_ERROR == esc) {
1824 			sz = strlen(pp);
1825 			res = mandoc_realloc(res, ssz + sz + 1);
1826 			memcpy(res + ssz, pp, sz);
1827 			break;
1828 		}
1829 		/*
1830 		 * We bail out on bad escapes.
1831 		 * No need to warn: we already did so when
1832 		 * roff_res() was called.
1833 		 */
1834 		sz = (int)(p - pp);
1835 		res = mandoc_realloc(res, ssz + sz + 1);
1836 		memcpy(res + ssz, pp, sz);
1837 		ssz += sz;
1838 	}
1839 
1840 	res[(int)ssz] = '\0';
1841 	return(res);
1842 }
1843 
1844 /*
1845  * Find out whether a line is a macro line or not.
1846  * If it is, adjust the current position and return one; if it isn't,
1847  * return zero and don't change the current position.
1848  * If the control character has been set with `.cc', then let that grain
1849  * precedence.
1850  * This is slighly contrary to groff, where using the non-breaking
1851  * control character when `cc' has been invoked will cause the
1852  * non-breaking macro contents to be printed verbatim.
1853  */
1854 int
1855 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
1856 {
1857 	int		pos;
1858 
1859 	pos = *ppos;
1860 
1861 	if (0 != r->control && cp[pos] == r->control)
1862 		pos++;
1863 	else if (0 != r->control)
1864 		return(0);
1865 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
1866 		pos += 2;
1867 	else if ('.' == cp[pos] || '\'' == cp[pos])
1868 		pos++;
1869 	else
1870 		return(0);
1871 
1872 	while (' ' == cp[pos] || '\t' == cp[pos])
1873 		pos++;
1874 
1875 	*ppos = pos;
1876 	return(1);
1877 }
1878