xref: /openbsd-src/usr.bin/mandoc/roff.c (revision 42ac1f71ddfc8f2b1ea1555399aa1e1ffc2faced)
1 /* $OpenBSD: roff.c,v 1.259 2022/05/01 16:18:59 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the roff(7) parser for mandoc(1).
19  */
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40 
41 /*
42  * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
43  * that an escape sequence resulted from copy-in processing and
44  * needs to be checked or interpolated.  As it is used nowhere
45  * else, it is defined here rather than in a header file.
46  */
47 #define	ASCII_ESC	27
48 
49 /* Maximum number of string expansions per line, to break infinite loops. */
50 #define	EXPAND_LIMIT	1000
51 
52 /* Types of definitions of macros and strings. */
/*
 * Each type is a distinct bit, so the values can be combined into a mask.
 * ROFFDEF_ANY is the union of the four real definition types;
 * it does not include ROFFDEF_UNDEF.
 */
53 #define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
54 #define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
55 #define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
56 #define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
57 #define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
58 			 ROFFDEF_REN | ROFFDEF_STD)
59 #define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
60 
61 /* --- data types --------------------------------------------------------- */
62 
63 /*
64  * An incredibly-simple string buffer:
 * a pointer to the text together with its cached length.
65  */
66 struct	roffstr {
67 	char		*p; /* NUL-terminated buffer */
68 	size_t		 sz; /* saved strlen(p), not counting the NUL */
69 };
70 
71 /*
72  * A key-value roffstr pair as part of a singly-linked list.
 * Used by struct roff for the string, rename, and multi-byte
 * translation tables (strtab, rentab, xmbtab).
73  */
74 struct	roffkv {
75 	struct roffstr	 key; /* name to look up */
76 	struct roffstr	 val; /* value associated with the name */
77 	struct roffkv	*next; /* next in list */
78 };
79 
80 /*
81  * A single number register as part of a singly-linked list.
 * The list head is kept in the regtab member of struct roff.
82  */
83 struct	roffreg {
84 	struct roffstr	 key; /* register name */
85 	int		 val; /* integer stored for this register */
86 	int		 step; /* NOTE(review): presumably the auto-increment step; confirm in roff_setregn() */
87 	struct roffreg	*next; /* next in list */
88 };
89 
90 /*
91  * Association of request and macro names with token IDs.
 * Objects of this type are the entries of the hash tables built by
 * roffhash_alloc(), which allocates each entry with enough trailing
 * space for the name including its terminating NUL byte.
92  */
93 struct	roffreq {
94 	enum roff_tok	 tok; /* token ID belonging to the name */
95 	char		 name[]; /* hash key: NUL-terminated name (flexible array member) */
96 };
97 
98 /*
99  * A macro processing context.
100  * More than one is needed when macro calls are nested.
 * The contexts are kept in the mstack array of struct roff.
101  */
102 struct	mctx {
103 	char		**argv; /* argument vector; the array itself is freed in roff_free() */
104 	int		 argc; /* NOTE(review): presumably the number of arguments in use; confirm */
105 	int		 argsz; /* NOTE(review): presumably the allocated size of argv; confirm */
106 };
107 
/*
 * The complete state of the roff(7) parser.
 * Allocated by roff_alloc(), partially cleared by roff_reset(),
 * and destroyed by roff_free().
 */
108 struct	roff {
109 	struct roff_man	*man; /* mdoc or man parser */
110 	struct roffnode	*last; /* leaf of stack */
111 	struct mctx	*mstack; /* stack of macro contexts */
112 	int		*rstack; /* stack of inverted `ie' values */
113 	struct ohash	*reqtab; /* request lookup table */
114 	struct roffreg	*regtab; /* number registers */
115 	struct roffkv	*strtab; /* user-defined strings & macros */
116 	struct roffkv	*rentab; /* renamed strings & macros */
117 	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
118 	struct roffstr	*xtab; /* single-byte trans table (`tr'), 128 entries when allocated */
119 	const char	*current_string; /* value of last called user macro */
120 	struct tbl_node	*first_tbl; /* first table parsed */
121 	struct tbl_node	*last_tbl; /* last table parsed */
122 	struct tbl_node	*tbl; /* current table being parsed */
123 	struct eqn_node	*last_eqn; /* equation parser */
124 	struct eqn_node	*eqn; /* active equation parser */
125 	int		 eqn_inline; /* current equation is inline */
126 	int		 options; /* parse options */
127 	int		 mstacksz; /* current size of mstack */
128 	int		 mstackpos; /* position in mstack, -1 while it is empty */
129 	int		 rstacksz; /* current size limit of rstack */
130 	int		 rstackpos; /* position in rstack, initialized to -1 */
131 	int		 format; /* current file in mdoc or man format */
132 	char		 control; /* control character */
133 	char		 escape; /* escape character, initially a backslash */
134 };
135 
136 /*
137  * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointers;
 * they are created by roffnode_push() and destroyed by roffnode_pop().
138  */
139 struct	roffnode {
140 	enum roff_tok	 tok; /* type of node */
141 	struct roffnode	*parent; /* up one in stack */
142 	int		 line; /* parse line */
143 	int		 col; /* parse col */
144 	char		*name; /* node name, e.g. macro name; owned by the node */
145 	char		*end; /* custom end macro of the block; owned by the node */
146 	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
147 	int		 rule; /* content is: 1=evaluated 0=skipped; inherited from the parent on push */
148 };
149 
/*
 * The parameter list shared by all request handler functions.
 * It is used both for the handler prototypes and for the
 * roffproc function pointer type below.
 */
150 #define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
151 			 enum roff_tok tok, /* tok of macro */ \
152 			 struct buf *buf, /* input buffer */ \
153 			 int ln, /* parse line */ \
154 			 int ppos, /* original pos in buffer */ \
155 			 int pos, /* current pos in buffer */ \
156 			 int *offs /* reset offset of buffer data */
157 
/* Pointer to a request handler taking the ROFF_ARGS parameter list. */
158 typedef	int (*roffproc)(ROFF_ARGS);
159 
/*
 * The handler functions and flags for one request or macro;
 * the element type of the roffs[] table.
 * In that table, text and sub are NULL for all entries
 * except block and conditional requests.
 */
160 struct	roffmac {
161 	roffproc	 proc; /* process new macro */
162 	roffproc	 text; /* process as child text of macro */
163 	roffproc	 sub; /* process as child of macro */
164 	int		 flags; /* 0 or ROFFMAC_STRUCT */
165 #define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
166 };
167 
/*
 * One predefined string: the name that appears in the input
 * and the text it is replaced with.
 */
168 struct	predef {
169 	const char	*name; /* predefined input name */
170 	const char	*str; /* replacement symbol */
171 };
172 
/*
 * Expands to one struct predef initializer followed by a comma.
 * NOTE(review): presumably the lines of predefs.in invoke this macro;
 * that file is not visible here.
 */
173 #define	PREDEF(__name, __str) \
174 	{ (__name), (__str) },
175 
176 /* --- function prototypes ------------------------------------------------ */
177 
178 static	int		 roffnode_cleanscope(struct roff *);
179 static	int		 roffnode_pop(struct roff *);
180 static	void		 roffnode_push(struct roff *, enum roff_tok,
181 				const char *, int, int);
182 static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
183 static	int		 roff_als(ROFF_ARGS);
184 static	int		 roff_block(ROFF_ARGS);
185 static	int		 roff_block_text(ROFF_ARGS);
186 static	int		 roff_block_sub(ROFF_ARGS);
187 static	int		 roff_break(ROFF_ARGS);
188 static	int		 roff_cblock(ROFF_ARGS);
189 static	int		 roff_cc(ROFF_ARGS);
190 static	int		 roff_ccond(struct roff *, int, int);
191 static	int		 roff_char(ROFF_ARGS);
192 static	int		 roff_cond(ROFF_ARGS);
193 static	int		 roff_cond_checkend(ROFF_ARGS);
194 static	int		 roff_cond_text(ROFF_ARGS);
195 static	int		 roff_cond_sub(ROFF_ARGS);
196 static	int		 roff_ds(ROFF_ARGS);
197 static	int		 roff_ec(ROFF_ARGS);
198 static	int		 roff_eo(ROFF_ARGS);
199 static	int		 roff_eqndelim(struct roff *, struct buf *, int);
200 static	int		 roff_evalcond(struct roff *, int, char *, int *);
201 static	int		 roff_evalnum(struct roff *, int,
202 				const char *, int *, int *, int);
203 static	int		 roff_evalpar(struct roff *, int,
204 				const char *, int *, int *, int);
205 static	int		 roff_evalstrcond(const char *, int *);
206 static	int		 roff_expand(struct roff *, struct buf *,
207 				int, int, char);
208 static	void		 roff_free1(struct roff *);
209 static	void		 roff_freereg(struct roffreg *);
210 static	void		 roff_freestr(struct roffkv *);
211 static	size_t		 roff_getname(struct roff *, char **, int, int);
212 static	int		 roff_getnum(const char *, int *, int *, int);
213 static	int		 roff_getop(const char *, int *, char *);
214 static	int		 roff_getregn(struct roff *,
215 				const char *, size_t, char);
216 static	int		 roff_getregro(const struct roff *,
217 				const char *name);
218 static	const char	*roff_getstrn(struct roff *,
219 				const char *, size_t, int *);
220 static	int		 roff_hasregn(const struct roff *,
221 				const char *, size_t);
222 static	int		 roff_insec(ROFF_ARGS);
223 static	int		 roff_it(ROFF_ARGS);
224 static	int		 roff_line_ignore(ROFF_ARGS);
225 static	void		 roff_man_alloc1(struct roff_man *);
226 static	void		 roff_man_free1(struct roff_man *);
227 static	int		 roff_manyarg(ROFF_ARGS);
228 static	int		 roff_mc(ROFF_ARGS);
229 static	int		 roff_noarg(ROFF_ARGS);
230 static	int		 roff_nop(ROFF_ARGS);
231 static	int		 roff_nr(ROFF_ARGS);
232 static	int		 roff_onearg(ROFF_ARGS);
233 static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
234 				int, int);
235 static	int		 roff_parse_comment(struct roff *, struct buf *,
236 				int, int, char);
237 static	int		 roff_parsetext(struct roff *, struct buf *,
238 				int, int *);
239 static	int		 roff_renamed(ROFF_ARGS);
240 static	int		 roff_req_or_macro(ROFF_ARGS);
241 static	int		 roff_return(ROFF_ARGS);
242 static	int		 roff_rm(ROFF_ARGS);
243 static	int		 roff_rn(ROFF_ARGS);
244 static	int		 roff_rr(ROFF_ARGS);
245 static	void		 roff_setregn(struct roff *, const char *,
246 				size_t, int, char, int);
247 static	void		 roff_setstr(struct roff *,
248 				const char *, const char *, int);
249 static	void		 roff_setstrn(struct roffkv **, const char *,
250 				size_t, const char *, size_t, int);
251 static	int		 roff_shift(ROFF_ARGS);
252 static	int		 roff_so(ROFF_ARGS);
253 static	int		 roff_tr(ROFF_ARGS);
254 static	int		 roff_Dd(ROFF_ARGS);
255 static	int		 roff_TE(ROFF_ARGS);
256 static	int		 roff_TS(ROFF_ARGS);
257 static	int		 roff_EQ(ROFF_ARGS);
258 static	int		 roff_EN(ROFF_ARGS);
259 static	int		 roff_T_(ROFF_ARGS);
260 static	int		 roff_unsupp(ROFF_ARGS);
261 static	int		 roff_userdef(ROFF_ARGS);
262 
263 /* --- constant data ------------------------------------------------------ */
264 
/* Flag bits for the numeric parsing functions; they can be combined. */
265 #define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
266 #define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
267 
/*
 * The names of all requests and macros, indexed by token ID:
 * roffhash_alloc() reads roff_name[tok] for each token in its range.
 * NULL entries mark token IDs without a name; roffhash_alloc()
 * skips them.  The leading part of this table lists the same names,
 * in the same order, as the per-entry comments of the roffs[] table,
 * so entries must not be reordered.
 * NOTE(review): the order presumably has to match enum roff_tok,
 * which is declared in a header not visible here.
 */
268 const char *__roff_name[MAN_MAX + 1] = {
269 	"br",		"ce",		"fi",		"ft",
270 	"ll",		"mc",		"nf",
271 	"po",		"rj",		"sp",
272 	"ta",		"ti",		NULL,
273 	"ab",		"ad",		"af",		"aln",
274 	"als",		"am",		"am1",		"ami",
275 	"ami1",		"as",		"as1",		"asciify",
276 	"backtrace",	"bd",		"bleedat",	"blm",
277         "box",		"boxa",		"bp",		"BP",
278 	"break",	"breakchar",	"brnl",		"brp",
279 	"brpnl",	"c2",		"cc",
280 	"cf",		"cflags",	"ch",		"char",
281 	"chop",		"class",	"close",	"CL",
282 	"color",	"composite",	"continue",	"cp",
283 	"cropat",	"cs",		"cu",		"da",
284 	"dch",		"Dd",		"de",		"de1",
285 	"defcolor",	"dei",		"dei1",		"device",
286 	"devicem",	"di",		"do",		"ds",
287 	"ds1",		"dwh",		"dt",		"ec",
288 	"ecr",		"ecs",		"el",		"em",
289 	"EN",		"eo",		"EP",		"EQ",
290 	"errprint",	"ev",		"evc",		"ex",
291 	"fallback",	"fam",		"fc",		"fchar",
292 	"fcolor",	"fdeferlig",	"feature",	"fkern",
293 	"fl",		"flig",		"fp",		"fps",
294 	"fschar",	"fspacewidth",	"fspecial",	"ftr",
295 	"fzoom",	"gcolor",	"hc",		"hcode",
296 	"hidechar",	"hla",		"hlm",		"hpf",
297 	"hpfa",		"hpfcode",	"hw",		"hy",
298 	"hylang",	"hylen",	"hym",		"hypp",
299 	"hys",		"ie",		"if",		"ig",
300 	"index",	"it",		"itc",		"IX",
301 	"kern",		"kernafter",	"kernbefore",	"kernpair",
302 	"lc",		"lc_ctype",	"lds",		"length",
303 	"letadj",	"lf",		"lg",		"lhang",
304 	"linetabs",	"lnr",		"lnrf",		"lpfx",
305 	"ls",		"lsm",		"lt",
306 	"mediasize",	"minss",	"mk",		"mso",
307 	"na",		"ne",		"nh",		"nhychar",
308 	"nm",		"nn",		"nop",		"nr",
309 	"nrf",		"nroff",	"ns",		"nx",
310 	"open",		"opena",	"os",		"output",
311 	"padj",		"papersize",	"pc",		"pev",
312 	"pi",		"PI",		"pl",		"pm",
313 	"pn",		"pnr",		"ps",
314 	"psbb",		"pshape",	"pso",		"ptr",
315 	"pvs",		"rchar",	"rd",		"recursionlimit",
316 	"return",	"rfschar",	"rhang",
317 	"rm",		"rn",		"rnn",		"rr",
318 	"rs",		"rt",		"schar",	"sentchar",
319 	"shc",		"shift",	"sizes",	"so",
320 	"spacewidth",	"special",	"spreadwarn",	"ss",
321 	"sty",		"substring",	"sv",		"sy",
322 	"T&",		"tc",		"TE",
323 	"TH",		"tkf",		"tl",
324 	"tm",		"tm1",		"tmc",		"tr",
325 	"track",	"transchar",	"trf",		"trimat",
326 	"trin",		"trnt",		"troff",	"TS",
327 	"uf",		"ul",		"unformat",	"unwatch",
328 	"unwatchn",	"vpt",		"vs",		"warn",
329 	"warnscale",	"watch",	"watchlength",	"watchn",
330 	"wh",		"while",	"write",	"writec",
331 	"writem",	"xflag",	".",		NULL,
332 	NULL,		"text",
333 	"Dd",		"Dt",		"Os",		"Sh",
334 	"Ss",		"Pp",		"D1",		"Dl",
335 	"Bd",		"Ed",		"Bl",		"El",
336 	"It",		"Ad",		"An",		"Ap",
337 	"Ar",		"Cd",		"Cm",		"Dv",
338 	"Er",		"Ev",		"Ex",		"Fa",
339 	"Fd",		"Fl",		"Fn",		"Ft",
340 	"Ic",		"In",		"Li",		"Nd",
341 	"Nm",		"Op",		"Ot",		"Pa",
342 	"Rv",		"St",		"Va",		"Vt",
343 	"Xr",		"%A",		"%B",		"%D",
344 	"%I",		"%J",		"%N",		"%O",
345 	"%P",		"%R",		"%T",		"%V",
346 	"Ac",		"Ao",		"Aq",		"At",
347 	"Bc",		"Bf",		"Bo",		"Bq",
348 	"Bsx",		"Bx",		"Db",		"Dc",
349 	"Do",		"Dq",		"Ec",		"Ef",
350 	"Em",		"Eo",		"Fx",		"Ms",
351 	"No",		"Ns",		"Nx",		"Ox",
352 	"Pc",		"Pf",		"Po",		"Pq",
353 	"Qc",		"Ql",		"Qo",		"Qq",
354 	"Re",		"Rs",		"Sc",		"So",
355 	"Sq",		"Sm",		"Sx",		"Sy",
356 	"Tn",		"Ux",		"Xc",		"Xo",
357 	"Fo",		"Fc",		"Oo",		"Oc",
358 	"Bk",		"Ek",		"Bt",		"Hf",
359 	"Fr",		"Ud",		"Lb",		"Lp",
360 	"Lk",		"Mt",		"Brq",		"Bro",
361 	"Brc",		"%C",		"Es",		"En",
362 	"Dx",		"%Q",		"%U",		"Ta",
363 	"Tg",		NULL,
364 	"TH",		"SH",		"SS",		"TP",
365 	"TQ",
366 	"LP",		"PP",		"P",		"IP",
367 	"HP",		"SM",		"SB",		"BI",
368 	"IB",		"BR",		"RB",		"R",
369 	"B",		"I",		"IR",		"RI",
370 	"RE",		"RS",		"DT",		"UC",
371 	"PD",		"AT",		"in",
372 	"SY",		"YS",		"OP",
373 	"EX",		"EE",		"UR",
374 	"UE",		"MT",		"ME",		NULL
375 };
/* Read-only view of the name table; this is what roffhash_alloc() uses. */
376 const	char *const *roff_name = __roff_name;
377 
/*
 * The handler functions for each low-level roff request.
 * Each entry holds, in this order: the handler for the request itself,
 * the handler for text lines inside its block, the handler for
 * request lines inside its block, and the flags.
 * The comment on each entry names the request it belongs to;
 * the order is the same as at the beginning of __roff_name[]
 * and must not be changed.
 * NOTE(review): the table is presumably indexed by enum roff_tok;
 * the indexing code is not visible here.
 */
378 static	struct roffmac	 roffs[TOKEN_NONE] = {
379 	{ roff_noarg, NULL, NULL, 0 },  /* br */
380 	{ roff_onearg, NULL, NULL, 0 },  /* ce */
381 	{ roff_noarg, NULL, NULL, 0 },  /* fi */
382 	{ roff_onearg, NULL, NULL, 0 },  /* ft */
383 	{ roff_onearg, NULL, NULL, 0 },  /* ll */
384 	{ roff_mc, NULL, NULL, 0 },  /* mc */
385 	{ roff_noarg, NULL, NULL, 0 },  /* nf */
386 	{ roff_onearg, NULL, NULL, 0 },  /* po */
387 	{ roff_onearg, NULL, NULL, 0 },  /* rj */
388 	{ roff_onearg, NULL, NULL, 0 },  /* sp */
389 	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
390 	{ roff_onearg, NULL, NULL, 0 },  /* ti */
391 	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
392 	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
393 	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
394 	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
395 	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
396 	{ roff_als, NULL, NULL, 0 },  /* als */
397 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
398 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
399 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
400 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
401 	{ roff_ds, NULL, NULL, 0 },  /* as */
402 	{ roff_ds, NULL, NULL, 0 },  /* as1 */
403 	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
404 	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
405 	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
406 	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
407 	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
408 	{ roff_unsupp, NULL, NULL, 0 },  /* box */
409 	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
410 	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
411 	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
412 	{ roff_break, NULL, NULL, 0 },  /* break */
413 	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
414 	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
415 	{ roff_noarg, NULL, NULL, 0 },  /* brp */
416 	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
417 	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
418 	{ roff_cc, NULL, NULL, 0 },  /* cc */
419 	{ roff_insec, NULL, NULL, 0 },  /* cf */
420 	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
421 	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
422 	{ roff_char, NULL, NULL, 0 },  /* char */
423 	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
424 	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
425 	{ roff_insec, NULL, NULL, 0 },  /* close */
426 	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
427 	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
428 	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
429 	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
430 	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
431 	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
432 	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
433 	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
434 	{ roff_unsupp, NULL, NULL, 0 },  /* da */
435 	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
436 	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
437 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
438 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
439 	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
440 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
441 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
442 	{ roff_unsupp, NULL, NULL, 0 },  /* device */
443 	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
444 	{ roff_unsupp, NULL, NULL, 0 },  /* di */
445 	{ roff_unsupp, NULL, NULL, 0 },  /* do */
446 	{ roff_ds, NULL, NULL, 0 },  /* ds */
447 	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
448 	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
449 	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
450 	{ roff_ec, NULL, NULL, 0 },  /* ec */
451 	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
452 	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
453 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
454 	{ roff_unsupp, NULL, NULL, 0 },  /* em */
455 	{ roff_EN, NULL, NULL, 0 },  /* EN */
456 	{ roff_eo, NULL, NULL, 0 },  /* eo */
457 	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
458 	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
459 	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
460 	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
461 	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
462 	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
463 	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
464 	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
465 	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
466 	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
467 	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
468 	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
469 	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
470 	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
471 	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
472 	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
473 	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
474 	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
475 	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
476 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
477 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
478 	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
479 	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
480 	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
481 	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
482 	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
483 	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
484 	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
485 	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
486 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
487 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
488 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
489 	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
490 	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
491 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
492 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
493 	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
494 	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
495 	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
496 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
497 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
498 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
499 	{ roff_unsupp, NULL, NULL, 0 },  /* index */
500 	{ roff_it, NULL, NULL, 0 },  /* it */
501 	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
502 	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
503 	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
504 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
505 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
506 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
507 	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
508 	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
509 	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
510 	{ roff_unsupp, NULL, NULL, 0 },  /* length */
511 	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
512 	{ roff_insec, NULL, NULL, 0 },  /* lf */
513 	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
514 	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
515 	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
516 	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
517 	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
518 	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
519 	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
520 	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
521 	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
522 	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
523 	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
524 	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
525 	{ roff_insec, NULL, NULL, 0 },  /* mso */
526 	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
527 	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
528 	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
529 	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
530 	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
531 	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
532 	{ roff_nop, NULL, NULL, 0 },  /* nop */
533 	{ roff_nr, NULL, NULL, 0 },  /* nr */
534 	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
535 	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
536 	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
537 	{ roff_insec, NULL, NULL, 0 },  /* nx */
538 	{ roff_insec, NULL, NULL, 0 },  /* open */
539 	{ roff_insec, NULL, NULL, 0 },  /* opena */
540 	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
541 	{ roff_unsupp, NULL, NULL, 0 },  /* output */
542 	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
543 	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
544 	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
545 	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
546 	{ roff_insec, NULL, NULL, 0 },  /* pi */
547 	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
548 	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
549 	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
550 	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
551 	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
552 	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
553 	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
554 	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
555 	{ roff_insec, NULL, NULL, 0 },  /* pso */
556 	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
557 	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
558 	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
559 	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
560 	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
561 	{ roff_return, NULL, NULL, 0 },  /* return */
562 	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
563 	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
564 	{ roff_rm, NULL, NULL, 0 },  /* rm */
565 	{ roff_rn, NULL, NULL, 0 },  /* rn */
566 	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
567 	{ roff_rr, NULL, NULL, 0 },  /* rr */
568 	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
569 	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
570 	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
571 	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
572 	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
573 	{ roff_shift, NULL, NULL, 0 },  /* shift */
574 	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
575 	{ roff_so, NULL, NULL, 0 },  /* so */
576 	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
577 	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
578 	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
579 	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
580 	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
581 	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
582 	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
583 	{ roff_insec, NULL, NULL, 0 },  /* sy */
584 	{ roff_T_, NULL, NULL, 0 },  /* T& */
585 	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
586 	{ roff_TE, NULL, NULL, 0 },  /* TE */
587 	{ roff_Dd, NULL, NULL, 0 },  /* TH */
588 	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
589 	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
590 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
591 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
592 	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
593 	{ roff_tr, NULL, NULL, 0 },  /* tr */
594 	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
595 	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
596 	{ roff_insec, NULL, NULL, 0 },  /* trf */
597 	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
598 	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
599 	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
600 	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
601 	{ roff_TS, NULL, NULL, 0 },  /* TS */
602 	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
603 	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
604 	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
605 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
606 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
607 	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
608 	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
609 	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
610 	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
611 	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
612 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
613 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
614 	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
615 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* while */
616 	{ roff_insec, NULL, NULL, 0 },  /* write */
617 	{ roff_insec, NULL, NULL, 0 },  /* writec */
618 	{ roff_insec, NULL, NULL, 0 },  /* writem */
619 	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
620 	{ roff_cblock, NULL, NULL, 0 },  /* . */
621 	{ roff_renamed, NULL, NULL, 0 },  /* renamed macro; presumably ROFF_RENAMED, confirm */
622 	{ roff_userdef, NULL, NULL, 0 }  /* user-defined macro; presumably ROFF_USERDEF, confirm */
623 };
624 
625 /* Array of injected predefined strings. */
/*
 * The initializers come from the included file predefs.in.
 * NOTE(review): PREDEFS_MAX presumably has to be kept in sync
 * with the number of entries in that file; confirm when editing it.
 */
626 #define	PREDEFS_MAX	 38
627 static	const struct predef predefs[PREDEFS_MAX] = {
628 #include "predefs.in"
629 };
630 
/*
 * File-scope parser state that is not part of struct roff.
 * All four variables are cleared by roff_reset().
 */
631 static	int	 roffce_lines;	/* number of input lines to center */
632 static	struct roff_node *roffce_node;  /* active request */
633 static	int	 roffit_lines;  /* number of lines to delay */
634 static	char	*roffit_macro;  /* NUL-terminated macro line */
635 
636 
637 /* --- request table ------------------------------------------------------ */
638 
639 struct ohash *
640 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
641 {
642 	struct ohash	*htab;
643 	struct roffreq	*req;
644 	enum roff_tok	 tok;
645 	size_t		 sz;
646 	unsigned int	 slot;
647 
648 	htab = mandoc_malloc(sizeof(*htab));
649 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
650 
651 	for (tok = mintok; tok < maxtok; tok++) {
652 		if (roff_name[tok] == NULL)
653 			continue;
654 		sz = strlen(roff_name[tok]);
655 		req = mandoc_malloc(sizeof(*req) + sz + 1);
656 		req->tok = tok;
657 		memcpy(req->name, roff_name[tok], sz + 1);
658 		slot = ohash_qlookup(htab, req->name);
659 		ohash_insert(htab, slot, req);
660 	}
661 	return htab;
662 }
663 
/*
 * Release a table created by roffhash_alloc(): first every entry,
 * then the table itself.  Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 pos;

	if (htab != NULL) {
		entry = ohash_first(htab, &pos);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &pos);
		}
		ohash_delete(htab);
		free(htab);
	}
}
678 
679 enum roff_tok
680 roffhash_find(struct ohash *htab, const char *name, size_t sz)
681 {
682 	struct roffreq	*req;
683 	const char	*end;
684 
685 	if (sz) {
686 		end = name + sz;
687 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
688 	} else
689 		req = ohash_find(htab, ohash_qlookup(htab, name));
690 	return req == NULL ? TOKEN_NONE : req->tok;
691 }
692 
693 /* --- stack of request blocks -------------------------------------------- */
694 
695 /*
696  * Pop the current node off of the stack of roff instructions currently
697  * pending.  Return 1 if it is a loop or 0 otherwise.
698  */
699 static int
700 roffnode_pop(struct roff *r)
701 {
702 	struct roffnode	*p;
703 	int		 inloop;
704 
705 	p = r->last;
706 	inloop = p->tok == ROFF_while;
707 	r->last = p->parent;
708 	free(p->name);
709 	free(p->end);
710 	free(p);
711 	return inloop;
712 }
713 
714 /*
715  * Push a roff node onto the instruction stack.  This must later be
716  * removed with roffnode_pop().
717  */
718 static void
719 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
720 		int line, int col)
721 {
722 	struct roffnode	*p;
723 
724 	p = mandoc_calloc(1, sizeof(struct roffnode));
725 	p->tok = tok;
726 	if (name)
727 		p->name = mandoc_strdup(name);
728 	p->parent = r->last;
729 	p->line = line;
730 	p->col = col;
731 	p->rule = p->parent ? p->parent->rule : 0;
732 
733 	r->last = p;
734 }
735 
736 /* --- roff parser state data management ---------------------------------- */
737 
738 static void
739 roff_free1(struct roff *r)
740 {
741 	int		 i;
742 
743 	tbl_free(r->first_tbl);
744 	r->first_tbl = r->last_tbl = r->tbl = NULL;
745 
746 	eqn_free(r->last_eqn);
747 	r->last_eqn = r->eqn = NULL;
748 
749 	while (r->mstackpos >= 0)
750 		roff_userret(r);
751 
752 	while (r->last)
753 		roffnode_pop(r);
754 
755 	free (r->rstack);
756 	r->rstack = NULL;
757 	r->rstacksz = 0;
758 	r->rstackpos = -1;
759 
760 	roff_freereg(r->regtab);
761 	r->regtab = NULL;
762 
763 	roff_freestr(r->strtab);
764 	roff_freestr(r->rentab);
765 	roff_freestr(r->xmbtab);
766 	r->strtab = r->rentab = r->xmbtab = NULL;
767 
768 	if (r->xtab)
769 		for (i = 0; i < 128; i++)
770 			free(r->xtab[i].p);
771 	free(r->xtab);
772 	r->xtab = NULL;
773 }
774 
775 void
776 roff_reset(struct roff *r)
777 {
778 	roff_free1(r);
779 	r->options |= MPARSE_COMMENT;
780 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
781 	r->control = '\0';
782 	r->escape = '\\';
783 	roffce_lines = 0;
784 	roffce_node = NULL;
785 	roffit_lines = 0;
786 	roffit_macro = NULL;
787 }
788 
789 void
790 roff_free(struct roff *r)
791 {
792 	int		 i;
793 
794 	roff_free1(r);
795 	for (i = 0; i < r->mstacksz; i++)
796 		free(r->mstack[i].argv);
797 	free(r->mstack);
798 	roffhash_free(r->reqtab);
799 	free(r);
800 }
801 
802 struct roff *
803 roff_alloc(int options)
804 {
805 	struct roff	*r;
806 
807 	r = mandoc_calloc(1, sizeof(struct roff));
808 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
809 	r->options = options | MPARSE_COMMENT;
810 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
811 	r->mstackpos = -1;
812 	r->rstackpos = -1;
813 	r->escape = '\\';
814 	return r;
815 }
816 
817 /* --- syntax tree state data management ---------------------------------- */
818 
819 static void
820 roff_man_free1(struct roff_man *man)
821 {
822 	if (man->meta.first != NULL)
823 		roff_node_delete(man, man->meta.first);
824 	free(man->meta.msec);
825 	free(man->meta.vol);
826 	free(man->meta.os);
827 	free(man->meta.arch);
828 	free(man->meta.title);
829 	free(man->meta.name);
830 	free(man->meta.date);
831 	free(man->meta.sodest);
832 }
833 
834 void
835 roff_state_reset(struct roff_man *man)
836 {
837 	man->last = man->meta.first;
838 	man->last_es = NULL;
839 	man->flags = 0;
840 	man->lastsec = man->lastnamed = SEC_NONE;
841 	man->next = ROFF_NEXT_CHILD;
842 	roff_setreg(man->roff, "nS", 0, '=');
843 }
844 
845 static void
846 roff_man_alloc1(struct roff_man *man)
847 {
848 	memset(&man->meta, 0, sizeof(man->meta));
849 	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
850 	man->meta.first->type = ROFFT_ROOT;
851 	man->meta.macroset = MACROSET_NONE;
852 	roff_state_reset(man);
853 }
854 
/*
 * Discard the current syntax tree and meta data, then set up a fresh
 * tree containing only a root node, such that the same struct roff_man
 * can be filled again.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
861 
/*
 * Destroy a struct roff_man completely: the syntax tree and meta data,
 * the os_r string, and the structure itself.
 * The os_s string is not freed here.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man->os_r);
	free(man);
}
869 
870 struct roff_man *
871 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
872 {
873 	struct roff_man *man;
874 
875 	man = mandoc_calloc(1, sizeof(*man));
876 	man->roff = roff;
877 	man->os_s = os_s;
878 	man->quick = quick;
879 	roff_man_alloc1(man);
880 	roff->man = man;
881 	return man;
882 }
883 
884 /* --- syntax tree handling ----------------------------------------------- */
885 
886 struct roff_node *
887 roff_node_alloc(struct roff_man *man, int line, int pos,
888 	enum roff_type type, int tok)
889 {
890 	struct roff_node	*n;
891 
892 	n = mandoc_calloc(1, sizeof(*n));
893 	n->line = line;
894 	n->pos = pos;
895 	n->tok = tok;
896 	n->type = type;
897 	n->sec = man->lastsec;
898 
899 	if (man->flags & MDOC_SYNOPSIS)
900 		n->flags |= NODE_SYNPRETTY;
901 	else
902 		n->flags &= ~NODE_SYNPRETTY;
903 	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
904 		n->flags |= NODE_NOFILL;
905 	else
906 		n->flags &= ~NODE_NOFILL;
907 	if (man->flags & MDOC_NEWLINE)
908 		n->flags |= NODE_LINE;
909 	man->flags &= ~MDOC_NEWLINE;
910 
911 	return n;
912 }
913 
914 void
915 roff_node_append(struct roff_man *man, struct roff_node *n)
916 {
917 
918 	switch (man->next) {
919 	case ROFF_NEXT_SIBLING:
920 		if (man->last->next != NULL) {
921 			n->next = man->last->next;
922 			man->last->next->prev = n;
923 		} else
924 			man->last->parent->last = n;
925 		man->last->next = n;
926 		n->prev = man->last;
927 		n->parent = man->last->parent;
928 		break;
929 	case ROFF_NEXT_CHILD:
930 		if (man->last->child != NULL) {
931 			n->next = man->last->child;
932 			man->last->child->prev = n;
933 		} else
934 			man->last->last = n;
935 		man->last->child = n;
936 		n->parent = man->last;
937 		break;
938 	default:
939 		abort();
940 	}
941 	man->last = n;
942 
943 	switch (n->type) {
944 	case ROFFT_HEAD:
945 		n->parent->head = n;
946 		break;
947 	case ROFFT_BODY:
948 		if (n->end != ENDBODY_NOT)
949 			return;
950 		n->parent->body = n;
951 		break;
952 	case ROFFT_TAIL:
953 		n->parent->tail = n;
954 		break;
955 	default:
956 		return;
957 	}
958 
959 	/*
960 	 * Copy over the normalised-data pointer of our parent.  Not
961 	 * everybody has one, but copying a null pointer is fine.
962 	 */
963 
964 	n->norm = n->parent->norm;
965 	assert(n->parent->type == ROFFT_BLOCK);
966 }
967 
968 void
969 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
970 {
971 	struct roff_node	*n;
972 
973 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
974 	n->string = roff_strdup(man->roff, word);
975 	roff_node_append(man, n);
976 	n->flags |= NODE_VALID | NODE_ENDED;
977 	man->next = ROFF_NEXT_SIBLING;
978 }
979 
980 void
981 roff_word_append(struct roff_man *man, const char *word)
982 {
983 	struct roff_node	*n;
984 	char			*addstr, *newstr;
985 
986 	n = man->last;
987 	addstr = roff_strdup(man->roff, word);
988 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
989 	free(addstr);
990 	free(n->string);
991 	n->string = newstr;
992 	man->next = ROFF_NEXT_SIBLING;
993 }
994 
995 void
996 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
997 {
998 	struct roff_node	*n;
999 
1000 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1001 	roff_node_append(man, n);
1002 	man->next = ROFF_NEXT_CHILD;
1003 }
1004 
1005 struct roff_node *
1006 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1007 {
1008 	struct roff_node	*n;
1009 
1010 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1011 	roff_node_append(man, n);
1012 	man->next = ROFF_NEXT_CHILD;
1013 	return n;
1014 }
1015 
1016 struct roff_node *
1017 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1018 {
1019 	struct roff_node	*n;
1020 
1021 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1022 	roff_node_append(man, n);
1023 	man->next = ROFF_NEXT_CHILD;
1024 	return n;
1025 }
1026 
1027 struct roff_node *
1028 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1029 {
1030 	struct roff_node	*n;
1031 
1032 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1033 	roff_node_append(man, n);
1034 	man->next = ROFF_NEXT_CHILD;
1035 	return n;
1036 }
1037 
1038 static void
1039 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1040 {
1041 	struct roff_node	*n;
1042 	struct tbl_span		*span;
1043 
1044 	if (man->meta.macroset == MACROSET_MAN)
1045 		man_breakscope(man, ROFF_TS);
1046 	while ((span = tbl_span(tbl)) != NULL) {
1047 		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1048 		n->span = span;
1049 		roff_node_append(man, n);
1050 		n->flags |= NODE_VALID | NODE_ENDED;
1051 		man->next = ROFF_NEXT_SIBLING;
1052 	}
1053 }
1054 
1055 void
1056 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1057 {
1058 
1059 	/* Adjust siblings. */
1060 
1061 	if (n->prev)
1062 		n->prev->next = n->next;
1063 	if (n->next)
1064 		n->next->prev = n->prev;
1065 
1066 	/* Adjust parent. */
1067 
1068 	if (n->parent != NULL) {
1069 		if (n->parent->child == n)
1070 			n->parent->child = n->next;
1071 		if (n->parent->last == n)
1072 			n->parent->last = n->prev;
1073 	}
1074 
1075 	/* Adjust parse point. */
1076 
1077 	if (man == NULL)
1078 		return;
1079 	if (man->last == n) {
1080 		if (n->prev == NULL) {
1081 			man->last = n->parent;
1082 			man->next = ROFF_NEXT_CHILD;
1083 		} else {
1084 			man->last = n->prev;
1085 			man->next = ROFF_NEXT_SIBLING;
1086 		}
1087 	}
1088 	if (man->meta.first == n)
1089 		man->meta.first = NULL;
1090 }
1091 
/*
 * Move the node n, together with its children, to the current parse
 * point: take it out of the tree, clear its sibling pointers, and
 * append it again.  Note that roff_node_unlink() moves the parse
 * point first if it was at n itself.
 */
void
roff_node_relink(struct roff_man *man, struct roff_node *n)
{
	roff_node_unlink(man, n);
	n->prev = n->next = NULL;
	roff_node_append(man, n);
}
1099 
1100 void
1101 roff_node_free(struct roff_node *n)
1102 {
1103 
1104 	if (n->args != NULL)
1105 		mdoc_argv_free(n->args);
1106 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1107 		free(n->norm);
1108 	eqn_box_free(n->eqn);
1109 	free(n->string);
1110 	free(n->tag);
1111 	free(n);
1112 }
1113 
1114 void
1115 roff_node_delete(struct roff_man *man, struct roff_node *n)
1116 {
1117 
1118 	while (n->child != NULL)
1119 		roff_node_delete(man, n->child);
1120 	roff_node_unlink(man, n);
1121 	roff_node_free(n);
1122 }
1123 
1124 int
1125 roff_node_transparent(struct roff_node *n)
1126 {
1127 	if (n == NULL)
1128 		return 0;
1129 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1130 		return 1;
1131 	return roff_tok_transparent(n->tok);
1132 }
1133 
1134 int
1135 roff_tok_transparent(enum roff_tok tok)
1136 {
1137 	switch (tok) {
1138 	case ROFF_ft:
1139 	case ROFF_ll:
1140 	case ROFF_mc:
1141 	case ROFF_po:
1142 	case ROFF_ta:
1143 	case MDOC_Db:
1144 	case MDOC_Es:
1145 	case MDOC_Sm:
1146 	case MDOC_Tg:
1147 	case MAN_DT:
1148 	case MAN_UC:
1149 	case MAN_PD:
1150 	case MAN_AT:
1151 		return 1;
1152 	default:
1153 		return 0;
1154 	}
1155 }
1156 
1157 struct roff_node *
1158 roff_node_child(struct roff_node *n)
1159 {
1160 	for (n = n->child; roff_node_transparent(n); n = n->next)
1161 		continue;
1162 	return n;
1163 }
1164 
1165 struct roff_node *
1166 roff_node_prev(struct roff_node *n)
1167 {
1168 	do {
1169 		n = n->prev;
1170 	} while (roff_node_transparent(n));
1171 	return n;
1172 }
1173 
1174 struct roff_node *
1175 roff_node_next(struct roff_node *n)
1176 {
1177 	do {
1178 		n = n->next;
1179 	} while (roff_node_transparent(n));
1180 	return n;
1181 }
1182 
1183 void
1184 deroff(char **dest, const struct roff_node *n)
1185 {
1186 	char	*cp;
1187 	size_t	 sz;
1188 
1189 	if (n->string == NULL) {
1190 		for (n = n->child; n != NULL; n = n->next)
1191 			deroff(dest, n);
1192 		return;
1193 	}
1194 
1195 	/* Skip leading whitespace. */
1196 
1197 	for (cp = n->string; *cp != '\0'; cp++) {
1198 		if (cp[0] == '\\' && cp[1] != '\0' &&
1199 		    strchr(" %&0^|~", cp[1]) != NULL)
1200 			cp++;
1201 		else if ( ! isspace((unsigned char)*cp))
1202 			break;
1203 	}
1204 
1205 	/* Skip trailing backslash. */
1206 
1207 	sz = strlen(cp);
1208 	if (sz > 0 && cp[sz - 1] == '\\')
1209 		sz--;
1210 
1211 	/* Skip trailing whitespace. */
1212 
1213 	for (; sz; sz--)
1214 		if ( ! isspace((unsigned char)cp[sz-1]))
1215 			break;
1216 
1217 	/* Skip empty strings. */
1218 
1219 	if (sz == 0)
1220 		return;
1221 
1222 	if (*dest == NULL) {
1223 		*dest = mandoc_strndup(cp, sz);
1224 		return;
1225 	}
1226 
1227 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1228 	free(*dest);
1229 	*dest = cp;
1230 }
1231 
1232 /* --- main functions of the roff parser ---------------------------------- */
1233 
/*
 * Look for a comment or for a trailing escape character in the input
 * line starting at buf->buf + pos, using newesc as the escape character.
 * A comment is removed from the buffer in place, after checking it for
 * an RCS id and for trailing whitespace; while MPARSE_COMMENT is set,
 * it is also saved as a ROFFT_COMMENT node in the syntax tree.
 * Return ROFF_CONT if processing of the (possibly truncated) line
 * is to continue, or ROFF_IGN | ROFF_APPEND if a trailing escape
 * character or a \# comment requests line continuation.
 */
static int
roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos,
    char newesc)
{
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	const char	*cp;	/* for RCS id parsing */
	char		*stesc;	/* start of an escape sequence ('\\') */
	char		*ep;	/* end of comment string */
	int		 rcsid;	/* kind of RCS id seen */

	/* Scan forward; leaving the loop by break means: comment found. */

	for (start = stesc = buf->buf + pos;; stesc++) {
		/* The line ends without continuation or comment. */
		if (stesc[0] == '\0')
			return ROFF_CONT;

		/* Unescaped byte: skip it. */
		if (stesc[0] != newesc)
			continue;

		/* Backslash at end of line requests line continuation. */
		if (stesc[1] == '\0') {
			stesc[0] = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Found a comment: process it. */
		if (stesc[1] == '"' || stesc[1] == '#')
			break;

		/* Escaped escape character: skip them both. */
		if (stesc[1] == newesc)
			stesc++;
	}

	/*
	 * Look for an RCS id in the comment.
	 * Here, stesc + 2 is the first byte of the comment text, and
	 * cp is advanced to the byte following the keyword found.
	 */

	rcsid = 0;
	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_OPENBSD;
		cp += 8;
	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_NETBSD;
		cp += 7;
	}

	/*
	 * Only accept the keyword if it is not followed by a letter
	 * or digit and if another dollar sign follows later on.
	 * Complain if an id of the same kind was already seen.
	 */

	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
	    strchr(cp, '$') != NULL) {
		if (r->man->meta.rcsids & rcsid)
			mandoc_msg(MANDOCERR_RCS_REP, ln,
			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
		r->man->meta.rcsids |= rcsid;
	}

	/* Warn about trailing whitespace at the end of the comment. */

	ep = strchr(stesc + 2, '\0') - 1;
	if (*ep == '\n')
		*ep-- = '\0';
	if (*ep == ' ' || *ep == '\t')
		mandoc_msg(MANDOCERR_SPACE_EOL,
		    ln, (int)(ep - buf->buf), NULL);

	/* Save comments preceding the title macro in the syntax tree. */

	if (r->options & MPARSE_COMMENT) {
		/*
		 * Strip trailing whitespace.  This loop ends at the
		 * latest at stesc[1], which is '"' or '#'.
		 */
		while (*ep == ' ' || *ep == '\t')
			ep--;
		ep[1] = '\0';
		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
		    ROFFT_COMMENT, TOKEN_NONE);
		n->string = mandoc_strdup(stesc + 2);
		roff_node_append(r->man, n);
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	/* The comment requests line continuation. */

	if (stesc[1] == '#') {
		*stesc = '\0';
		return ROFF_IGN | ROFF_APPEND;
	}

	/*
	 * Discard the comment including preceding whitespace.
	 * A blank that is itself preceded by a backslash is kept.
	 */

	while (stesc > start && stesc[-1] == ' ' &&
	    (stesc == start + 1 || stesc[-2] != '\\'))
		stesc--;
	*stesc = '\0';
	return ROFF_CONT;
}
1325 
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * The line is scanned backwards, from its end towards buf->buf + pos,
 * looking for the escape character newesc.  Whenever a sequence is
 * expanded, buf->buf is replaced by a newly allocated buffer and
 * buf->sz is updated, so pointers into the old buffer become invalid.
 * Return ROFF_CONT on success or ROFF_IGN if expansion was given up
 * because of too many expansions or because the line grew too long.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 deftype; /* type of definition to paste */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Start at the last byte of the line, dropping a final newline. */

	start = buf->buf + pos;
	stesc = strchr(start, '\0') - 1;
	if (stesc >= start && *stesc == '\n')
		*stesc-- = '\0';

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/*
		 * If it is escaped, skip it.
		 * Find the start of the run of escape characters that
		 * ends at stesc; cp ends up just before that run.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			/* Even run length: turn the run into backslashes. */
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] == '\0') {
			/* Escape character at the end of the line: drop it. */
			*stesc-- = '\0';
			continue;
		} else
			*stesc = '\\';

		/*
		 * Decide whether to expand or to check only.
		 * Any 'E' characters right after the backslash are skipped.
		 */

		term = '\0';
		cp = stesc + 1;
		while (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			/* The byte after the letter delimits the argument. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: check the syntax only. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		/* Give up on lines requiring too many expansions. */

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of zero means that the length is not fixed.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/*
		 * Advance to the end of the name.
		 * Inside the argument of \w, escape sequences are passed
		 * to mandoc_escape() and only some kinds count as a
		 * character.
		 */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			/* Macro arguments only exist inside a macro call. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;

			/* \$1 to \$9: a single argument, possibly empty. */
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}

			/* Calculate the length of all arguments joined. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}

			/*
			 * The replacement takes the place of the three
			 * bytes at stesc.  Move the rest of the line
			 * before shrinking or after growing the buffer.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}

			/* Write the arguments into the gap. */
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/*
			 * The result is '1' only if the complete argument
			 * up to the terminating byte parses as a number.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		/*
		 * An undefined string expands to nothing.
		 * Refuse to let the line grow beyond SHRT_MAX bytes.
		 */

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Continue right after the inserted text, such that
		 * the replacement itself gets scanned, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1649 
/*
 * Parse a quoted or unquoted roff-style request or macro argument
 * starting at *cpp.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments, translate "\t"
 * and "\a" to tab characters and "\\" to ASCII_ESC.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance *pos accordingly.
 * Return a newly allocated copy of the parsed argument, which the
 * caller has to free.  If the argument contained "\\", the copy is
 * passed through roff_expand() first, and an empty string is returned
 * if that expansion fails.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/*
	 * Quoting can only start with a new word.
	 * quoted is 0 for an unquoted argument, 1 inside a quoted
	 * argument, and 2 after the closing quote was found.
	 */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/* pairs counts the bytes dropped so far by translations. */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/* Remember to call roff_expand() below. */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	/* Blank characters at the very end of the input line. */
	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* Without ASCII_ESC in it, the copy is already final. */
	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/* Expand the escape sequences marked with ASCII_ESC above. */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1760 
1761 
1762 /*
1763  * Process text streams.
1764  */
1765 static int
1766 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1767 {
1768 	size_t		 sz;
1769 	const char	*start;
1770 	char		*p;
1771 	int		 isz;
1772 	enum mandoc_esc	 esc;
1773 
1774 	/* Spring the input line trap. */
1775 
1776 	if (roffit_lines == 1) {
1777 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1778 		free(buf->buf);
1779 		buf->buf = p;
1780 		buf->sz = isz + 1;
1781 		*offs = 0;
1782 		free(roffit_macro);
1783 		roffit_lines = 0;
1784 		return ROFF_REPARSE;
1785 	} else if (roffit_lines > 1)
1786 		--roffit_lines;
1787 
1788 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1789 		if (roffce_lines < 1) {
1790 			r->man->last = roffce_node;
1791 			r->man->next = ROFF_NEXT_SIBLING;
1792 			roffce_lines = 0;
1793 			roffce_node = NULL;
1794 		} else
1795 			roffce_lines--;
1796 	}
1797 
1798 	/* Convert all breakable hyphens into ASCII_HYPH. */
1799 
1800 	start = p = buf->buf + pos;
1801 
1802 	while (*p != '\0') {
1803 		sz = strcspn(p, "-\\");
1804 		p += sz;
1805 
1806 		if (*p == '\0')
1807 			break;
1808 
1809 		if (*p == '\\') {
1810 			/* Skip over escapes. */
1811 			p++;
1812 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1813 			if (esc == ESCAPE_ERROR)
1814 				break;
1815 			while (*p == '-')
1816 				p++;
1817 			continue;
1818 		} else if (p == start) {
1819 			p++;
1820 			continue;
1821 		}
1822 
1823 		if (isalpha((unsigned char)p[-1]) &&
1824 		    isalpha((unsigned char)p[1]))
1825 			*p = ASCII_HYPH;
1826 		p++;
1827 	}
1828 	return ROFF_CONT;
1829 }
1830 
/*
 * Parse one input line, contained in buf->buf starting at *offs.
 * The argument ln is the input line number used for messages,
 * and len is only used to warn about overlong text lines.
 * After handling equation delimiters, comments, and escape sequences,
 * the line is passed on to the handler of an open roff block, to the
 * equation or table parser, to roff_parsetext(), or to the handler
 * of the request or macro it calls.
 * The return value consists of the ROFF_* values returned from
 * these handlers.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/*
	 * Warn about long text lines, but only in fill mode, outside
	 * tables and equations, and not for lines starting with a blank,
	 * a dot, a backslash, or the control character.
	 */

	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
	    (r->man->flags & ROFF_NOFILL) == 0 &&
	    strchr(" .\\", buf->buf[pos]) == NULL &&
	    buf->buf[pos] != r->control &&
	    strcspn(buf->buf, " ") < 80)
		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
		    "%.20s...", buf->buf + pos);

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Handle comments and escape sequences. */

	e = roff_parse_comment(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* On a macro line, this advances pos to the macro name. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Only keep the bits outside ROFF_MASK. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl) {
		/* After the first text line, comments are not saved. */
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* After the first request or macro, comments are not saved. */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
}
1929 
1930 /*
1931  * Handle a new request or macro.
1932  * May be called outside any scope or from inside a conditional scope.
1933  */
1934 static int
1935 roff_req_or_macro(ROFF_ARGS) {
1936 
1937 	/* For now, tables ignore most macros and some request. */
1938 
1939 	if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1940 	    tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1941 	    tok == ROFF_sp)) {
1942 		mandoc_msg(MANDOCERR_TBLMACRO,
1943 		    ln, ppos, "%s", buf->buf + ppos);
1944 		if (tok != TOKEN_NONE)
1945 			return ROFF_IGN;
1946 		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1947 			pos++;
1948 		while (buf->buf[pos] == ' ')
1949 			pos++;
1950 		tbl_read(r->tbl, ln, buf->buf, pos);
1951 		roff_addtbl(r->man, ln, r->tbl);
1952 		return ROFF_IGN;
1953 	}
1954 
1955 	/* For now, let high level macros abort .ce mode. */
1956 
1957 	if (roffce_node != NULL &&
1958 	    (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1959 	     tok == ROFF_TH || tok == ROFF_TS)) {
1960 		r->man->last = roffce_node;
1961 		r->man->next = ROFF_NEXT_SIBLING;
1962 		roffce_lines = 0;
1963 		roffce_node = NULL;
1964 	}
1965 
1966 	/*
1967 	 * This is neither a roff request nor a user-defined macro.
1968 	 * Let the standard macro set parsers handle it.
1969 	 */
1970 
1971 	if (tok == TOKEN_NONE)
1972 		return ROFF_CONT;
1973 
1974 	/* Execute a roff request or a user-defined macro. */
1975 
1976 	return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1977 }
1978 
1979 /*
1980  * Internal interface function to tell the roff parser that execution
1981  * of the current macro ended.  This is required because macro
1982  * definitions usually do not end with a .return request.
1983  */
1984 void
1985 roff_userret(struct roff *r)
1986 {
1987 	struct mctx	*ctx;
1988 	int		 i;
1989 
1990 	assert(r->mstackpos >= 0);
1991 	ctx = r->mstack + r->mstackpos;
1992 	for (i = 0; i < ctx->argc; i++)
1993 		free(ctx->argv[i]);
1994 	ctx->argc = 0;
1995 	r->mstackpos--;
1996 }
1997 
1998 void
1999 roff_endparse(struct roff *r)
2000 {
2001 	if (r->last != NULL)
2002 		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
2003 		    r->last->col, "%s", roff_name[r->last->tok]);
2004 
2005 	if (r->eqn != NULL) {
2006 		mandoc_msg(MANDOCERR_BLK_NOEND,
2007 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
2008 		eqn_parse(r->eqn);
2009 		r->eqn = NULL;
2010 	}
2011 
2012 	if (r->tbl != NULL) {
2013 		tbl_end(r->tbl, 1);
2014 		r->tbl = NULL;
2015 	}
2016 }
2017 
2018 /*
2019  * Parse the request or macro name at buf[*pos].
2020  * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
2021  * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
2022  * As a side effect, set r->current_string to the definition or to NULL.
2023  */
2024 static enum roff_tok
2025 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2026 {
2027 	char		*cp;
2028 	const char	*mac;
2029 	size_t		 maclen;
2030 	int		 deftype;
2031 	enum roff_tok	 t;
2032 
2033 	cp = buf + *pos;
2034 
2035 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2036 		return TOKEN_NONE;
2037 
2038 	mac = cp;
2039 	maclen = roff_getname(r, &cp, ln, ppos);
2040 
2041 	deftype = ROFFDEF_USER | ROFFDEF_REN;
2042 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2043 	switch (deftype) {
2044 	case ROFFDEF_USER:
2045 		t = ROFF_USERDEF;
2046 		break;
2047 	case ROFFDEF_REN:
2048 		t = ROFF_RENAMED;
2049 		break;
2050 	default:
2051 		t = roffhash_find(r->reqtab, mac, maclen);
2052 		break;
2053 	}
2054 	if (t != TOKEN_NONE)
2055 		*pos = cp - buf;
2056 	else if (deftype == ROFFDEF_UNDEF) {
2057 		/* Using an undefined macro defines it to be empty. */
2058 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2059 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2060 	}
2061 	return t;
2062 }
2063 
2064 /* --- handling of request blocks ----------------------------------------- */
2065 
2066 /*
2067  * Close a macro definition block or an "ignore" block.
2068  */
2069 static int
2070 roff_cblock(ROFF_ARGS)
2071 {
2072 	int	 rr;
2073 
2074 	if (r->last == NULL) {
2075 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2076 		return ROFF_IGN;
2077 	}
2078 
2079 	switch (r->last->tok) {
2080 	case ROFF_am:
2081 	case ROFF_ami:
2082 	case ROFF_de:
2083 	case ROFF_dei:
2084 	case ROFF_ig:
2085 		break;
2086 	case ROFF_am1:
2087 	case ROFF_de1:
2088 		/* Remapped in roff_block(). */
2089 		abort();
2090 	default:
2091 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2092 		return ROFF_IGN;
2093 	}
2094 
2095 	roffnode_pop(r);
2096 	roffnode_cleanscope(r);
2097 
2098 	/*
2099 	 * If a conditional block with braces is still open,
2100 	 * check for "\}" block end markers.
2101 	 */
2102 
2103 	if (r->last != NULL && r->last->endspan < 0) {
2104 		rr = 1;  /* If arguments follow "\}", warn about them. */
2105 		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2106 	}
2107 
2108 	if (buf->buf[pos] != '\0')
2109 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2110 		    ".. %s", buf->buf + pos);
2111 
2112 	return ROFF_IGN;
2113 }
2114 
2115 /*
2116  * Pop all nodes ending at the end of the current input line.
2117  * Return the number of loops ended.
2118  */
2119 static int
2120 roffnode_cleanscope(struct roff *r)
2121 {
2122 	int inloop;
2123 
2124 	inloop = 0;
2125 	while (r->last != NULL && r->last->endspan > 0) {
2126 		if (--r->last->endspan != 0)
2127 			break;
2128 		inloop += roffnode_pop(r);
2129 	}
2130 	return inloop;
2131 }
2132 
2133 /*
2134  * Handle the closing "\}" of a conditional block.
2135  * Apart from generating warnings, this only pops nodes.
2136  * Return the number of loops ended.
2137  */
2138 static int
2139 roff_ccond(struct roff *r, int ln, int ppos)
2140 {
2141 	if (NULL == r->last) {
2142 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2143 		return 0;
2144 	}
2145 
2146 	switch (r->last->tok) {
2147 	case ROFF_el:
2148 	case ROFF_ie:
2149 	case ROFF_if:
2150 	case ROFF_while:
2151 		break;
2152 	default:
2153 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2154 		return 0;
2155 	}
2156 
2157 	if (r->last->endspan > -1) {
2158 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2159 		return 0;
2160 	}
2161 
2162 	return roffnode_pop(r) + roffnode_cleanscope(r);
2163 }
2164 
2165 static int
2166 roff_block(ROFF_ARGS)
2167 {
2168 	const char	*name, *value;
2169 	char		*call, *cp, *iname, *rname;
2170 	size_t		 csz, namesz, rsz;
2171 	int		 deftype;
2172 
2173 	/* Ignore groff compatibility mode for now. */
2174 
2175 	if (tok == ROFF_de1)
2176 		tok = ROFF_de;
2177 	else if (tok == ROFF_dei1)
2178 		tok = ROFF_dei;
2179 	else if (tok == ROFF_am1)
2180 		tok = ROFF_am;
2181 	else if (tok == ROFF_ami1)
2182 		tok = ROFF_ami;
2183 
2184 	/* Parse the macro name argument. */
2185 
2186 	cp = buf->buf + pos;
2187 	if (tok == ROFF_ig) {
2188 		iname = NULL;
2189 		namesz = 0;
2190 	} else {
2191 		iname = cp;
2192 		namesz = roff_getname(r, &cp, ln, ppos);
2193 		iname[namesz] = '\0';
2194 	}
2195 
2196 	/* Resolve the macro name argument if it is indirect. */
2197 
2198 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2199 		deftype = ROFFDEF_USER;
2200 		name = roff_getstrn(r, iname, namesz, &deftype);
2201 		if (name == NULL) {
2202 			mandoc_msg(MANDOCERR_STR_UNDEF,
2203 			    ln, (int)(iname - buf->buf),
2204 			    "%.*s", (int)namesz, iname);
2205 			namesz = 0;
2206 		} else
2207 			namesz = strlen(name);
2208 	} else
2209 		name = iname;
2210 
2211 	if (namesz == 0 && tok != ROFF_ig) {
2212 		mandoc_msg(MANDOCERR_REQ_EMPTY,
2213 		    ln, ppos, "%s", roff_name[tok]);
2214 		return ROFF_IGN;
2215 	}
2216 
2217 	roffnode_push(r, tok, name, ln, ppos);
2218 
2219 	/*
2220 	 * At the beginning of a `de' macro, clear the existing string
2221 	 * with the same name, if there is one.  New content will be
2222 	 * appended from roff_block_text() in multiline mode.
2223 	 */
2224 
2225 	if (tok == ROFF_de || tok == ROFF_dei) {
2226 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2227 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2228 	} else if (tok == ROFF_am || tok == ROFF_ami) {
2229 		deftype = ROFFDEF_ANY;
2230 		value = roff_getstrn(r, iname, namesz, &deftype);
2231 		switch (deftype) {  /* Before appending, ... */
2232 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2233 			roff_setstrn(&r->strtab, name, namesz,
2234 			    value, strlen(value), 0);
2235 			break;
2236 		case ROFFDEF_REN: /* call original standard macro. */
2237 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2238 			    (int)strlen(value), value);
2239 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2240 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2241 			free(call);
2242 			break;
2243 		case ROFFDEF_STD:  /* rename and call standard macro. */
2244 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2245 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2246 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2247 			    (int)rsz, rname);
2248 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2249 			free(call);
2250 			free(rname);
2251 			break;
2252 		default:
2253 			break;
2254 		}
2255 	}
2256 
2257 	if (*cp == '\0')
2258 		return ROFF_IGN;
2259 
2260 	/* Get the custom end marker. */
2261 
2262 	iname = cp;
2263 	namesz = roff_getname(r, &cp, ln, ppos);
2264 
2265 	/* Resolve the end marker if it is indirect. */
2266 
2267 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2268 		deftype = ROFFDEF_USER;
2269 		name = roff_getstrn(r, iname, namesz, &deftype);
2270 		if (name == NULL) {
2271 			mandoc_msg(MANDOCERR_STR_UNDEF,
2272 			    ln, (int)(iname - buf->buf),
2273 			    "%.*s", (int)namesz, iname);
2274 			namesz = 0;
2275 		} else
2276 			namesz = strlen(name);
2277 	} else
2278 		name = iname;
2279 
2280 	if (namesz)
2281 		r->last->end = mandoc_strndup(name, namesz);
2282 
2283 	if (*cp != '\0')
2284 		mandoc_msg(MANDOCERR_ARG_EXCESS,
2285 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2286 
2287 	return ROFF_IGN;
2288 }
2289 
2290 static int
2291 roff_block_sub(ROFF_ARGS)
2292 {
2293 	enum roff_tok	t;
2294 	int		i, j;
2295 
2296 	/*
2297 	 * If a custom end marker is a user-defined or predefined macro
2298 	 * or a request, interpret it.
2299 	 */
2300 
2301 	if (r->last->end) {
2302 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2303 			if (buf->buf[i] != r->last->end[j])
2304 				break;
2305 
2306 		if (r->last->end[j] == '\0' &&
2307 		    (buf->buf[i] == '\0' ||
2308 		     buf->buf[i] == ' ' ||
2309 		     buf->buf[i] == '\t')) {
2310 			roffnode_pop(r);
2311 			roffnode_cleanscope(r);
2312 
2313 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2314 				i++;
2315 
2316 			pos = i;
2317 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2318 			    TOKEN_NONE)
2319 				return ROFF_RERUN;
2320 			return ROFF_IGN;
2321 		}
2322 	}
2323 
2324 	/* Handle the standard end marker. */
2325 
2326 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2327 	if (t == ROFF_cblock)
2328 		return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2329 
2330 	/* Not an end marker, so append the line to the block. */
2331 
2332 	if (tok != ROFF_ig)
2333 		roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2334 	return ROFF_IGN;
2335 }
2336 
2337 static int
2338 roff_block_text(ROFF_ARGS)
2339 {
2340 
2341 	if (tok != ROFF_ig)
2342 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2343 
2344 	return ROFF_IGN;
2345 }
2346 
/*
 * Check for a closing "\}" and handle it.
 * In this function, the final "int *offs" argument is used for
 * different purposes than elsewhere:
 * Input: *offs == 0: caller wants to discard arguments following \}
 *        *offs == 1: caller wants to preserve text following \}
 * Output: *offs = 0: tell caller to discard input line
 *         *offs = 1: tell caller to use input line
 *
 * The line in buf is edited in place while scanning for "\}".
 * The return value is ROFF_IGN, combined with ROFF_LOOPCONT or
 * ROFF_LOOPEXIT if tok is ROFF_while and at least one loop ended.
 */
static int
roff_cond_checkend(ROFF_ARGS)
{
	char		*ep;
	int		 endloop, irc, rr;

	irc = ROFF_IGN;
	/* Remember the rule now; the nodes may get popped below. */
	rr = r->last->rule;
	/* Only .while reports the end of a loop to the caller. */
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If "\}" occurs on a macro line without a preceding macro or
	 * a text line contains nothing else, drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
		rr = 0;

	/*
	 * The closing delimiter "\}" rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
	 */

	while ((ep = strchr(ep, '\\')) != NULL) {
		switch (ep[1]) {
		case '}':
			/*
			 * At the end of the line, truncate the line.
			 * Elsewhere, turn the marker into "\&" if the
			 * line is going to be used, or cut it out if not.
			 * In the latter two cases, scanning resumes at
			 * the same position.
			 */
			if (ep[2] == '\0')
				ep[0] = '\0';
			else if (rr)
				ep[1] = '&';
			else
				memmove(ep, ep + 2, strlen(ep + 2) + 1);
			if (roff_ccond(r, ln, ep - buf->buf))
				irc |= endloop;
			break;
		case '\0':
			/* Trailing backslash: step onto the NUL to stop. */
			++ep;
			break;
		default:
			/* Skip the backslash and the escaped character. */
			ep += 2;
			break;
		}
	}
	*offs = rr;
	return irc;
}
2406 
2407 /*
2408  * Parse and process a request or macro line in conditional scope.
2409  */
2410 static int
2411 roff_cond_sub(ROFF_ARGS)
2412 {
2413 	struct roffnode	*bl;
2414 	int		 irc, rr, spos;
2415 	enum roff_tok	 t;
2416 
2417 	rr = 0;  /* If arguments follow "\}", skip them. */
2418 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2419 	spos = pos;
2420 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2421 
2422 	/*
2423 	 * Handle requests and macros if the conditional evaluated
2424 	 * to true or if they are structurally required.
2425 	 * The .break request is always handled specially.
2426 	 */
2427 
2428 	if (t == ROFF_break) {
2429 		if (irc & ROFF_LOOPMASK)
2430 			irc = ROFF_IGN | ROFF_LOOPEXIT;
2431 		else if (rr) {
2432 			for (bl = r->last; bl != NULL; bl = bl->parent) {
2433 				bl->rule = 0;
2434 				if (bl->tok == ROFF_while)
2435 					break;
2436 			}
2437 		}
2438 	} else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
2439 		irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
2440 		if (irc & ROFF_WHILE)
2441 			irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2442 	}
2443 	return irc;
2444 }
2445 
2446 /*
2447  * Parse and process a text line in conditional scope.
2448  */
2449 static int
2450 roff_cond_text(ROFF_ARGS)
2451 {
2452 	int	 irc, rr;
2453 
2454 	rr = 1;  /* If arguments follow "\}", preserve them. */
2455 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2456 	if (rr)
2457 		irc |= ROFF_CONT;
2458 	return irc;
2459 }
2460 
2461 /* --- handling of numeric and conditional expressions -------------------- */
2462 
2463 /*
2464  * Parse a single signed integer number.  Stop at the first non-digit.
2465  * If there is at least one digit, return success and advance the
2466  * parse point, else return failure and let the parse point unchanged.
2467  * Ignore overflows, treat them just like the C language.
2468  */
2469 static int
2470 roff_getnum(const char *v, int *pos, int *res, int flags)
2471 {
2472 	int	 myres, scaled, n, p;
2473 
2474 	if (NULL == res)
2475 		res = &myres;
2476 
2477 	p = *pos;
2478 	n = v[p] == '-';
2479 	if (n || v[p] == '+')
2480 		p++;
2481 
2482 	if (flags & ROFFNUM_WHITE)
2483 		while (isspace((unsigned char)v[p]))
2484 			p++;
2485 
2486 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2487 		*res = 10 * *res + v[p] - '0';
2488 	if (p == *pos + n)
2489 		return 0;
2490 
2491 	if (n)
2492 		*res = -*res;
2493 
2494 	/* Each number may be followed by one optional scaling unit. */
2495 
2496 	switch (v[p]) {
2497 	case 'f':
2498 		scaled = *res * 65536;
2499 		break;
2500 	case 'i':
2501 		scaled = *res * 240;
2502 		break;
2503 	case 'c':
2504 		scaled = *res * 240 / 2.54;
2505 		break;
2506 	case 'v':
2507 	case 'P':
2508 		scaled = *res * 40;
2509 		break;
2510 	case 'm':
2511 	case 'n':
2512 		scaled = *res * 24;
2513 		break;
2514 	case 'p':
2515 		scaled = *res * 10 / 3;
2516 		break;
2517 	case 'u':
2518 		scaled = *res;
2519 		break;
2520 	case 'M':
2521 		scaled = *res * 6 / 25;
2522 		break;
2523 	default:
2524 		scaled = *res;
2525 		p--;
2526 		break;
2527 	}
2528 	if (flags & ROFFNUM_SCALE)
2529 		*res = scaled;
2530 
2531 	*pos = p + 1;
2532 	return 1;
2533 }
2534 
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] serves as the delimiter.
 * The condition holds if the text between the first and the second
 * occurrence of the delimiter equals the text between the second
 * and the third occurrence.
 * On return, *pos points after the third occurrence or,
 * if there is none, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: find the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2577 
2578 /*
2579  * Evaluate an optionally negated single character, numerical,
2580  * or string condition.
2581  */
2582 static int
2583 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2584 {
2585 	const char	*start, *end;
2586 	char		*cp, *name;
2587 	size_t		 sz;
2588 	int		 deftype, len, number, savepos, istrue, wanttrue;
2589 
2590 	if ('!' == v[*pos]) {
2591 		wanttrue = 0;
2592 		(*pos)++;
2593 	} else
2594 		wanttrue = 1;
2595 
2596 	switch (v[*pos]) {
2597 	case '\0':
2598 		return 0;
2599 	case 'n':
2600 	case 'o':
2601 		(*pos)++;
2602 		return wanttrue;
2603 	case 'e':
2604 	case 't':
2605 	case 'v':
2606 		(*pos)++;
2607 		return !wanttrue;
2608 	case 'c':
2609 		do {
2610 			(*pos)++;
2611 		} while (v[*pos] == ' ');
2612 
2613 		/*
2614 		 * Quirk for groff compatibility:
2615 		 * The horizontal tab is neither available nor unavailable.
2616 		 */
2617 
2618 		if (v[*pos] == '\t') {
2619 			(*pos)++;
2620 			return 0;
2621 		}
2622 
2623 		/* Printable ASCII characters are available. */
2624 
2625 		if (v[*pos] != '\\') {
2626 			(*pos)++;
2627 			return wanttrue;
2628 		}
2629 
2630 		end = v + ++*pos;
2631 		switch (mandoc_escape(&end, &start, &len)) {
2632 		case ESCAPE_SPECIAL:
2633 			istrue = mchars_spec2cp(start, len) != -1;
2634 			break;
2635 		case ESCAPE_UNICODE:
2636 			istrue = 1;
2637 			break;
2638 		case ESCAPE_NUMBERED:
2639 			istrue = mchars_num2char(start, len) != -1;
2640 			break;
2641 		default:
2642 			istrue = !wanttrue;
2643 			break;
2644 		}
2645 		*pos = end - v;
2646 		return istrue == wanttrue;
2647 	case 'd':
2648 	case 'r':
2649 		cp = v + *pos + 1;
2650 		while (*cp == ' ')
2651 			cp++;
2652 		name = cp;
2653 		sz = roff_getname(r, &cp, ln, cp - v);
2654 		if (sz == 0)
2655 			istrue = 0;
2656 		else if (v[*pos] == 'r')
2657 			istrue = roff_hasregn(r, name, sz);
2658 		else {
2659 			deftype = ROFFDEF_ANY;
2660 		        roff_getstrn(r, name, sz, &deftype);
2661 			istrue = !!deftype;
2662 		}
2663 		*pos = (name + sz) - v;
2664 		return istrue == wanttrue;
2665 	default:
2666 		break;
2667 	}
2668 
2669 	savepos = *pos;
2670 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2671 		return (number > 0) == wanttrue;
2672 	else if (*pos == savepos)
2673 		return roff_evalstrcond(v, pos) == wanttrue;
2674 	else
2675 		return 0;
2676 }
2677 
2678 static int
2679 roff_line_ignore(ROFF_ARGS)
2680 {
2681 
2682 	return ROFF_IGN;
2683 }
2684 
2685 static int
2686 roff_insec(ROFF_ARGS)
2687 {
2688 
2689 	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2690 	return ROFF_IGN;
2691 }
2692 
2693 static int
2694 roff_unsupp(ROFF_ARGS)
2695 {
2696 
2697 	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2698 	return ROFF_IGN;
2699 }
2700 
2701 static int
2702 roff_cond(ROFF_ARGS)
2703 {
2704 	int	 irc;
2705 
2706 	roffnode_push(r, tok, NULL, ln, ppos);
2707 
2708 	/*
2709 	 * An `.el' has no conditional body: it will consume the value
2710 	 * of the current rstack entry set in prior `ie' calls or
2711 	 * defaults to DENY.
2712 	 *
2713 	 * If we're not an `el', however, then evaluate the conditional.
2714 	 */
2715 
2716 	r->last->rule = tok == ROFF_el ?
2717 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2718 	    roff_evalcond(r, ln, buf->buf, &pos);
2719 
2720 	/*
2721 	 * An if-else will put the NEGATION of the current evaluated
2722 	 * conditional into the stack of rules.
2723 	 */
2724 
2725 	if (tok == ROFF_ie) {
2726 		if (r->rstackpos + 1 == r->rstacksz) {
2727 			r->rstacksz += 16;
2728 			r->rstack = mandoc_reallocarray(r->rstack,
2729 			    r->rstacksz, sizeof(int));
2730 		}
2731 		r->rstack[++r->rstackpos] = !r->last->rule;
2732 	}
2733 
2734 	/* If the parent has false as its rule, then so do we. */
2735 
2736 	if (r->last->parent && !r->last->parent->rule)
2737 		r->last->rule = 0;
2738 
2739 	/*
2740 	 * Determine scope.
2741 	 * If there is nothing on the line after the conditional,
2742 	 * not even whitespace, use next-line scope.
2743 	 * Except that .while does not support next-line scope.
2744 	 */
2745 
2746 	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2747 		r->last->endspan = 2;
2748 		goto out;
2749 	}
2750 
2751 	while (buf->buf[pos] == ' ')
2752 		pos++;
2753 
2754 	/* An opening brace requests multiline scope. */
2755 
2756 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2757 		r->last->endspan = -1;
2758 		pos += 2;
2759 		while (buf->buf[pos] == ' ')
2760 			pos++;
2761 		goto out;
2762 	}
2763 
2764 	/*
2765 	 * Anything else following the conditional causes
2766 	 * single-line scope.  Warn if the scope contains
2767 	 * nothing but trailing whitespace.
2768 	 */
2769 
2770 	if (buf->buf[pos] == '\0')
2771 		mandoc_msg(MANDOCERR_COND_EMPTY,
2772 		    ln, ppos, "%s", roff_name[tok]);
2773 
2774 	r->last->endspan = 1;
2775 
2776 out:
2777 	*offs = pos;
2778 	irc = ROFF_RERUN;
2779 	if (tok == ROFF_while)
2780 		irc |= ROFF_WHILE;
2781 	return irc;
2782 }
2783 
2784 static int
2785 roff_ds(ROFF_ARGS)
2786 {
2787 	char		*string;
2788 	const char	*name;
2789 	size_t		 namesz;
2790 
2791 	/* Ignore groff compatibility mode for now. */
2792 
2793 	if (tok == ROFF_ds1)
2794 		tok = ROFF_ds;
2795 	else if (tok == ROFF_as1)
2796 		tok = ROFF_as;
2797 
2798 	/*
2799 	 * The first word is the name of the string.
2800 	 * If it is empty or terminated by an escape sequence,
2801 	 * abort the `ds' request without defining anything.
2802 	 */
2803 
2804 	name = string = buf->buf + pos;
2805 	if (*name == '\0')
2806 		return ROFF_IGN;
2807 
2808 	namesz = roff_getname(r, &string, ln, pos);
2809 	switch (name[namesz]) {
2810 	case '\\':
2811 		return ROFF_IGN;
2812 	case '\t':
2813 		string = buf->buf + pos + namesz;
2814 		break;
2815 	default:
2816 		break;
2817 	}
2818 
2819 	/* Read past the initial double-quote, if any. */
2820 	if (*string == '"')
2821 		string++;
2822 
2823 	/* The rest is the value. */
2824 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2825 	    ROFF_as == tok);
2826 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2827 	return ROFF_IGN;
2828 }
2829 
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * If an operator is recognized, advance the parse point past it,
 * store a one-character code for it in *res, and return that code:
 * single-character operators stand for themselves, "==" is '=',
 * "<=" is 'l', ">=" is 'g', "<>" is '!', "<?" is 'i', ">?" is 'a'.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the offending character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 width;

	cp = v + *pos;
	*res = cp[0];
	width = 1;

	switch (cp[0]) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			width = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			width = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			width = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			width = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			width = 2;
		}
		break;
	case '=':
		if (cp[1] == '=')
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
2893 
2894 /*
2895  * Evaluate either a parenthesized numeric expression
2896  * or a single signed integer number.
2897  */
2898 static int
2899 roff_evalpar(struct roff *r, int ln,
2900 	const char *v, int *pos, int *res, int flags)
2901 {
2902 
2903 	if ('(' != v[*pos])
2904 		return roff_getnum(v, pos, res, flags);
2905 
2906 	(*pos)++;
2907 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2908 		return 0;
2909 
2910 	/*
2911 	 * Omission of the closing parenthesis
2912 	 * is an error in validation mode,
2913 	 * but ignored in evaluation mode.
2914 	 */
2915 
2916 	if (')' == v[*pos])
2917 		(*pos)++;
2918 	else if (NULL == res)
2919 		return 0;
2920 
2921 	return 1;
2922 }
2923 
2924 /*
2925  * Evaluate a complete numeric expression.
2926  * Proceed left to right, there is no concept of precedence.
2927  */
2928 static int
2929 roff_evalnum(struct roff *r, int ln, const char *v,
2930 	int *pos, int *res, int flags)
2931 {
2932 	int		 mypos, operand2;
2933 	char		 operator;
2934 
2935 	if (NULL == pos) {
2936 		mypos = 0;
2937 		pos = &mypos;
2938 	}
2939 
2940 	if (flags & ROFFNUM_WHITE)
2941 		while (isspace((unsigned char)v[*pos]))
2942 			(*pos)++;
2943 
2944 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2945 		return 0;
2946 
2947 	while (1) {
2948 		if (flags & ROFFNUM_WHITE)
2949 			while (isspace((unsigned char)v[*pos]))
2950 				(*pos)++;
2951 
2952 		if ( ! roff_getop(v, pos, &operator))
2953 			break;
2954 
2955 		if (flags & ROFFNUM_WHITE)
2956 			while (isspace((unsigned char)v[*pos]))
2957 				(*pos)++;
2958 
2959 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2960 			return 0;
2961 
2962 		if (flags & ROFFNUM_WHITE)
2963 			while (isspace((unsigned char)v[*pos]))
2964 				(*pos)++;
2965 
2966 		if (NULL == res)
2967 			continue;
2968 
2969 		switch (operator) {
2970 		case '+':
2971 			*res += operand2;
2972 			break;
2973 		case '-':
2974 			*res -= operand2;
2975 			break;
2976 		case '*':
2977 			*res *= operand2;
2978 			break;
2979 		case '/':
2980 			if (operand2 == 0) {
2981 				mandoc_msg(MANDOCERR_DIVZERO,
2982 					ln, *pos, "%s", v);
2983 				*res = 0;
2984 				break;
2985 			}
2986 			*res /= operand2;
2987 			break;
2988 		case '%':
2989 			if (operand2 == 0) {
2990 				mandoc_msg(MANDOCERR_DIVZERO,
2991 					ln, *pos, "%s", v);
2992 				*res = 0;
2993 				break;
2994 			}
2995 			*res %= operand2;
2996 			break;
2997 		case '<':
2998 			*res = *res < operand2;
2999 			break;
3000 		case '>':
3001 			*res = *res > operand2;
3002 			break;
3003 		case 'l':
3004 			*res = *res <= operand2;
3005 			break;
3006 		case 'g':
3007 			*res = *res >= operand2;
3008 			break;
3009 		case '=':
3010 			*res = *res == operand2;
3011 			break;
3012 		case '!':
3013 			*res = *res != operand2;
3014 			break;
3015 		case '&':
3016 			*res = *res && operand2;
3017 			break;
3018 		case ':':
3019 			*res = *res || operand2;
3020 			break;
3021 		case 'i':
3022 			if (operand2 < *res)
3023 				*res = operand2;
3024 			break;
3025 		case 'a':
3026 			if (operand2 > *res)
3027 				*res = operand2;
3028 			break;
3029 		default:
3030 			abort();
3031 		}
3032 	}
3033 	return 1;
3034 }
3035 
3036 /* --- register management ------------------------------------------------ */
3037 
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name, leaving its step size alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 len;

	len = strlen(name);
	roff_setregn(r, name, len, val, sign, INT_MIN);
}
3043 
3044 static void
3045 roff_setregn(struct roff *r, const char *name, size_t len,
3046     int val, char sign, int step)
3047 {
3048 	struct roffreg	*reg;
3049 
3050 	/* Search for an existing register with the same name. */
3051 	reg = r->regtab;
3052 
3053 	while (reg != NULL && (reg->key.sz != len ||
3054 	    strncmp(reg->key.p, name, len) != 0))
3055 		reg = reg->next;
3056 
3057 	if (NULL == reg) {
3058 		/* Create a new register. */
3059 		reg = mandoc_malloc(sizeof(struct roffreg));
3060 		reg->key.p = mandoc_strndup(name, len);
3061 		reg->key.sz = len;
3062 		reg->val = 0;
3063 		reg->step = 0;
3064 		reg->next = r->regtab;
3065 		r->regtab = reg;
3066 	}
3067 
3068 	if ('+' == sign)
3069 		reg->val += val;
3070 	else if ('-' == sign)
3071 		reg->val -= val;
3072 	else
3073 		reg->val = val;
3074 	if (step != INT_MIN)
3075 		reg->step = step;
3076 }
3077 
3078 /*
3079  * Handle some predefined read-only number registers.
3080  * For now, return -1 if the requested register is not predefined;
3081  * in case a predefined read-only register having the value -1
3082  * were to turn up, another special value would have to be chosen.
3083  */
3084 static int
3085 roff_getregro(const struct roff *r, const char *name)
3086 {
3087 
3088 	switch (*name) {
3089 	case '$':  /* Number of arguments of the last macro evaluated. */
3090 		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3091 	case 'A':  /* ASCII approximation mode is always off. */
3092 		return 0;
3093 	case 'g':  /* Groff compatibility mode is always on. */
3094 		return 1;
3095 	case 'H':  /* Fixed horizontal resolution. */
3096 		return 24;
3097 	case 'j':  /* Always adjust left margin only. */
3098 		return 0;
3099 	case 'T':  /* Some output device is always defined. */
3100 		return 1;
3101 	case 'V':  /* Fixed vertical resolution. */
3102 		return 40;
3103 	default:
3104 		return -1;
3105 	}
3106 }
3107 
/*
 * Return the value of the number register with the
 * NUL-terminated name, without incrementing or decrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 len;

	len = strlen(name);
	return roff_getregn(r, name, len, '\0');
}
3113 
3114 static int
3115 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3116 {
3117 	struct roffreg	*reg;
3118 	int		 val;
3119 
3120 	if ('.' == name[0] && 2 == len) {
3121 		val = roff_getregro(r, name + 1);
3122 		if (-1 != val)
3123 			return val;
3124 	}
3125 
3126 	for (reg = r->regtab; reg; reg = reg->next) {
3127 		if (len == reg->key.sz &&
3128 		    0 == strncmp(name, reg->key.p, len)) {
3129 			switch (sign) {
3130 			case '+':
3131 				reg->val += reg->step;
3132 				break;
3133 			case '-':
3134 				reg->val -= reg->step;
3135 				break;
3136 			default:
3137 				break;
3138 			}
3139 			return reg->val;
3140 		}
3141 	}
3142 
3143 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3144 	return 0;
3145 }
3146 
3147 static int
3148 roff_hasregn(const struct roff *r, const char *name, size_t len)
3149 {
3150 	struct roffreg	*reg;
3151 	int		 val;
3152 
3153 	if ('.' == name[0] && 2 == len) {
3154 		val = roff_getregro(r, name + 1);
3155 		if (-1 != val)
3156 			return 1;
3157 	}
3158 
3159 	for (reg = r->regtab; reg; reg = reg->next)
3160 		if (len == reg->key.sz &&
3161 		    0 == strncmp(name, reg->key.p, len))
3162 			return 1;
3163 
3164 	return 0;
3165 }
3166 
3167 static void
3168 roff_freereg(struct roffreg *reg)
3169 {
3170 	struct roffreg	*old_reg;
3171 
3172 	while (NULL != reg) {
3173 		free(reg->key.p);
3174 		old_reg = reg;
3175 		reg = reg->next;
3176 		free(old_reg);
3177 	}
3178 }
3179 
3180 static int
3181 roff_nr(ROFF_ARGS)
3182 {
3183 	char		*key, *val, *step;
3184 	size_t		 keysz;
3185 	int		 iv, is, len;
3186 	char		 sign;
3187 
3188 	key = val = buf->buf + pos;
3189 	if (*key == '\0')
3190 		return ROFF_IGN;
3191 
3192 	keysz = roff_getname(r, &val, ln, pos);
3193 	if (key[keysz] == '\\' || key[keysz] == '\t')
3194 		return ROFF_IGN;
3195 
3196 	sign = *val;
3197 	if (sign == '+' || sign == '-')
3198 		val++;
3199 
3200 	len = 0;
3201 	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3202 		return ROFF_IGN;
3203 
3204 	step = val + len;
3205 	while (isspace((unsigned char)*step))
3206 		step++;
3207 	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3208 		is = INT_MIN;
3209 
3210 	roff_setregn(r, key, keysz, iv, sign, is);
3211 	return ROFF_IGN;
3212 }
3213 
3214 static int
3215 roff_rr(ROFF_ARGS)
3216 {
3217 	struct roffreg	*reg, **prev;
3218 	char		*name, *cp;
3219 	size_t		 namesz;
3220 
3221 	name = cp = buf->buf + pos;
3222 	if (*name == '\0')
3223 		return ROFF_IGN;
3224 	namesz = roff_getname(r, &cp, ln, pos);
3225 	name[namesz] = '\0';
3226 
3227 	prev = &r->regtab;
3228 	while (1) {
3229 		reg = *prev;
3230 		if (reg == NULL || !strcmp(name, reg->key.p))
3231 			break;
3232 		prev = &reg->next;
3233 	}
3234 	if (reg != NULL) {
3235 		*prev = reg->next;
3236 		free(reg->key.p);
3237 		free(reg);
3238 	}
3239 	return ROFF_IGN;
3240 }
3241 
3242 /* --- handler functions for roff requests -------------------------------- */
3243 
3244 static int
3245 roff_rm(ROFF_ARGS)
3246 {
3247 	const char	 *name;
3248 	char		 *cp;
3249 	size_t		  namesz;
3250 
3251 	cp = buf->buf + pos;
3252 	while (*cp != '\0') {
3253 		name = cp;
3254 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3255 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3256 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3257 		if (name[namesz] == '\\' || name[namesz] == '\t')
3258 			break;
3259 	}
3260 	return ROFF_IGN;
3261 }
3262 
3263 static int
3264 roff_it(ROFF_ARGS)
3265 {
3266 	int		 iv;
3267 
3268 	/* Parse the number of lines. */
3269 
3270 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3271 		mandoc_msg(MANDOCERR_IT_NONUM,
3272 		    ln, ppos, "%s", buf->buf + 1);
3273 		return ROFF_IGN;
3274 	}
3275 
3276 	while (isspace((unsigned char)buf->buf[pos]))
3277 		pos++;
3278 
3279 	/*
3280 	 * Arm the input line trap.
3281 	 * Special-casing "an-trap" is an ugly workaround to cope
3282 	 * with DocBook stupidly fiddling with man(7) internals.
3283 	 */
3284 
3285 	roffit_lines = iv;
3286 	roffit_macro = mandoc_strdup(iv != 1 ||
3287 	    strcmp(buf->buf + pos, "an-trap") ?
3288 	    buf->buf + pos : "br");
3289 	return ROFF_IGN;
3290 }
3291 
3292 static int
3293 roff_Dd(ROFF_ARGS)
3294 {
3295 	int		 mask;
3296 	enum roff_tok	 t, te;
3297 
3298 	switch (tok) {
3299 	case ROFF_Dd:
3300 		tok = MDOC_Dd;
3301 		te = MDOC_MAX;
3302 		if (r->format == 0)
3303 			r->format = MPARSE_MDOC;
3304 		mask = MPARSE_MDOC | MPARSE_QUICK;
3305 		break;
3306 	case ROFF_TH:
3307 		tok = MAN_TH;
3308 		te = MAN_MAX;
3309 		if (r->format == 0)
3310 			r->format = MPARSE_MAN;
3311 		mask = MPARSE_QUICK;
3312 		break;
3313 	default:
3314 		abort();
3315 	}
3316 	if ((r->options & mask) == 0)
3317 		for (t = tok; t < te; t++)
3318 			roff_setstr(r, roff_name[t], NULL, 0);
3319 	return ROFF_CONT;
3320 }
3321 
3322 static int
3323 roff_TE(ROFF_ARGS)
3324 {
3325 	r->man->flags &= ~ROFF_NONOFILL;
3326 	if (r->tbl == NULL) {
3327 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3328 		return ROFF_IGN;
3329 	}
3330 	if (tbl_end(r->tbl, 0) == 0) {
3331 		r->tbl = NULL;
3332 		free(buf->buf);
3333 		buf->buf = mandoc_strdup(".sp");
3334 		buf->sz = 4;
3335 		*offs = 0;
3336 		return ROFF_REPARSE;
3337 	}
3338 	r->tbl = NULL;
3339 	return ROFF_IGN;
3340 }
3341 
3342 static int
3343 roff_T_(ROFF_ARGS)
3344 {
3345 
3346 	if (NULL == r->tbl)
3347 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3348 	else
3349 		tbl_restart(ln, ppos, r->tbl);
3350 
3351 	return ROFF_IGN;
3352 }
3353 
3354 /*
3355  * Handle in-line equation delimiters.
3356  */
3357 static int
3358 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3359 {
3360 	char		*cp1, *cp2;
3361 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3362 
3363 	/*
3364 	 * Outside equations, look for an opening delimiter.
3365 	 * If we are inside an equation, we already know it is
3366 	 * in-line, or this function wouldn't have been called;
3367 	 * so look for a closing delimiter.
3368 	 */
3369 
3370 	cp1 = buf->buf + pos;
3371 	cp2 = strchr(cp1, r->eqn == NULL ?
3372 	    r->last_eqn->odelim : r->last_eqn->cdelim);
3373 	if (cp2 == NULL)
3374 		return ROFF_CONT;
3375 
3376 	*cp2++ = '\0';
3377 	bef_pr = bef_nl = aft_nl = aft_pr = "";
3378 
3379 	/* Handle preceding text, protecting whitespace. */
3380 
3381 	if (*buf->buf != '\0') {
3382 		if (r->eqn == NULL)
3383 			bef_pr = "\\&";
3384 		bef_nl = "\n";
3385 	}
3386 
3387 	/*
3388 	 * Prepare replacing the delimiter with an equation macro
3389 	 * and drop leading white space from the equation.
3390 	 */
3391 
3392 	if (r->eqn == NULL) {
3393 		while (*cp2 == ' ')
3394 			cp2++;
3395 		mac = ".EQ";
3396 	} else
3397 		mac = ".EN";
3398 
3399 	/* Handle following text, protecting whitespace. */
3400 
3401 	if (*cp2 != '\0') {
3402 		aft_nl = "\n";
3403 		if (r->eqn != NULL)
3404 			aft_pr = "\\&";
3405 	}
3406 
3407 	/* Do the actual replacement. */
3408 
3409 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3410 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3411 	free(buf->buf);
3412 	buf->buf = cp1;
3413 
3414 	/* Toggle the in-line state of the eqn subsystem. */
3415 
3416 	r->eqn_inline = r->eqn == NULL;
3417 	return ROFF_REPARSE;
3418 }
3419 
3420 static int
3421 roff_EQ(ROFF_ARGS)
3422 {
3423 	struct roff_node	*n;
3424 
3425 	if (r->man->meta.macroset == MACROSET_MAN)
3426 		man_breakscope(r->man, ROFF_EQ);
3427 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3428 	if (ln > r->man->last->line)
3429 		n->flags |= NODE_LINE;
3430 	n->eqn = eqn_box_new();
3431 	roff_node_append(r->man, n);
3432 	r->man->next = ROFF_NEXT_SIBLING;
3433 
3434 	assert(r->eqn == NULL);
3435 	if (r->last_eqn == NULL)
3436 		r->last_eqn = eqn_alloc();
3437 	else
3438 		eqn_reset(r->last_eqn);
3439 	r->eqn = r->last_eqn;
3440 	r->eqn->node = n;
3441 
3442 	if (buf->buf[pos] != '\0')
3443 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3444 		    ".EQ %s", buf->buf + pos);
3445 
3446 	return ROFF_IGN;
3447 }
3448 
3449 static int
3450 roff_EN(ROFF_ARGS)
3451 {
3452 	if (r->eqn != NULL) {
3453 		eqn_parse(r->eqn);
3454 		r->eqn = NULL;
3455 	} else
3456 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3457 	if (buf->buf[pos] != '\0')
3458 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3459 		    "EN %s", buf->buf + pos);
3460 	return ROFF_IGN;
3461 }
3462 
3463 static int
3464 roff_TS(ROFF_ARGS)
3465 {
3466 	if (r->tbl != NULL) {
3467 		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3468 		tbl_end(r->tbl, 0);
3469 	}
3470 	r->man->flags |= ROFF_NONOFILL;
3471 	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3472 	if (r->last_tbl == NULL)
3473 		r->first_tbl = r->tbl;
3474 	r->last_tbl = r->tbl;
3475 	return ROFF_IGN;
3476 }
3477 
3478 static int
3479 roff_noarg(ROFF_ARGS)
3480 {
3481 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3482 		man_breakscope(r->man, tok);
3483 	if (tok == ROFF_brp)
3484 		tok = ROFF_br;
3485 	roff_elem_alloc(r->man, ln, ppos, tok);
3486 	if (buf->buf[pos] != '\0')
3487 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3488 		   "%s %s", roff_name[tok], buf->buf + pos);
3489 	if (tok == ROFF_nf)
3490 		r->man->flags |= ROFF_NOFILL;
3491 	else if (tok == ROFF_fi)
3492 		r->man->flags &= ~ROFF_NOFILL;
3493 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3494 	r->man->next = ROFF_NEXT_SIBLING;
3495 	return ROFF_IGN;
3496 }
3497 
3498 static int
3499 roff_onearg(ROFF_ARGS)
3500 {
3501 	struct roff_node	*n;
3502 	char			*cp;
3503 	int			 npos;
3504 
3505 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3506 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3507 	     tok == ROFF_ti))
3508 		man_breakscope(r->man, tok);
3509 
3510 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3511 		r->man->last = roffce_node;
3512 		r->man->next = ROFF_NEXT_SIBLING;
3513 	}
3514 
3515 	roff_elem_alloc(r->man, ln, ppos, tok);
3516 	n = r->man->last;
3517 
3518 	cp = buf->buf + pos;
3519 	if (*cp != '\0') {
3520 		while (*cp != '\0' && *cp != ' ')
3521 			cp++;
3522 		while (*cp == ' ')
3523 			*cp++ = '\0';
3524 		if (*cp != '\0')
3525 			mandoc_msg(MANDOCERR_ARG_EXCESS,
3526 			    ln, (int)(cp - buf->buf),
3527 			    "%s ... %s", roff_name[tok], cp);
3528 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3529 	}
3530 
3531 	if (tok == ROFF_ce || tok == ROFF_rj) {
3532 		if (r->man->last->type == ROFFT_ELEM) {
3533 			roff_word_alloc(r->man, ln, pos, "1");
3534 			r->man->last->flags |= NODE_NOSRC;
3535 		}
3536 		npos = 0;
3537 		if (roff_evalnum(r, ln, r->man->last->string, &npos,
3538 		    &roffce_lines, 0) == 0) {
3539 			mandoc_msg(MANDOCERR_CE_NONUM,
3540 			    ln, pos, "ce %s", buf->buf + pos);
3541 			roffce_lines = 1;
3542 		}
3543 		if (roffce_lines < 1) {
3544 			r->man->last = r->man->last->parent;
3545 			roffce_node = NULL;
3546 			roffce_lines = 0;
3547 		} else
3548 			roffce_node = r->man->last->parent;
3549 	} else {
3550 		n->flags |= NODE_VALID | NODE_ENDED;
3551 		r->man->last = n;
3552 	}
3553 	n->flags |= NODE_LINE;
3554 	r->man->next = ROFF_NEXT_SIBLING;
3555 	return ROFF_IGN;
3556 }
3557 
3558 static int
3559 roff_manyarg(ROFF_ARGS)
3560 {
3561 	struct roff_node	*n;
3562 	char			*sp, *ep;
3563 
3564 	roff_elem_alloc(r->man, ln, ppos, tok);
3565 	n = r->man->last;
3566 
3567 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3568 		while (*ep != '\0' && *ep != ' ')
3569 			ep++;
3570 		while (*ep == ' ')
3571 			*ep++ = '\0';
3572 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3573 	}
3574 
3575 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3576 	r->man->last = n;
3577 	r->man->next = ROFF_NEXT_SIBLING;
3578 	return ROFF_IGN;
3579 }
3580 
3581 static int
3582 roff_als(ROFF_ARGS)
3583 {
3584 	char		*oldn, *newn, *end, *value;
3585 	size_t		 oldsz, newsz, valsz;
3586 
3587 	newn = oldn = buf->buf + pos;
3588 	if (*newn == '\0')
3589 		return ROFF_IGN;
3590 
3591 	newsz = roff_getname(r, &oldn, ln, pos);
3592 	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3593 		return ROFF_IGN;
3594 
3595 	end = oldn;
3596 	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3597 	if (oldsz == 0)
3598 		return ROFF_IGN;
3599 
3600 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3601 	    (int)oldsz, oldn);
3602 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3603 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3604 	free(value);
3605 	return ROFF_IGN;
3606 }
3607 
3608 /*
3609  * The .break request only makes sense inside conditionals,
3610  * and that case is already handled in roff_cond_sub().
3611  */
3612 static int
3613 roff_break(ROFF_ARGS)
3614 {
3615 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3616 	return ROFF_IGN;
3617 }
3618 
3619 static int
3620 roff_cc(ROFF_ARGS)
3621 {
3622 	const char	*p;
3623 
3624 	p = buf->buf + pos;
3625 
3626 	if (*p == '\0' || (r->control = *p++) == '.')
3627 		r->control = '\0';
3628 
3629 	if (*p != '\0')
3630 		mandoc_msg(MANDOCERR_ARG_EXCESS,
3631 		    ln, p - buf->buf, "cc ... %s", p);
3632 
3633 	return ROFF_IGN;
3634 }
3635 
3636 static int
3637 roff_char(ROFF_ARGS)
3638 {
3639 	const char	*p, *kp, *vp;
3640 	size_t		 ksz, vsz;
3641 	int		 font;
3642 
3643 	/* Parse the character to be replaced. */
3644 
3645 	kp = buf->buf + pos;
3646 	p = kp + 1;
3647 	if (*kp == '\0' || (*kp == '\\' &&
3648 	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3649 	    (*p != ' ' && *p != '\0')) {
3650 		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3651 		return ROFF_IGN;
3652 	}
3653 	ksz = p - kp;
3654 	while (*p == ' ')
3655 		p++;
3656 
3657 	/*
3658 	 * If the replacement string contains a font escape sequence,
3659 	 * we have to restore the font at the end.
3660 	 */
3661 
3662 	vp = p;
3663 	vsz = strlen(p);
3664 	font = 0;
3665 	while (*p != '\0') {
3666 		if (*p++ != '\\')
3667 			continue;
3668 		switch (mandoc_escape(&p, NULL, NULL)) {
3669 		case ESCAPE_FONT:
3670 		case ESCAPE_FONTROMAN:
3671 		case ESCAPE_FONTITALIC:
3672 		case ESCAPE_FONTBOLD:
3673 		case ESCAPE_FONTBI:
3674 		case ESCAPE_FONTCR:
3675 		case ESCAPE_FONTCB:
3676 		case ESCAPE_FONTCI:
3677 		case ESCAPE_FONTPREV:
3678 			font++;
3679 			break;
3680 		default:
3681 			break;
3682 		}
3683 	}
3684 	if (font > 1)
3685 		mandoc_msg(MANDOCERR_CHAR_FONT,
3686 		    ln, (int)(vp - buf->buf), "%s", vp);
3687 
3688 	/*
3689 	 * Approximate the effect of .char using the .tr tables.
3690 	 * XXX In groff, .char and .tr interact differently.
3691 	 */
3692 
3693 	if (ksz == 1) {
3694 		if (r->xtab == NULL)
3695 			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3696 		assert((unsigned int)*kp < 128);
3697 		free(r->xtab[(int)*kp].p);
3698 		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3699 		    "%s%s", vp, font ? "\fP" : "");
3700 	} else {
3701 		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3702 		if (font)
3703 			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3704 	}
3705 	return ROFF_IGN;
3706 }
3707 
3708 static int
3709 roff_ec(ROFF_ARGS)
3710 {
3711 	const char	*p;
3712 
3713 	p = buf->buf + pos;
3714 	if (*p == '\0')
3715 		r->escape = '\\';
3716 	else {
3717 		r->escape = *p;
3718 		if (*++p != '\0')
3719 			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3720 			    (int)(p - buf->buf), "ec ... %s", p);
3721 	}
3722 	return ROFF_IGN;
3723 }
3724 
3725 static int
3726 roff_eo(ROFF_ARGS)
3727 {
3728 	r->escape = '\0';
3729 	if (buf->buf[pos] != '\0')
3730 		mandoc_msg(MANDOCERR_ARG_SKIP,
3731 		    ln, pos, "eo %s", buf->buf + pos);
3732 	return ROFF_IGN;
3733 }
3734 
3735 static int
3736 roff_mc(ROFF_ARGS)
3737 {
3738 	struct roff_node	*n;
3739 	char			*cp;
3740 
3741 	/* Parse the first argument. */
3742 
3743 	cp = buf->buf + pos;
3744 	if (*cp != '\0')
3745 		cp++;
3746 	if (buf->buf[pos] == '\\') {
3747 		switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3748 		case ESCAPE_SPECIAL:
3749 		case ESCAPE_UNICODE:
3750 		case ESCAPE_NUMBERED:
3751 			break;
3752 		default:
3753 			*cp = '\0';
3754 			mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3755 			    "mc %s", buf->buf + pos);
3756 			buf->buf[pos] = '\0';
3757 			break;
3758 		}
3759 	}
3760 
3761 	/* Ignore additional arguments. */
3762 
3763 	while (*cp == ' ')
3764 		*cp++ = '\0';
3765 	if (*cp != '\0') {
3766 		mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3767 		    "mc ... %s", cp);
3768 		*cp = '\0';
3769 	}
3770 
3771 	/* Create the .mc node. */
3772 
3773 	roff_elem_alloc(r->man, ln, ppos, tok);
3774 	n = r->man->last;
3775 	if (buf->buf[pos] != '\0')
3776 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3777 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3778 	r->man->last = n;
3779 	r->man->next = ROFF_NEXT_SIBLING;
3780 	return ROFF_IGN;
3781 }
3782 
3783 static int
3784 roff_nop(ROFF_ARGS)
3785 {
3786 	while (buf->buf[pos] == ' ')
3787 		pos++;
3788 	*offs = pos;
3789 	return ROFF_RERUN;
3790 }
3791 
3792 static int
3793 roff_tr(ROFF_ARGS)
3794 {
3795 	const char	*p, *first, *second;
3796 	size_t		 fsz, ssz;
3797 	enum mandoc_esc	 esc;
3798 
3799 	p = buf->buf + pos;
3800 
3801 	if (*p == '\0') {
3802 		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3803 		return ROFF_IGN;
3804 	}
3805 
3806 	while (*p != '\0') {
3807 		fsz = ssz = 1;
3808 
3809 		first = p++;
3810 		if (*first == '\\') {
3811 			esc = mandoc_escape(&p, NULL, NULL);
3812 			if (esc == ESCAPE_ERROR) {
3813 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3814 				    (int)(p - buf->buf), "%s", first);
3815 				return ROFF_IGN;
3816 			}
3817 			fsz = (size_t)(p - first);
3818 		}
3819 
3820 		second = p++;
3821 		if (*second == '\\') {
3822 			esc = mandoc_escape(&p, NULL, NULL);
3823 			if (esc == ESCAPE_ERROR) {
3824 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3825 				    (int)(p - buf->buf), "%s", second);
3826 				return ROFF_IGN;
3827 			}
3828 			ssz = (size_t)(p - second);
3829 		} else if (*second == '\0') {
3830 			mandoc_msg(MANDOCERR_TR_ODD, ln,
3831 			    (int)(first - buf->buf), "tr %s", first);
3832 			second = " ";
3833 			p--;
3834 		}
3835 
3836 		if (fsz > 1) {
3837 			roff_setstrn(&r->xmbtab, first, fsz,
3838 			    second, ssz, 0);
3839 			continue;
3840 		}
3841 
3842 		if (r->xtab == NULL)
3843 			r->xtab = mandoc_calloc(128,
3844 			    sizeof(struct roffstr));
3845 
3846 		free(r->xtab[(int)*first].p);
3847 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3848 		r->xtab[(int)*first].sz = ssz;
3849 	}
3850 
3851 	return ROFF_IGN;
3852 }
3853 
3854 /*
3855  * Implementation of the .return request.
3856  * There is no need to call roff_userret() from here.
3857  * The read module will call that after rewinding the reader stack
3858  * to the place from where the current macro was called.
3859  */
3860 static int
3861 roff_return(ROFF_ARGS)
3862 {
3863 	if (r->mstackpos >= 0)
3864 		return ROFF_IGN | ROFF_USERRET;
3865 
3866 	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3867 	return ROFF_IGN;
3868 }
3869 
3870 static int
3871 roff_rn(ROFF_ARGS)
3872 {
3873 	const char	*value;
3874 	char		*oldn, *newn, *end;
3875 	size_t		 oldsz, newsz;
3876 	int		 deftype;
3877 
3878 	oldn = newn = buf->buf + pos;
3879 	if (*oldn == '\0')
3880 		return ROFF_IGN;
3881 
3882 	oldsz = roff_getname(r, &newn, ln, pos);
3883 	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3884 		return ROFF_IGN;
3885 
3886 	end = newn;
3887 	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3888 	if (newsz == 0)
3889 		return ROFF_IGN;
3890 
3891 	deftype = ROFFDEF_ANY;
3892 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3893 	switch (deftype) {
3894 	case ROFFDEF_USER:
3895 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3896 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3897 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3898 		break;
3899 	case ROFFDEF_PRE:
3900 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3901 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3902 		break;
3903 	case ROFFDEF_REN:
3904 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3905 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3906 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3907 		break;
3908 	case ROFFDEF_STD:
3909 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3910 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3911 		break;
3912 	default:
3913 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3914 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3915 		break;
3916 	}
3917 	return ROFF_IGN;
3918 }
3919 
3920 static int
3921 roff_shift(ROFF_ARGS)
3922 {
3923 	struct mctx	*ctx;
3924 	int		 argpos, levels, i;
3925 
3926 	argpos = pos;
3927 	levels = 1;
3928 	if (buf->buf[pos] != '\0' &&
3929 	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3930 		mandoc_msg(MANDOCERR_CE_NONUM,
3931 		    ln, pos, "shift %s", buf->buf + pos);
3932 		levels = 1;
3933 	}
3934 	if (r->mstackpos < 0) {
3935 		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3936 		return ROFF_IGN;
3937 	}
3938 	ctx = r->mstack + r->mstackpos;
3939 	if (levels > ctx->argc) {
3940 		mandoc_msg(MANDOCERR_SHIFT,
3941 		    ln, argpos, "%d, but max is %d", levels, ctx->argc);
3942 		levels = ctx->argc;
3943 	}
3944 	if (levels < 0) {
3945 		mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3946 		levels = 0;
3947 	}
3948 	if (levels == 0)
3949 		return ROFF_IGN;
3950 	for (i = 0; i < levels; i++)
3951 		free(ctx->argv[i]);
3952 	ctx->argc -= levels;
3953 	for (i = 0; i < ctx->argc; i++)
3954 		ctx->argv[i] = ctx->argv[i + levels];
3955 	return ROFF_IGN;
3956 }
3957 
3958 static int
3959 roff_so(ROFF_ARGS)
3960 {
3961 	char *name, *cp;
3962 
3963 	name = buf->buf + pos;
3964 	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3965 
3966 	/*
3967 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3968 	 * opening anything that's not in our cwd or anything beneath
3969 	 * it.  Thus, explicitly disallow traversing up the file-system
3970 	 * or using absolute paths.
3971 	 */
3972 
3973 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3974 		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3975 		buf->sz = mandoc_asprintf(&cp,
3976 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3977 		free(buf->buf);
3978 		buf->buf = cp;
3979 		*offs = 0;
3980 		return ROFF_REPARSE;
3981 	}
3982 
3983 	*offs = pos;
3984 	return ROFF_SO;
3985 }
3986 
3987 /* --- user defined strings and macros ------------------------------------ */
3988 
3989 static int
3990 roff_userdef(ROFF_ARGS)
3991 {
3992 	struct mctx	 *ctx;
3993 	char		 *arg, *ap, *dst, *src;
3994 	size_t		  sz;
3995 
3996 	/* If the macro is empty, ignore it altogether. */
3997 
3998 	if (*r->current_string == '\0')
3999 		return ROFF_IGN;
4000 
4001 	/* Initialize a new macro stack context. */
4002 
4003 	if (++r->mstackpos == r->mstacksz) {
4004 		r->mstack = mandoc_recallocarray(r->mstack,
4005 		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
4006 		r->mstacksz += 8;
4007 	}
4008 	ctx = r->mstack + r->mstackpos;
4009 	ctx->argc = 0;
4010 
4011 	/*
4012 	 * Collect pointers to macro argument strings,
4013 	 * NUL-terminating them and escaping quotes.
4014 	 */
4015 
4016 	src = buf->buf + pos;
4017 	while (*src != '\0') {
4018 		if (ctx->argc == ctx->argsz) {
4019 			ctx->argsz += 8;
4020 			ctx->argv = mandoc_reallocarray(ctx->argv,
4021 			    ctx->argsz, sizeof(*ctx->argv));
4022 		}
4023 		arg = roff_getarg(r, &src, ln, &pos);
4024 		sz = 1;  /* For the terminating NUL. */
4025 		for (ap = arg; *ap != '\0'; ap++)
4026 			sz += *ap == '"' ? 4 : 1;
4027 		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
4028 		for (ap = arg; *ap != '\0'; ap++) {
4029 			if (*ap == '"') {
4030 				memcpy(dst, "\\(dq", 4);
4031 				dst += 4;
4032 			} else
4033 				*dst++ = *ap;
4034 		}
4035 		*dst = '\0';
4036 		free(arg);
4037 	}
4038 
4039 	/* Replace the macro invocation by the macro definition. */
4040 
4041 	free(buf->buf);
4042 	buf->buf = mandoc_strdup(r->current_string);
4043 	buf->sz = strlen(buf->buf) + 1;
4044 	*offs = 0;
4045 
4046 	return buf->buf[buf->sz - 2] == '\n' ?
4047 	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4048 }
4049 
4050 /*
4051  * Calling a high-level macro that was renamed with .rn.
4052  * r->current_string has already been set up by roff_parse().
4053  */
4054 static int
4055 roff_renamed(ROFF_ARGS)
4056 {
4057 	char	*nbuf;
4058 
4059 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4060 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4061 	free(buf->buf);
4062 	buf->buf = nbuf;
4063 	*offs = 0;
4064 	return ROFF_CONT;
4065 }
4066 
4067 /*
4068  * Measure the length in bytes of the roff identifier at *cpp
4069  * and advance the pointer to the next word.
4070  */
4071 static size_t
4072 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4073 {
4074 	char	 *name, *cp;
4075 	size_t	  namesz;
4076 
4077 	name = *cpp;
4078 	if (*name == '\0')
4079 		return 0;
4080 
4081 	/* Advance cp to the byte after the end of the name. */
4082 
4083 	for (cp = name; 1; cp++) {
4084 		namesz = cp - name;
4085 		if (*cp == '\0')
4086 			break;
4087 		if (*cp == ' ' || *cp == '\t') {
4088 			cp++;
4089 			break;
4090 		}
4091 		if (*cp != '\\')
4092 			continue;
4093 		if (cp[1] == '{' || cp[1] == '}')
4094 			break;
4095 		if (*++cp == '\\')
4096 			continue;
4097 		mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4098 		    "%.*s", (int)(cp - name + 1), name);
4099 		mandoc_escape((const char **)&cp, NULL, NULL);
4100 		break;
4101 	}
4102 
4103 	/* Read past spaces. */
4104 
4105 	while (*cp == ' ')
4106 		cp++;
4107 
4108 	*cpp = cp;
4109 	return namesz;
4110 }
4111 
4112 /*
4113  * Store *string into the user-defined string called *name.
4114  * To clear an existing entry, call with (*r, *name, NULL, 0).
4115  * append == 0: replace mode
4116  * append == 1: single-line append mode
4117  * append == 2: multiline append mode, append '\n' after each call
4118  */
4119 static void
4120 roff_setstr(struct roff *r, const char *name, const char *string,
4121 	int append)
4122 {
4123 	size_t	 namesz;
4124 
4125 	namesz = strlen(name);
4126 	roff_setstrn(&r->strtab, name, namesz, string,
4127 	    string ? strlen(string) : 0, append);
4128 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4129 }
4130 
4131 static void
4132 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4133 		const char *string, size_t stringsz, int append)
4134 {
4135 	struct roffkv	*n;
4136 	char		*c;
4137 	int		 i;
4138 	size_t		 oldch, newch;
4139 
4140 	/* Search for an existing string with the same name. */
4141 	n = *r;
4142 
4143 	while (n && (namesz != n->key.sz ||
4144 			strncmp(n->key.p, name, namesz)))
4145 		n = n->next;
4146 
4147 	if (NULL == n) {
4148 		/* Create a new string table entry. */
4149 		n = mandoc_malloc(sizeof(struct roffkv));
4150 		n->key.p = mandoc_strndup(name, namesz);
4151 		n->key.sz = namesz;
4152 		n->val.p = NULL;
4153 		n->val.sz = 0;
4154 		n->next = *r;
4155 		*r = n;
4156 	} else if (0 == append) {
4157 		free(n->val.p);
4158 		n->val.p = NULL;
4159 		n->val.sz = 0;
4160 	}
4161 
4162 	if (NULL == string)
4163 		return;
4164 
4165 	/*
4166 	 * One additional byte for the '\n' in multiline mode,
4167 	 * and one for the terminating '\0'.
4168 	 */
4169 	newch = stringsz + (1 < append ? 2u : 1u);
4170 
4171 	if (NULL == n->val.p) {
4172 		n->val.p = mandoc_malloc(newch);
4173 		*n->val.p = '\0';
4174 		oldch = 0;
4175 	} else {
4176 		oldch = n->val.sz;
4177 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4178 	}
4179 
4180 	/* Skip existing content in the destination buffer. */
4181 	c = n->val.p + (int)oldch;
4182 
4183 	/* Append new content to the destination buffer. */
4184 	i = 0;
4185 	while (i < (int)stringsz) {
4186 		/*
4187 		 * Rudimentary roff copy mode:
4188 		 * Handle escaped backslashes.
4189 		 */
4190 		if ('\\' == string[i] && '\\' == string[i + 1])
4191 			i++;
4192 		*c++ = string[i++];
4193 	}
4194 
4195 	/* Append terminating bytes. */
4196 	if (1 < append)
4197 		*c++ = '\n';
4198 
4199 	*c = '\0';
4200 	n->val.sz = (int)(c - n->val.p);
4201 }
4202 
4203 static const char *
4204 roff_getstrn(struct roff *r, const char *name, size_t len,
4205     int *deftype)
4206 {
4207 	const struct roffkv	*n;
4208 	int			 found, i;
4209 	enum roff_tok		 tok;
4210 
4211 	found = 0;
4212 	for (n = r->strtab; n != NULL; n = n->next) {
4213 		if (strncmp(name, n->key.p, len) != 0 ||
4214 		    n->key.p[len] != '\0' || n->val.p == NULL)
4215 			continue;
4216 		if (*deftype & ROFFDEF_USER) {
4217 			*deftype = ROFFDEF_USER;
4218 			return n->val.p;
4219 		} else {
4220 			found = 1;
4221 			break;
4222 		}
4223 	}
4224 	for (n = r->rentab; n != NULL; n = n->next) {
4225 		if (strncmp(name, n->key.p, len) != 0 ||
4226 		    n->key.p[len] != '\0' || n->val.p == NULL)
4227 			continue;
4228 		if (*deftype & ROFFDEF_REN) {
4229 			*deftype = ROFFDEF_REN;
4230 			return n->val.p;
4231 		} else {
4232 			found = 1;
4233 			break;
4234 		}
4235 	}
4236 	for (i = 0; i < PREDEFS_MAX; i++) {
4237 		if (strncmp(name, predefs[i].name, len) != 0 ||
4238 		    predefs[i].name[len] != '\0')
4239 			continue;
4240 		if (*deftype & ROFFDEF_PRE) {
4241 			*deftype = ROFFDEF_PRE;
4242 			return predefs[i].str;
4243 		} else {
4244 			found = 1;
4245 			break;
4246 		}
4247 	}
4248 	if (r->man->meta.macroset != MACROSET_MAN) {
4249 		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4250 			if (strncmp(name, roff_name[tok], len) != 0 ||
4251 			    roff_name[tok][len] != '\0')
4252 				continue;
4253 			if (*deftype & ROFFDEF_STD) {
4254 				*deftype = ROFFDEF_STD;
4255 				return NULL;
4256 			} else {
4257 				found = 1;
4258 				break;
4259 			}
4260 		}
4261 	}
4262 	if (r->man->meta.macroset != MACROSET_MDOC) {
4263 		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4264 			if (strncmp(name, roff_name[tok], len) != 0 ||
4265 			    roff_name[tok][len] != '\0')
4266 				continue;
4267 			if (*deftype & ROFFDEF_STD) {
4268 				*deftype = ROFFDEF_STD;
4269 				return NULL;
4270 			} else {
4271 				found = 1;
4272 				break;
4273 			}
4274 		}
4275 	}
4276 
4277 	if (found == 0 && *deftype != ROFFDEF_ANY) {
4278 		if (*deftype & ROFFDEF_REN) {
4279 			/*
4280 			 * This might still be a request,
4281 			 * so do not treat it as undefined yet.
4282 			 */
4283 			*deftype = ROFFDEF_UNDEF;
4284 			return NULL;
4285 		}
4286 
4287 		/* Using an undefined string defines it to be empty. */
4288 
4289 		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4290 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4291 	}
4292 
4293 	*deftype = 0;
4294 	return NULL;
4295 }
4296 
4297 static void
4298 roff_freestr(struct roffkv *r)
4299 {
4300 	struct roffkv	 *n, *nn;
4301 
4302 	for (n = r; n; n = nn) {
4303 		free(n->key.p);
4304 		free(n->val.p);
4305 		nn = n->next;
4306 		free(n);
4307 	}
4308 }
4309 
4310 /* --- accessors and utility functions ------------------------------------ */
4311 
4312 /*
4313  * Duplicate an input string, making the appropriate character
4314  * conversations (as stipulated by `tr') along the way.
4315  * Returns a heap-allocated string with all the replacements made.
4316  */
4317 char *
4318 roff_strdup(const struct roff *r, const char *p)
4319 {
4320 	const struct roffkv *cp;
4321 	char		*res;
4322 	const char	*pp;
4323 	size_t		 ssz, sz;
4324 	enum mandoc_esc	 esc;
4325 
4326 	if (NULL == r->xmbtab && NULL == r->xtab)
4327 		return mandoc_strdup(p);
4328 	else if ('\0' == *p)
4329 		return mandoc_strdup("");
4330 
4331 	/*
4332 	 * Step through each character looking for term matches
4333 	 * (remember that a `tr' can be invoked with an escape, which is
4334 	 * a glyph but the escape is multi-character).
4335 	 * We only do this if the character hash has been initialised
4336 	 * and the string is >0 length.
4337 	 */
4338 
4339 	res = NULL;
4340 	ssz = 0;
4341 
4342 	while ('\0' != *p) {
4343 		assert((unsigned int)*p < 128);
4344 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4345 			sz = r->xtab[(int)*p].sz;
4346 			res = mandoc_realloc(res, ssz + sz + 1);
4347 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4348 			ssz += sz;
4349 			p++;
4350 			continue;
4351 		} else if ('\\' != *p) {
4352 			res = mandoc_realloc(res, ssz + 2);
4353 			res[ssz++] = *p++;
4354 			continue;
4355 		}
4356 
4357 		/* Search for term matches. */
4358 		for (cp = r->xmbtab; cp; cp = cp->next)
4359 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4360 				break;
4361 
4362 		if (NULL != cp) {
4363 			/*
4364 			 * A match has been found.
4365 			 * Append the match to the array and move
4366 			 * forward by its keysize.
4367 			 */
4368 			res = mandoc_realloc(res,
4369 			    ssz + cp->val.sz + 1);
4370 			memcpy(res + ssz, cp->val.p, cp->val.sz);
4371 			ssz += cp->val.sz;
4372 			p += (int)cp->key.sz;
4373 			continue;
4374 		}
4375 
4376 		/*
4377 		 * Handle escapes carefully: we need to copy
4378 		 * over just the escape itself, or else we might
4379 		 * do replacements within the escape itself.
4380 		 * Make sure to pass along the bogus string.
4381 		 */
4382 		pp = p++;
4383 		esc = mandoc_escape(&p, NULL, NULL);
4384 		if (ESCAPE_ERROR == esc) {
4385 			sz = strlen(pp);
4386 			res = mandoc_realloc(res, ssz + sz + 1);
4387 			memcpy(res + ssz, pp, sz);
4388 			break;
4389 		}
4390 		/*
4391 		 * We bail out on bad escapes.
4392 		 * No need to warn: we already did so when
4393 		 * roff_expand() was called.
4394 		 */
4395 		sz = (int)(p - pp);
4396 		res = mandoc_realloc(res, ssz + sz + 1);
4397 		memcpy(res + ssz, pp, sz);
4398 		ssz += sz;
4399 	}
4400 
4401 	res[(int)ssz] = '\0';
4402 	return res;
4403 }
4404 
4405 int
4406 roff_getformat(const struct roff *r)
4407 {
4408 
4409 	return r->format;
4410 }
4411 
4412 /*
4413  * Find out whether a line is a macro line or not.
4414  * If it is, adjust the current position and return one; if it isn't,
4415  * return zero and don't change the current position.
4416  * If the control character has been set with `.cc', then let that grain
4417  * precedence.
4418  * This is slighly contrary to groff, where using the non-breaking
4419  * control character when `cc' has been invoked will cause the
4420  * non-breaking macro contents to be printed verbatim.
4421  */
4422 int
4423 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4424 {
4425 	int		pos;
4426 
4427 	pos = *ppos;
4428 
4429 	if (r->control != '\0' && cp[pos] == r->control)
4430 		pos++;
4431 	else if (r->control != '\0')
4432 		return 0;
4433 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4434 		pos += 2;
4435 	else if ('.' == cp[pos] || '\'' == cp[pos])
4436 		pos++;
4437 	else
4438 		return 0;
4439 
4440 	while (' ' == cp[pos] || '\t' == cp[pos])
4441 		pos++;
4442 
4443 	*ppos = pos;
4444 	return 1;
4445 }
4446