xref: /openbsd-src/usr.bin/mandoc/roff.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /* $OpenBSD: roff.c,v 1.248 2020/08/27 12:58:00 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the roff(7) parser for mandoc(1).
19  */
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40 
/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 * The value 27 is the ASCII ESC control character.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * These are bit flags that can be combined with bitwise OR.
 * ROFFDEF_ANY is the union of the four kinds of existing definitions;
 * ROFFDEF_UNDEF is a separate bit that is not part of ROFFDEF_ANY.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
60 
61 /* --- data types --------------------------------------------------------- */
62 
/*
 * An incredibly-simple string buffer:
 * a character buffer paired with its cached length,
 * such that the length need not be recomputed with strlen(3).
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
70 
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * In struct roff, lists of this type hold the user-defined strings
 * and macros, the renamed strings and macros, and the multi-byte
 * translation table.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
79 
/*
 * A single number register as part of a singly-linked list.
 * The list head is stored in the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* current value of the register */
	int		 step; /* presumably the auto-increment step; TODO confirm */
	struct roffreg	*next; /* next in list */
};
89 
/*
 * Association of request and macro names with token IDs.
 * Objects of this type are the entries of the hash tables built
 * by roffhash_alloc(); the name is the hash key and is stored
 * inline in a flexible array member, so it has to remain the
 * last member of the structure.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID the name maps to */
	char		 name[]; /* nil-terminated name, allocated inline */
};
97 
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The contexts live in the mstack array of struct roff.
 */
struct	mctx {
	char		**argv; /* argument vector; the array is freed in roff_free() */
	int		 argc; /* presumably the number of arguments in use; TODO confirm */
	int		 argsz; /* presumably the allocated size of argv; TODO confirm */
};
107 
/*
 * The state of one roff parser instance.
 * Allocated by roff_alloc(), returned to its initial state
 * by roff_reset(), and destroyed by roff_free().
 * Both stack positions are -1 while the respective stack is empty.
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
135 
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointers;
 * the innermost node is the last member of struct roff.
 * Nodes are created by roffnode_push() and destroyed by roffnode_pop(),
 * which also frees the name and end strings.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
149 
/*
 * The common parameter list of all request handler functions.
 * Using a macro keeps the prototypes and the definitions
 * of the handlers in sync with the roffproc function pointer type.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler function taking ROFF_ARGS. */
typedef	int (*roffproc)(ROFF_ARGS);
159 
/*
 * The handlers associated with one request;
 * the roffs[] array contains one such entry per token.
 * Handlers that do not apply to a request are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* zero or ROFFMAC_STRUCT */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
167 
/*
 * One predefined string: the name used in the input
 * and the text it gets replaced with.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one initializer of a struct predef array element,
 * including the trailing comma.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
175 
176 /* --- function prototypes ------------------------------------------------ */
177 
/*
 * Forward declarations of all functions private to this file.
 * Those declared with ROFF_ARGS are request handlers
 * having the signature of the roffproc function pointer type.
 */
static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_checkend(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);
258 
259 /* --- constant data ------------------------------------------------------ */
260 
/* Bit flags controlling the parsing of numeric expressions. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
263 
/*
 * Names of all requests and macros, indexed by token ID.
 * The order of the entries is significant: roffhash_alloc() maps
 * each name to its array index, so entries must never be reordered
 * without changing the token numbering accordingly.
 * NULL entries mark token IDs that have no name; roffhash_alloc()
 * skips them.  The first NULL is at the index that the roffs[] table
 * labels ROFF_MAX.
 * The table is exported read-only via the roff_name pointer below.
 * NOTE(review): identifiers starting with two underscores are reserved
 * by the C standard; renaming would need a check for external users.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
        "box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the name table for use by other code. */
const	char *const *roff_name = __roff_name;
373 
/*
 * Dispatch table of request handlers, indexed by token ID.
 * The entries are positional and have to remain in the same order
 * as the names in the __roff_name[] table; the trailing comment
 * on each line names the request the entry belongs to.
 * Only requests that open a block or a conditional provide
 * text and sub handlers in addition to the proc handler.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* presumably ROFF_RENAMED; no name */
	{ roff_userdef, NULL, NULL, 0 }  /* presumably ROFF_USERDEF; no name */
};
620 
/*
 * Array of injected predefined strings.
 * The initializers come from the included file, which presumably
 * consists of PREDEF(name, str) lines -- verify against predefs.in.
 * PREDEFS_MAX has to be at least the number of entries in that file;
 * any surplus elements are zero-initialized.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
626 
/*
 * State of the .ce and .it requests.
 * These are file-scope variables rather than members of struct roff,
 * so they are shared by all parser instances; roff_reset() clears them.
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
631 
632 
633 /* --- request table ------------------------------------------------------ */
634 
635 struct ohash *
636 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
637 {
638 	struct ohash	*htab;
639 	struct roffreq	*req;
640 	enum roff_tok	 tok;
641 	size_t		 sz;
642 	unsigned int	 slot;
643 
644 	htab = mandoc_malloc(sizeof(*htab));
645 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
646 
647 	for (tok = mintok; tok < maxtok; tok++) {
648 		if (roff_name[tok] == NULL)
649 			continue;
650 		sz = strlen(roff_name[tok]);
651 		req = mandoc_malloc(sizeof(*req) + sz + 1);
652 		req->tok = tok;
653 		memcpy(req->name, roff_name[tok], sz + 1);
654 		slot = ohash_qlookup(htab, req->name);
655 		ohash_insert(htab, slot, req);
656 	}
657 	return htab;
658 }
659 
/*
 * Destroy a hash table created with roffhash_alloc():
 * free all entries, then the table itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 pos;

	if (htab != NULL) {
		entry = ohash_first(htab, &pos);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &pos);
		}
		ohash_delete(htab);
		free(htab);
	}
}
674 
675 enum roff_tok
676 roffhash_find(struct ohash *htab, const char *name, size_t sz)
677 {
678 	struct roffreq	*req;
679 	const char	*end;
680 
681 	if (sz) {
682 		end = name + sz;
683 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
684 	} else
685 		req = ohash_find(htab, ohash_qlookup(htab, name));
686 	return req == NULL ? TOKEN_NONE : req->tok;
687 }
688 
689 /* --- stack of request blocks -------------------------------------------- */
690 
691 /*
692  * Pop the current node off of the stack of roff instructions currently
693  * pending.  Return 1 if it is a loop or 0 otherwise.
694  */
695 static int
696 roffnode_pop(struct roff *r)
697 {
698 	struct roffnode	*p;
699 	int		 inloop;
700 
701 	p = r->last;
702 	inloop = p->tok == ROFF_while;
703 	r->last = p->parent;
704 	free(p->name);
705 	free(p->end);
706 	free(p);
707 	return inloop;
708 }
709 
710 /*
711  * Push a roff node onto the instruction stack.  This must later be
712  * removed with roffnode_pop().
713  */
714 static void
715 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
716 		int line, int col)
717 {
718 	struct roffnode	*p;
719 
720 	p = mandoc_calloc(1, sizeof(struct roffnode));
721 	p->tok = tok;
722 	if (name)
723 		p->name = mandoc_strdup(name);
724 	p->parent = r->last;
725 	p->line = line;
726 	p->col = col;
727 	p->rule = p->parent ? p->parent->rule : 0;
728 
729 	r->last = p;
730 }
731 
732 /* --- roff parser state data management ---------------------------------- */
733 
734 static void
735 roff_free1(struct roff *r)
736 {
737 	int		 i;
738 
739 	tbl_free(r->first_tbl);
740 	r->first_tbl = r->last_tbl = r->tbl = NULL;
741 
742 	eqn_free(r->last_eqn);
743 	r->last_eqn = r->eqn = NULL;
744 
745 	while (r->mstackpos >= 0)
746 		roff_userret(r);
747 
748 	while (r->last)
749 		roffnode_pop(r);
750 
751 	free (r->rstack);
752 	r->rstack = NULL;
753 	r->rstacksz = 0;
754 	r->rstackpos = -1;
755 
756 	roff_freereg(r->regtab);
757 	r->regtab = NULL;
758 
759 	roff_freestr(r->strtab);
760 	roff_freestr(r->rentab);
761 	roff_freestr(r->xmbtab);
762 	r->strtab = r->rentab = r->xmbtab = NULL;
763 
764 	if (r->xtab)
765 		for (i = 0; i < 128; i++)
766 			free(r->xtab[i].p);
767 	free(r->xtab);
768 	r->xtab = NULL;
769 }
770 
771 void
772 roff_reset(struct roff *r)
773 {
774 	roff_free1(r);
775 	r->options |= MPARSE_COMMENT;
776 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
777 	r->control = '\0';
778 	r->escape = '\\';
779 	roffce_lines = 0;
780 	roffce_node = NULL;
781 	roffit_lines = 0;
782 	roffit_macro = NULL;
783 }
784 
785 void
786 roff_free(struct roff *r)
787 {
788 	int		 i;
789 
790 	roff_free1(r);
791 	for (i = 0; i < r->mstacksz; i++)
792 		free(r->mstack[i].argv);
793 	free(r->mstack);
794 	roffhash_free(r->reqtab);
795 	free(r);
796 }
797 
798 struct roff *
799 roff_alloc(int options)
800 {
801 	struct roff	*r;
802 
803 	r = mandoc_calloc(1, sizeof(struct roff));
804 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
805 	r->options = options | MPARSE_COMMENT;
806 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
807 	r->mstackpos = -1;
808 	r->rstackpos = -1;
809 	r->escape = '\\';
810 	return r;
811 }
812 
813 /* --- syntax tree state data management ---------------------------------- */
814 
815 static void
816 roff_man_free1(struct roff_man *man)
817 {
818 	if (man->meta.first != NULL)
819 		roff_node_delete(man, man->meta.first);
820 	free(man->meta.msec);
821 	free(man->meta.vol);
822 	free(man->meta.os);
823 	free(man->meta.arch);
824 	free(man->meta.title);
825 	free(man->meta.name);
826 	free(man->meta.date);
827 	free(man->meta.sodest);
828 }
829 
830 void
831 roff_state_reset(struct roff_man *man)
832 {
833 	man->last = man->meta.first;
834 	man->last_es = NULL;
835 	man->flags = 0;
836 	man->lastsec = man->lastnamed = SEC_NONE;
837 	man->next = ROFF_NEXT_CHILD;
838 	roff_setreg(man->roff, "nS", 0, '=');
839 }
840 
841 static void
842 roff_man_alloc1(struct roff_man *man)
843 {
844 	memset(&man->meta, 0, sizeof(man->meta));
845 	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
846 	man->meta.first->type = ROFFT_ROOT;
847 	man->meta.macroset = MACROSET_NONE;
848 	roff_state_reset(man);
849 }
850 
/*
 * Throw away the current tree and metadata, then set up
 * an empty tree again, reusing the same roff_man structure.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Teardown must precede setup: setup zeroes man->meta. */

	roff_man_free1(man);
	roff_man_alloc1(man);
}
857 
/*
 * Destroy a roff_man: first the data it owns via man->meta,
 * then the structure itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);	/* tree and metadata strings */
	free(man);		/* the container */
}
864 
865 struct roff_man *
866 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
867 {
868 	struct roff_man *man;
869 
870 	man = mandoc_calloc(1, sizeof(*man));
871 	man->roff = roff;
872 	man->os_s = os_s;
873 	man->quick = quick;
874 	roff_man_alloc1(man);
875 	roff->man = man;
876 	return man;
877 }
878 
879 /* --- syntax tree handling ----------------------------------------------- */
880 
881 struct roff_node *
882 roff_node_alloc(struct roff_man *man, int line, int pos,
883 	enum roff_type type, int tok)
884 {
885 	struct roff_node	*n;
886 
887 	n = mandoc_calloc(1, sizeof(*n));
888 	n->line = line;
889 	n->pos = pos;
890 	n->tok = tok;
891 	n->type = type;
892 	n->sec = man->lastsec;
893 
894 	if (man->flags & MDOC_SYNOPSIS)
895 		n->flags |= NODE_SYNPRETTY;
896 	else
897 		n->flags &= ~NODE_SYNPRETTY;
898 	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
899 		n->flags |= NODE_NOFILL;
900 	else
901 		n->flags &= ~NODE_NOFILL;
902 	if (man->flags & MDOC_NEWLINE)
903 		n->flags |= NODE_LINE;
904 	man->flags &= ~MDOC_NEWLINE;
905 
906 	return n;
907 }
908 
909 void
910 roff_node_append(struct roff_man *man, struct roff_node *n)
911 {
912 
913 	switch (man->next) {
914 	case ROFF_NEXT_SIBLING:
915 		if (man->last->next != NULL) {
916 			n->next = man->last->next;
917 			man->last->next->prev = n;
918 		} else
919 			man->last->parent->last = n;
920 		man->last->next = n;
921 		n->prev = man->last;
922 		n->parent = man->last->parent;
923 		break;
924 	case ROFF_NEXT_CHILD:
925 		if (man->last->child != NULL) {
926 			n->next = man->last->child;
927 			man->last->child->prev = n;
928 		} else
929 			man->last->last = n;
930 		man->last->child = n;
931 		n->parent = man->last;
932 		break;
933 	default:
934 		abort();
935 	}
936 	man->last = n;
937 
938 	switch (n->type) {
939 	case ROFFT_HEAD:
940 		n->parent->head = n;
941 		break;
942 	case ROFFT_BODY:
943 		if (n->end != ENDBODY_NOT)
944 			return;
945 		n->parent->body = n;
946 		break;
947 	case ROFFT_TAIL:
948 		n->parent->tail = n;
949 		break;
950 	default:
951 		return;
952 	}
953 
954 	/*
955 	 * Copy over the normalised-data pointer of our parent.  Not
956 	 * everybody has one, but copying a null pointer is fine.
957 	 */
958 
959 	n->norm = n->parent->norm;
960 	assert(n->parent->type == ROFFT_BLOCK);
961 }
962 
963 void
964 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
965 {
966 	struct roff_node	*n;
967 
968 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
969 	n->string = roff_strdup(man->roff, word);
970 	roff_node_append(man, n);
971 	n->flags |= NODE_VALID | NODE_ENDED;
972 	man->next = ROFF_NEXT_SIBLING;
973 }
974 
975 void
976 roff_word_append(struct roff_man *man, const char *word)
977 {
978 	struct roff_node	*n;
979 	char			*addstr, *newstr;
980 
981 	n = man->last;
982 	addstr = roff_strdup(man->roff, word);
983 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
984 	free(addstr);
985 	free(n->string);
986 	n->string = newstr;
987 	man->next = ROFF_NEXT_SIBLING;
988 }
989 
990 void
991 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
992 {
993 	struct roff_node	*n;
994 
995 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
996 	roff_node_append(man, n);
997 	man->next = ROFF_NEXT_CHILD;
998 }
999 
1000 struct roff_node *
1001 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1002 {
1003 	struct roff_node	*n;
1004 
1005 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1006 	roff_node_append(man, n);
1007 	man->next = ROFF_NEXT_CHILD;
1008 	return n;
1009 }
1010 
1011 struct roff_node *
1012 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1013 {
1014 	struct roff_node	*n;
1015 
1016 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1017 	roff_node_append(man, n);
1018 	man->next = ROFF_NEXT_CHILD;
1019 	return n;
1020 }
1021 
1022 struct roff_node *
1023 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1024 {
1025 	struct roff_node	*n;
1026 
1027 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1028 	roff_node_append(man, n);
1029 	man->next = ROFF_NEXT_CHILD;
1030 	return n;
1031 }
1032 
1033 static void
1034 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1035 {
1036 	struct roff_node	*n;
1037 	struct tbl_span		*span;
1038 
1039 	if (man->meta.macroset == MACROSET_MAN)
1040 		man_breakscope(man, ROFF_TS);
1041 	while ((span = tbl_span(tbl)) != NULL) {
1042 		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1043 		n->span = span;
1044 		roff_node_append(man, n);
1045 		n->flags |= NODE_VALID | NODE_ENDED;
1046 		man->next = ROFF_NEXT_SIBLING;
1047 	}
1048 }
1049 
1050 void
1051 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1052 {
1053 
1054 	/* Adjust siblings. */
1055 
1056 	if (n->prev)
1057 		n->prev->next = n->next;
1058 	if (n->next)
1059 		n->next->prev = n->prev;
1060 
1061 	/* Adjust parent. */
1062 
1063 	if (n->parent != NULL) {
1064 		if (n->parent->child == n)
1065 			n->parent->child = n->next;
1066 		if (n->parent->last == n)
1067 			n->parent->last = n->prev;
1068 	}
1069 
1070 	/* Adjust parse point. */
1071 
1072 	if (man == NULL)
1073 		return;
1074 	if (man->last == n) {
1075 		if (n->prev == NULL) {
1076 			man->last = n->parent;
1077 			man->next = ROFF_NEXT_CHILD;
1078 		} else {
1079 			man->last = n->prev;
1080 			man->next = ROFF_NEXT_SIBLING;
1081 		}
1082 	}
1083 	if (man->meta.first == n)
1084 		man->meta.first = NULL;
1085 }
1086 
1087 void
1088 roff_node_relink(struct roff_man *man, struct roff_node *n)
1089 {
1090 	roff_node_unlink(man, n);
1091 	n->prev = n->next = NULL;
1092 	roff_node_append(man, n);
1093 }
1094 
1095 void
1096 roff_node_free(struct roff_node *n)
1097 {
1098 
1099 	if (n->args != NULL)
1100 		mdoc_argv_free(n->args);
1101 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1102 		free(n->norm);
1103 	eqn_box_free(n->eqn);
1104 	free(n->string);
1105 	free(n->tag);
1106 	free(n);
1107 }
1108 
1109 void
1110 roff_node_delete(struct roff_man *man, struct roff_node *n)
1111 {
1112 
1113 	while (n->child != NULL)
1114 		roff_node_delete(man, n->child);
1115 	roff_node_unlink(man, n);
1116 	roff_node_free(n);
1117 }
1118 
1119 int
1120 roff_node_transparent(struct roff_node *n)
1121 {
1122 	if (n == NULL)
1123 		return 0;
1124 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1125 		return 1;
1126 	return roff_tok_transparent(n->tok);
1127 }
1128 
1129 int
1130 roff_tok_transparent(enum roff_tok tok)
1131 {
1132 	switch (tok) {
1133 	case ROFF_ft:
1134 	case ROFF_ll:
1135 	case ROFF_mc:
1136 	case ROFF_po:
1137 	case ROFF_ta:
1138 	case MDOC_Db:
1139 	case MDOC_Es:
1140 	case MDOC_Sm:
1141 	case MDOC_Tg:
1142 	case MAN_DT:
1143 	case MAN_UC:
1144 	case MAN_PD:
1145 	case MAN_AT:
1146 		return 1;
1147 	default:
1148 		return 0;
1149 	}
1150 }
1151 
1152 struct roff_node *
1153 roff_node_child(struct roff_node *n)
1154 {
1155 	for (n = n->child; roff_node_transparent(n); n = n->next)
1156 		continue;
1157 	return n;
1158 }
1159 
1160 struct roff_node *
1161 roff_node_prev(struct roff_node *n)
1162 {
1163 	do {
1164 		n = n->prev;
1165 	} while (roff_node_transparent(n));
1166 	return n;
1167 }
1168 
1169 struct roff_node *
1170 roff_node_next(struct roff_node *n)
1171 {
1172 	do {
1173 		n = n->next;
1174 	} while (roff_node_transparent(n));
1175 	return n;
1176 }
1177 
1178 void
1179 deroff(char **dest, const struct roff_node *n)
1180 {
1181 	char	*cp;
1182 	size_t	 sz;
1183 
1184 	if (n->string == NULL) {
1185 		for (n = n->child; n != NULL; n = n->next)
1186 			deroff(dest, n);
1187 		return;
1188 	}
1189 
1190 	/* Skip leading whitespace. */
1191 
1192 	for (cp = n->string; *cp != '\0'; cp++) {
1193 		if (cp[0] == '\\' && cp[1] != '\0' &&
1194 		    strchr(" %&0^|~", cp[1]) != NULL)
1195 			cp++;
1196 		else if ( ! isspace((unsigned char)*cp))
1197 			break;
1198 	}
1199 
1200 	/* Skip trailing backslash. */
1201 
1202 	sz = strlen(cp);
1203 	if (sz > 0 && cp[sz - 1] == '\\')
1204 		sz--;
1205 
1206 	/* Skip trailing whitespace. */
1207 
1208 	for (; sz; sz--)
1209 		if ( ! isspace((unsigned char)cp[sz-1]))
1210 			break;
1211 
1212 	/* Skip empty strings. */
1213 
1214 	if (sz == 0)
1215 		return;
1216 
1217 	if (*dest == NULL) {
1218 		*dest = mandoc_strndup(cp, sz);
1219 		return;
1220 	}
1221 
1222 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1223 	free(*dest);
1224 	*dest = cp;
1225 }
1226 
1227 /* --- main functions of the roff parser ---------------------------------- */
1228 
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * r is the parser state, buf the line buffer, ln the input line
 * number used for messages, pos the offset in buf->buf where
 * processing starts, and newesc the character introducing escape
 * sequences.  The buffer in buf may be replaced by a newly
 * allocated one, with buf->sz adjusted accordingly.
 *
 * Processing happens in two passes.  The first pass searches forward
 * for a comment and truncates the line there.  The second pass walks
 * backwards from the end of the line and handles the escape
 * sequences \*, \$, \B, \n, and \w by substitution, while all
 * others are only checked with mandoc_escape().
 *
 * Return ROFF_CONT if the line is ready for further parsing,
 * ROFF_IGN | ROFF_APPEND if the line is continued on the next
 * input line, or ROFF_IGN if expansion was given up because of
 * too many expansions or an overly long result.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/*
		 * Handle trailing whitespace.
		 * As a side effect, stesc is moved back
		 * from the comment character to the escape character.
		 */

		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/*
		 * Discard normal comments, together with the
		 * blanks in front of them, except that a blank
		 * preceded by a backslash is kept.
		 */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}

	/* Nothing is left to process; otherwise go to the last byte. */

	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* Walk backwards through the line, looking for escapes. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/*
		 * If it is escaped, skip it.
		 * Count the run of escape characters ending at stesc:
		 * an even count means that they pair up, so all of
		 * them are rewritten as backslashes and passed over.
		 * With an odd count, the last one starts an escape
		 * sequence, or a line continuation at the end of
		 * the line.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		/* Give up if this line needs too many expansions. */

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of zero means that the name extends
		 * up to the terminating character term.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/*
		 * Advance to the end of the name.
		 * Inside the argument of \w, an escape sequence counts
		 * as one character if mandoc_escape() reports one of
		 * the classes listed below, and as none otherwise.
		 */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;

			/* \$1 to \$9: a single argument, or nothing. */

			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}

			/* Measure the list of all arguments. */

			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}

			/*
			 * Resize the buffer such that the list fits
			 * in place of the three-byte escape sequence.
			 * The rest of the line is moved before shrinking
			 * or after growing the buffer, respectively.
			 */

			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}

			/*
			 * Write the list over the escape sequence,
			 * leaving stesc right after the last byte written.
			 */

			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/*
			 * Yield "1" if the complete argument parses as
			 * a numeric expression ending right before the
			 * terminating character, or "0" otherwise.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		/*
		 * An undefined replacement expands to nothing.
		 * Refuse replacements making the line overly long.
		 */

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Continue at the end of the inserted text such
		 * that escape sequences contained in it are seen.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1641 
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * The argument starts at *cpp, or one byte later if it is quoted.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments.
 * Replace "\t" and "\a" by literal tabs and "\\" by ASCII_ESC.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance *pos by the same amount.
 * Return a newly allocated copy of the parsed argument.
 * If the argument contained "\\", the copy is additionally passed
 * through roff_expand() with ASCII_ESC as the escape character,
 * and an empty string is returned if that asks to ignore the result.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/*
	 * The variable pairs counts the bytes dropped so far,
	 * that is, how far the remaining text has to move left.
	 */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 * The single backslash is represented by
			 * ASCII_ESC, and newesc records that the
			 * result needs another pass of expansion.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	/* Warn about blanks at the end of the argument line. */
	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* Without "\\" in the argument, the plain copy is the result. */
	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/* Otherwise, expand the escapes marked with ASCII_ESC. */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1752 
1753 
1754 /*
1755  * Process text streams.
1756  */
1757 static int
1758 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1759 {
1760 	size_t		 sz;
1761 	const char	*start;
1762 	char		*p;
1763 	int		 isz;
1764 	enum mandoc_esc	 esc;
1765 
1766 	/* Spring the input line trap. */
1767 
1768 	if (roffit_lines == 1) {
1769 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1770 		free(buf->buf);
1771 		buf->buf = p;
1772 		buf->sz = isz + 1;
1773 		*offs = 0;
1774 		free(roffit_macro);
1775 		roffit_lines = 0;
1776 		return ROFF_REPARSE;
1777 	} else if (roffit_lines > 1)
1778 		--roffit_lines;
1779 
1780 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1781 		if (roffce_lines < 1) {
1782 			r->man->last = roffce_node;
1783 			r->man->next = ROFF_NEXT_SIBLING;
1784 			roffce_lines = 0;
1785 			roffce_node = NULL;
1786 		} else
1787 			roffce_lines--;
1788 	}
1789 
1790 	/* Convert all breakable hyphens into ASCII_HYPH. */
1791 
1792 	start = p = buf->buf + pos;
1793 
1794 	while (*p != '\0') {
1795 		sz = strcspn(p, "-\\");
1796 		p += sz;
1797 
1798 		if (*p == '\0')
1799 			break;
1800 
1801 		if (*p == '\\') {
1802 			/* Skip over escapes. */
1803 			p++;
1804 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1805 			if (esc == ESCAPE_ERROR)
1806 				break;
1807 			while (*p == '-')
1808 				p++;
1809 			continue;
1810 		} else if (p == start) {
1811 			p++;
1812 			continue;
1813 		}
1814 
1815 		if (isalpha((unsigned char)p[-1]) &&
1816 		    isalpha((unsigned char)p[1]))
1817 			*p = ASCII_HYPH;
1818 		p++;
1819 	}
1820 	return ROFF_CONT;
1821 }
1822 
/*
 * Parse one input line with the roff parser.
 * The line is contained in buf and starts at offset *offs;
 * ln is the input line number used for messages.
 * Escape sequences are expanded first, so the buffer in buf
 * may get replaced.  Then the line is passed to the handler of
 * the open roff block, to the equation or table parser, to the
 * text handler, or to the handler of the request or user-defined
 * macro found on it, whichever applies.
 * The return value is composed of ROFF_* codes; in particular,
 * ROFF_CONT is returned for a macro line that contains neither
 * a roff request nor a user-defined macro, leaving it to the
 * standard macro set parsers.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* This may advance pos. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the bits outside of ROFF_MASK. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl) {
		/* After the first text line, comments are no longer saved. */
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/*
	 * Tables ignore most macros.
	 * An unknown macro is skipped, and the rest
	 * of the line is passed to the table parser.
	 */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}
1950 
1951 /*
1952  * Internal interface function to tell the roff parser that execution
1953  * of the current macro ended.  This is required because macro
1954  * definitions usually do not end with a .return request.
1955  */
1956 void
1957 roff_userret(struct roff *r)
1958 {
1959 	struct mctx	*ctx;
1960 	int		 i;
1961 
1962 	assert(r->mstackpos >= 0);
1963 	ctx = r->mstack + r->mstackpos;
1964 	for (i = 0; i < ctx->argc; i++)
1965 		free(ctx->argv[i]);
1966 	ctx->argc = 0;
1967 	r->mstackpos--;
1968 }
1969 
1970 void
1971 roff_endparse(struct roff *r)
1972 {
1973 	if (r->last != NULL)
1974 		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1975 		    r->last->col, "%s", roff_name[r->last->tok]);
1976 
1977 	if (r->eqn != NULL) {
1978 		mandoc_msg(MANDOCERR_BLK_NOEND,
1979 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1980 		eqn_parse(r->eqn);
1981 		r->eqn = NULL;
1982 	}
1983 
1984 	if (r->tbl != NULL) {
1985 		tbl_end(r->tbl, 1);
1986 		r->tbl = NULL;
1987 	}
1988 }
1989 
1990 /*
1991  * Parse a roff node's type from the input buffer.  This must be in the
1992  * form of ".foo xxx" in the usual way.
1993  */
1994 static enum roff_tok
1995 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1996 {
1997 	char		*cp;
1998 	const char	*mac;
1999 	size_t		 maclen;
2000 	int		 deftype;
2001 	enum roff_tok	 t;
2002 
2003 	cp = buf + *pos;
2004 
2005 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2006 		return TOKEN_NONE;
2007 
2008 	mac = cp;
2009 	maclen = roff_getname(r, &cp, ln, ppos);
2010 
2011 	deftype = ROFFDEF_USER | ROFFDEF_REN;
2012 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2013 	switch (deftype) {
2014 	case ROFFDEF_USER:
2015 		t = ROFF_USERDEF;
2016 		break;
2017 	case ROFFDEF_REN:
2018 		t = ROFF_RENAMED;
2019 		break;
2020 	default:
2021 		t = roffhash_find(r->reqtab, mac, maclen);
2022 		break;
2023 	}
2024 	if (t != TOKEN_NONE)
2025 		*pos = cp - buf;
2026 	else if (deftype == ROFFDEF_UNDEF) {
2027 		/* Using an undefined macro defines it to be empty. */
2028 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2029 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2030 	}
2031 	return t;
2032 }
2033 
2034 /* --- handling of request blocks ----------------------------------------- */
2035 
2036 /*
2037  * Close a macro definition block or an "ignore" block.
2038  */
2039 static int
2040 roff_cblock(ROFF_ARGS)
2041 {
2042 	int	 rr;
2043 
2044 	if (r->last == NULL) {
2045 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2046 		return ROFF_IGN;
2047 	}
2048 
2049 	switch (r->last->tok) {
2050 	case ROFF_am:
2051 	case ROFF_ami:
2052 	case ROFF_de:
2053 	case ROFF_dei:
2054 	case ROFF_ig:
2055 		break;
2056 	case ROFF_am1:
2057 	case ROFF_de1:
2058 		/* Remapped in roff_block(). */
2059 		abort();
2060 	default:
2061 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2062 		return ROFF_IGN;
2063 	}
2064 
2065 	roffnode_pop(r);
2066 	roffnode_cleanscope(r);
2067 
2068 	/*
2069 	 * If a conditional block with braces is still open,
2070 	 * check for "\}" block end markers.
2071 	 */
2072 
2073 	if (r->last != NULL && r->last->endspan < 0) {
2074 		rr = 1;  /* If arguments follow "\}", warn about them. */
2075 		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2076 	}
2077 
2078 	if (buf->buf[pos] != '\0')
2079 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2080 		    ".. %s", buf->buf + pos);
2081 
2082 	return ROFF_IGN;
2083 }
2084 
2085 /*
2086  * Pop all nodes ending at the end of the current input line.
2087  * Return the number of loops ended.
2088  */
2089 static int
2090 roffnode_cleanscope(struct roff *r)
2091 {
2092 	int inloop;
2093 
2094 	inloop = 0;
2095 	while (r->last != NULL && r->last->endspan > 0) {
2096 		if (--r->last->endspan != 0)
2097 			break;
2098 		inloop += roffnode_pop(r);
2099 	}
2100 	return inloop;
2101 }
2102 
2103 /*
2104  * Handle the closing "\}" of a conditional block.
2105  * Apart from generating warnings, this only pops nodes.
2106  * Return the number of loops ended.
2107  */
2108 static int
2109 roff_ccond(struct roff *r, int ln, int ppos)
2110 {
2111 	if (NULL == r->last) {
2112 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2113 		return 0;
2114 	}
2115 
2116 	switch (r->last->tok) {
2117 	case ROFF_el:
2118 	case ROFF_ie:
2119 	case ROFF_if:
2120 	case ROFF_while:
2121 		break;
2122 	default:
2123 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2124 		return 0;
2125 	}
2126 
2127 	if (r->last->endspan > -1) {
2128 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2129 		return 0;
2130 	}
2131 
2132 	return roffnode_pop(r) + roffnode_cleanscope(r);
2133 }
2134 
2135 static int
2136 roff_block(ROFF_ARGS)
2137 {
2138 	const char	*name, *value;
2139 	char		*call, *cp, *iname, *rname;
2140 	size_t		 csz, namesz, rsz;
2141 	int		 deftype;
2142 
2143 	/* Ignore groff compatibility mode for now. */
2144 
2145 	if (tok == ROFF_de1)
2146 		tok = ROFF_de;
2147 	else if (tok == ROFF_dei1)
2148 		tok = ROFF_dei;
2149 	else if (tok == ROFF_am1)
2150 		tok = ROFF_am;
2151 	else if (tok == ROFF_ami1)
2152 		tok = ROFF_ami;
2153 
2154 	/* Parse the macro name argument. */
2155 
2156 	cp = buf->buf + pos;
2157 	if (tok == ROFF_ig) {
2158 		iname = NULL;
2159 		namesz = 0;
2160 	} else {
2161 		iname = cp;
2162 		namesz = roff_getname(r, &cp, ln, ppos);
2163 		iname[namesz] = '\0';
2164 	}
2165 
2166 	/* Resolve the macro name argument if it is indirect. */
2167 
2168 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2169 		deftype = ROFFDEF_USER;
2170 		name = roff_getstrn(r, iname, namesz, &deftype);
2171 		if (name == NULL) {
2172 			mandoc_msg(MANDOCERR_STR_UNDEF,
2173 			    ln, (int)(iname - buf->buf),
2174 			    "%.*s", (int)namesz, iname);
2175 			namesz = 0;
2176 		} else
2177 			namesz = strlen(name);
2178 	} else
2179 		name = iname;
2180 
2181 	if (namesz == 0 && tok != ROFF_ig) {
2182 		mandoc_msg(MANDOCERR_REQ_EMPTY,
2183 		    ln, ppos, "%s", roff_name[tok]);
2184 		return ROFF_IGN;
2185 	}
2186 
2187 	roffnode_push(r, tok, name, ln, ppos);
2188 
2189 	/*
2190 	 * At the beginning of a `de' macro, clear the existing string
2191 	 * with the same name, if there is one.  New content will be
2192 	 * appended from roff_block_text() in multiline mode.
2193 	 */
2194 
2195 	if (tok == ROFF_de || tok == ROFF_dei) {
2196 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2197 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2198 	} else if (tok == ROFF_am || tok == ROFF_ami) {
2199 		deftype = ROFFDEF_ANY;
2200 		value = roff_getstrn(r, iname, namesz, &deftype);
2201 		switch (deftype) {  /* Before appending, ... */
2202 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2203 			roff_setstrn(&r->strtab, name, namesz,
2204 			    value, strlen(value), 0);
2205 			break;
2206 		case ROFFDEF_REN: /* call original standard macro. */
2207 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2208 			    (int)strlen(value), value);
2209 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2210 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2211 			free(call);
2212 			break;
2213 		case ROFFDEF_STD:  /* rename and call standard macro. */
2214 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2215 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2216 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2217 			    (int)rsz, rname);
2218 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2219 			free(call);
2220 			free(rname);
2221 			break;
2222 		default:
2223 			break;
2224 		}
2225 	}
2226 
2227 	if (*cp == '\0')
2228 		return ROFF_IGN;
2229 
2230 	/* Get the custom end marker. */
2231 
2232 	iname = cp;
2233 	namesz = roff_getname(r, &cp, ln, ppos);
2234 
2235 	/* Resolve the end marker if it is indirect. */
2236 
2237 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2238 		deftype = ROFFDEF_USER;
2239 		name = roff_getstrn(r, iname, namesz, &deftype);
2240 		if (name == NULL) {
2241 			mandoc_msg(MANDOCERR_STR_UNDEF,
2242 			    ln, (int)(iname - buf->buf),
2243 			    "%.*s", (int)namesz, iname);
2244 			namesz = 0;
2245 		} else
2246 			namesz = strlen(name);
2247 	} else
2248 		name = iname;
2249 
2250 	if (namesz)
2251 		r->last->end = mandoc_strndup(name, namesz);
2252 
2253 	if (*cp != '\0')
2254 		mandoc_msg(MANDOCERR_ARG_EXCESS,
2255 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2256 
2257 	return ROFF_IGN;
2258 }
2259 
2260 static int
2261 roff_block_sub(ROFF_ARGS)
2262 {
2263 	enum roff_tok	t;
2264 	int		i, j;
2265 
2266 	/*
2267 	 * First check whether a custom macro exists at this level.  If
2268 	 * it does, then check against it.  This is some of groff's
2269 	 * stranger behaviours.  If we encountered a custom end-scope
2270 	 * tag and that tag also happens to be a "real" macro, then we
2271 	 * need to try interpreting it again as a real macro.  If it's
2272 	 * not, then return ignore.  Else continue.
2273 	 */
2274 
2275 	if (r->last->end) {
2276 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2277 			if (buf->buf[i] != r->last->end[j])
2278 				break;
2279 
2280 		if (r->last->end[j] == '\0' &&
2281 		    (buf->buf[i] == '\0' ||
2282 		     buf->buf[i] == ' ' ||
2283 		     buf->buf[i] == '\t')) {
2284 			roffnode_pop(r);
2285 			roffnode_cleanscope(r);
2286 
2287 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2288 				i++;
2289 
2290 			pos = i;
2291 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2292 			    TOKEN_NONE)
2293 				return ROFF_RERUN;
2294 			return ROFF_IGN;
2295 		}
2296 	}
2297 
2298 	/*
2299 	 * If we have no custom end-query or lookup failed, then try
2300 	 * pulling it out of the hashtable.
2301 	 */
2302 
2303 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2304 
2305 	if (t != ROFF_cblock) {
2306 		if (tok != ROFF_ig)
2307 			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2308 		return ROFF_IGN;
2309 	}
2310 
2311 	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2312 }
2313 
2314 static int
2315 roff_block_text(ROFF_ARGS)
2316 {
2317 
2318 	if (tok != ROFF_ig)
2319 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2320 
2321 	return ROFF_IGN;
2322 }
2323 
/*
 * Check for a closing "\}" and handle it.
 * Every "\}" found on the line from pos onward is passed to
 * roff_ccond() and taken out of the input buffer in place.
 * In this function, the final "int *offs" argument is used for
 * different purposes than elsewhere:
 * Input: *offs == 0: caller wants to discard arguments following \}
 *        *offs == 1: caller wants to preserve text following \}
 * Output: *offs = 0: tell caller to discard input line
 *         *offs = 1: tell caller to use input line
 * Return ROFF_IGN, combined with ROFF_LOOPCONT or ROFF_LOOPEXIT
 * if tok is ROFF_while and at least one loop was ended.
 */
static int
roff_cond_checkend(ROFF_ARGS)
{
	char		*ep;
	int		 endloop, irc, rr;

	irc = ROFF_IGN;

	/* Remember the rule before any node gets popped. */
	rr = r->last->rule;

	/* What to report if a loop ends: continue if the rule is true. */
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If "\}" occurs on a macro line without a preceding macro or
	 * a text line contains nothing else, drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
		rr = 0;

	/*
	 * The closing delimiter "\}" rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
	 */

	while ((ep = strchr(ep, '\\')) != NULL) {
		switch (ep[1]) {
		case '}':
			/*
			 * At the end of the line, truncate the line.
			 * If the line is to be used, turn "\}" into "\&".
			 * Otherwise, delete the "\}" from the buffer.
			 * In all three cases, ep is not advanced here.
			 */
			if (ep[2] == '\0')
				ep[0] = '\0';
			else if (rr)
				ep[1] = '&';
			else
				memmove(ep, ep + 2, strlen(ep + 2) + 1);
			if (roff_ccond(r, ln, ep - buf->buf))
				irc |= endloop;
			break;
		case '\0':
			/* Trailing backslash: step onto the terminator. */
			++ep;
			break;
		default:
			/* Skip the backslash and the character after it. */
			ep += 2;
			break;
		}
	}
	*offs = rr;
	return irc;
}
2383 
2384 /*
2385  * Parse and process a request or macro line in conditional scope.
2386  */
2387 static int
2388 roff_cond_sub(ROFF_ARGS)
2389 {
2390 	struct roffnode	*bl;
2391 	int		 irc, rr;
2392 	enum roff_tok	 t;
2393 
2394 	rr = 0;  /* If arguments follow "\}", skip them. */
2395 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2396 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2397 
2398 	/* For now, let high level macros abort .ce mode. */
2399 
2400 	if (roffce_node != NULL &&
2401 	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2402              t == ROFF_TH || t == ROFF_TS)) {
2403 		r->man->last = roffce_node;
2404 		r->man->next = ROFF_NEXT_SIBLING;
2405 		roffce_lines = 0;
2406 		roffce_node = NULL;
2407 	}
2408 
2409 	/*
2410 	 * Fully handle known macros when they are structurally
2411 	 * required or when the conditional evaluated to true.
2412 	 */
2413 
2414 	if (t == ROFF_break) {
2415 		if (irc & ROFF_LOOPMASK)
2416 			irc = ROFF_IGN | ROFF_LOOPEXIT;
2417 		else if (rr) {
2418 			for (bl = r->last; bl != NULL; bl = bl->parent) {
2419 				bl->rule = 0;
2420 				if (bl->tok == ROFF_while)
2421 					break;
2422 			}
2423 		}
2424 	} else if (t != TOKEN_NONE &&
2425 	    (rr || roffs[t].flags & ROFFMAC_STRUCT))
2426 		irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2427 	else
2428 		irc |= rr ? ROFF_CONT : ROFF_IGN;
2429 	return irc;
2430 }
2431 
2432 /*
2433  * Parse and process a text line in conditional scope.
2434  */
2435 static int
2436 roff_cond_text(ROFF_ARGS)
2437 {
2438 	int	 irc, rr;
2439 
2440 	rr = 1;  /* If arguments follow "\}", preserve them. */
2441 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2442 	if (rr)
2443 		irc |= ROFF_CONT;
2444 	return irc;
2445 }
2446 
2447 /* --- handling of numeric and conditional expressions -------------------- */
2448 
2449 /*
2450  * Parse a single signed integer number.  Stop at the first non-digit.
2451  * If there is at least one digit, return success and advance the
2452  * parse point, else return failure and let the parse point unchanged.
2453  * Ignore overflows, treat them just like the C language.
2454  */
2455 static int
2456 roff_getnum(const char *v, int *pos, int *res, int flags)
2457 {
2458 	int	 myres, scaled, n, p;
2459 
2460 	if (NULL == res)
2461 		res = &myres;
2462 
2463 	p = *pos;
2464 	n = v[p] == '-';
2465 	if (n || v[p] == '+')
2466 		p++;
2467 
2468 	if (flags & ROFFNUM_WHITE)
2469 		while (isspace((unsigned char)v[p]))
2470 			p++;
2471 
2472 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2473 		*res = 10 * *res + v[p] - '0';
2474 	if (p == *pos + n)
2475 		return 0;
2476 
2477 	if (n)
2478 		*res = -*res;
2479 
2480 	/* Each number may be followed by one optional scaling unit. */
2481 
2482 	switch (v[p]) {
2483 	case 'f':
2484 		scaled = *res * 65536;
2485 		break;
2486 	case 'i':
2487 		scaled = *res * 240;
2488 		break;
2489 	case 'c':
2490 		scaled = *res * 240 / 2.54;
2491 		break;
2492 	case 'v':
2493 	case 'P':
2494 		scaled = *res * 40;
2495 		break;
2496 	case 'm':
2497 	case 'n':
2498 		scaled = *res * 24;
2499 		break;
2500 	case 'p':
2501 		scaled = *res * 10 / 3;
2502 		break;
2503 	case 'u':
2504 		scaled = *res;
2505 		break;
2506 	case 'M':
2507 		scaled = *res * 6 / 25;
2508 		break;
2509 	default:
2510 		scaled = *res;
2511 		p--;
2512 		break;
2513 	}
2514 	if (flags & ROFFNUM_SCALE)
2515 		*res = scaled;
2516 
2517 	*pos = p + 1;
2518 	return 1;
2519 }
2520 
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If the cursor already is at the end of the line,
 * fail without moving it.
 * Return 1 if the strings match, or 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/*
	 * Without a delimiter, there is nothing to compare,
	 * and scanning from s1 + 1 would leave the string.
	 */

	if ('\0' == *s1)
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)		/* no final delimiter after a mismatch */
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')	/* step over the final delimiter */
		s3++;
	*pos = s3 - v;
	return match;
}
2563 
2564 /*
2565  * Evaluate an optionally negated single character, numerical,
2566  * or string condition.
2567  */
2568 static int
2569 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2570 {
2571 	const char	*start, *end;
2572 	char		*cp, *name;
2573 	size_t		 sz;
2574 	int		 deftype, len, number, savepos, istrue, wanttrue;
2575 
2576 	if ('!' == v[*pos]) {
2577 		wanttrue = 0;
2578 		(*pos)++;
2579 	} else
2580 		wanttrue = 1;
2581 
2582 	switch (v[*pos]) {
2583 	case '\0':
2584 		return 0;
2585 	case 'n':
2586 	case 'o':
2587 		(*pos)++;
2588 		return wanttrue;
2589 	case 'e':
2590 	case 't':
2591 	case 'v':
2592 		(*pos)++;
2593 		return !wanttrue;
2594 	case 'c':
2595 		do {
2596 			(*pos)++;
2597 		} while (v[*pos] == ' ');
2598 
2599 		/*
2600 		 * Quirk for groff compatibility:
2601 		 * The horizontal tab is neither available nor unavailable.
2602 		 */
2603 
2604 		if (v[*pos] == '\t') {
2605 			(*pos)++;
2606 			return 0;
2607 		}
2608 
2609 		/* Printable ASCII characters are available. */
2610 
2611 		if (v[*pos] != '\\') {
2612 			(*pos)++;
2613 			return wanttrue;
2614 		}
2615 
2616 		end = v + ++*pos;
2617 		switch (mandoc_escape(&end, &start, &len)) {
2618 		case ESCAPE_SPECIAL:
2619 			istrue = mchars_spec2cp(start, len) != -1;
2620 			break;
2621 		case ESCAPE_UNICODE:
2622 			istrue = 1;
2623 			break;
2624 		case ESCAPE_NUMBERED:
2625 			istrue = mchars_num2char(start, len) != -1;
2626 			break;
2627 		default:
2628 			istrue = !wanttrue;
2629 			break;
2630 		}
2631 		*pos = end - v;
2632 		return istrue == wanttrue;
2633 	case 'd':
2634 	case 'r':
2635 		cp = v + *pos + 1;
2636 		while (*cp == ' ')
2637 			cp++;
2638 		name = cp;
2639 		sz = roff_getname(r, &cp, ln, cp - v);
2640 		if (sz == 0)
2641 			istrue = 0;
2642 		else if (v[*pos] == 'r')
2643 			istrue = roff_hasregn(r, name, sz);
2644 		else {
2645 			deftype = ROFFDEF_ANY;
2646 		        roff_getstrn(r, name, sz, &deftype);
2647 			istrue = !!deftype;
2648 		}
2649 		*pos = (name + sz) - v;
2650 		return istrue == wanttrue;
2651 	default:
2652 		break;
2653 	}
2654 
2655 	savepos = *pos;
2656 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2657 		return (number > 0) == wanttrue;
2658 	else if (*pos == savepos)
2659 		return roff_evalstrcond(v, pos) == wanttrue;
2660 	else
2661 		return 0;
2662 }
2663 
2664 static int
2665 roff_line_ignore(ROFF_ARGS)
2666 {
2667 
2668 	return ROFF_IGN;
2669 }
2670 
2671 static int
2672 roff_insec(ROFF_ARGS)
2673 {
2674 
2675 	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2676 	return ROFF_IGN;
2677 }
2678 
2679 static int
2680 roff_unsupp(ROFF_ARGS)
2681 {
2682 
2683 	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2684 	return ROFF_IGN;
2685 }
2686 
2687 static int
2688 roff_cond(ROFF_ARGS)
2689 {
2690 	int	 irc;
2691 
2692 	roffnode_push(r, tok, NULL, ln, ppos);
2693 
2694 	/*
2695 	 * An `.el' has no conditional body: it will consume the value
2696 	 * of the current rstack entry set in prior `ie' calls or
2697 	 * defaults to DENY.
2698 	 *
2699 	 * If we're not an `el', however, then evaluate the conditional.
2700 	 */
2701 
2702 	r->last->rule = tok == ROFF_el ?
2703 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2704 	    roff_evalcond(r, ln, buf->buf, &pos);
2705 
2706 	/*
2707 	 * An if-else will put the NEGATION of the current evaluated
2708 	 * conditional into the stack of rules.
2709 	 */
2710 
2711 	if (tok == ROFF_ie) {
2712 		if (r->rstackpos + 1 == r->rstacksz) {
2713 			r->rstacksz += 16;
2714 			r->rstack = mandoc_reallocarray(r->rstack,
2715 			    r->rstacksz, sizeof(int));
2716 		}
2717 		r->rstack[++r->rstackpos] = !r->last->rule;
2718 	}
2719 
2720 	/* If the parent has false as its rule, then so do we. */
2721 
2722 	if (r->last->parent && !r->last->parent->rule)
2723 		r->last->rule = 0;
2724 
2725 	/*
2726 	 * Determine scope.
2727 	 * If there is nothing on the line after the conditional,
2728 	 * not even whitespace, use next-line scope.
2729 	 * Except that .while does not support next-line scope.
2730 	 */
2731 
2732 	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2733 		r->last->endspan = 2;
2734 		goto out;
2735 	}
2736 
2737 	while (buf->buf[pos] == ' ')
2738 		pos++;
2739 
2740 	/* An opening brace requests multiline scope. */
2741 
2742 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2743 		r->last->endspan = -1;
2744 		pos += 2;
2745 		while (buf->buf[pos] == ' ')
2746 			pos++;
2747 		goto out;
2748 	}
2749 
2750 	/*
2751 	 * Anything else following the conditional causes
2752 	 * single-line scope.  Warn if the scope contains
2753 	 * nothing but trailing whitespace.
2754 	 */
2755 
2756 	if (buf->buf[pos] == '\0')
2757 		mandoc_msg(MANDOCERR_COND_EMPTY,
2758 		    ln, ppos, "%s", roff_name[tok]);
2759 
2760 	r->last->endspan = 1;
2761 
2762 out:
2763 	*offs = pos;
2764 	irc = ROFF_RERUN;
2765 	if (tok == ROFF_while)
2766 		irc |= ROFF_WHILE;
2767 	return irc;
2768 }
2769 
2770 static int
2771 roff_ds(ROFF_ARGS)
2772 {
2773 	char		*string;
2774 	const char	*name;
2775 	size_t		 namesz;
2776 
2777 	/* Ignore groff compatibility mode for now. */
2778 
2779 	if (tok == ROFF_ds1)
2780 		tok = ROFF_ds;
2781 	else if (tok == ROFF_as1)
2782 		tok = ROFF_as;
2783 
2784 	/*
2785 	 * The first word is the name of the string.
2786 	 * If it is empty or terminated by an escape sequence,
2787 	 * abort the `ds' request without defining anything.
2788 	 */
2789 
2790 	name = string = buf->buf + pos;
2791 	if (*name == '\0')
2792 		return ROFF_IGN;
2793 
2794 	namesz = roff_getname(r, &string, ln, pos);
2795 	switch (name[namesz]) {
2796 	case '\\':
2797 		return ROFF_IGN;
2798 	case '\t':
2799 		string = buf->buf + pos + namesz;
2800 		break;
2801 	default:
2802 		break;
2803 	}
2804 
2805 	/* Read past the initial double-quote, if any. */
2806 	if (*string == '"')
2807 		string++;
2808 
2809 	/* The rest is the value. */
2810 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2811 	    ROFF_as == tok);
2812 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2813 	return ROFF_IGN;
2814 }
2815 
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store a one-character code
 * in *res, advance the parse point, and return the code.
 * The two-character operators are coded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g',
 * ">?" as 'a', and "==" as '='.
 * Otherwise, store the offending character in *res, return
 * failure, and leave the parse point unchanged.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*op;
	int		 width;

	op = v + *pos;
	*res = op[0];
	width = 1;

	if (op[0] == '<') {
		if (op[1] == '=') {
			*res = 'l';
			width = 2;
		} else if (op[1] == '>') {
			*res = '!';
			width = 2;
		} else if (op[1] == '?') {
			*res = 'i';
			width = 2;
		}
	} else if (op[0] == '>') {
		if (op[1] == '=') {
			*res = 'g';
			width = 2;
		} else if (op[1] == '?') {
			*res = 'a';
			width = 2;
		}
	} else if (op[0] == '=') {
		if (op[1] == '=')
			width = 2;
	} else if (op[0] != '+' && op[0] != '-' && op[0] != '*' &&
	    op[0] != '/' && op[0] != '%' && op[0] != '&' &&
	    op[0] != ':')
		return 0;

	*pos += width;
	return *res;
}
2879 
2880 /*
2881  * Evaluate either a parenthesized numeric expression
2882  * or a single signed integer number.
2883  */
2884 static int
2885 roff_evalpar(struct roff *r, int ln,
2886 	const char *v, int *pos, int *res, int flags)
2887 {
2888 
2889 	if ('(' != v[*pos])
2890 		return roff_getnum(v, pos, res, flags);
2891 
2892 	(*pos)++;
2893 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2894 		return 0;
2895 
2896 	/*
2897 	 * Omission of the closing parenthesis
2898 	 * is an error in validation mode,
2899 	 * but ignored in evaluation mode.
2900 	 */
2901 
2902 	if (')' == v[*pos])
2903 		(*pos)++;
2904 	else if (NULL == res)
2905 		return 0;
2906 
2907 	return 1;
2908 }
2909 
2910 /*
2911  * Evaluate a complete numeric expression.
2912  * Proceed left to right, there is no concept of precedence.
2913  */
2914 static int
2915 roff_evalnum(struct roff *r, int ln, const char *v,
2916 	int *pos, int *res, int flags)
2917 {
2918 	int		 mypos, operand2;
2919 	char		 operator;
2920 
2921 	if (NULL == pos) {
2922 		mypos = 0;
2923 		pos = &mypos;
2924 	}
2925 
2926 	if (flags & ROFFNUM_WHITE)
2927 		while (isspace((unsigned char)v[*pos]))
2928 			(*pos)++;
2929 
2930 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2931 		return 0;
2932 
2933 	while (1) {
2934 		if (flags & ROFFNUM_WHITE)
2935 			while (isspace((unsigned char)v[*pos]))
2936 				(*pos)++;
2937 
2938 		if ( ! roff_getop(v, pos, &operator))
2939 			break;
2940 
2941 		if (flags & ROFFNUM_WHITE)
2942 			while (isspace((unsigned char)v[*pos]))
2943 				(*pos)++;
2944 
2945 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2946 			return 0;
2947 
2948 		if (flags & ROFFNUM_WHITE)
2949 			while (isspace((unsigned char)v[*pos]))
2950 				(*pos)++;
2951 
2952 		if (NULL == res)
2953 			continue;
2954 
2955 		switch (operator) {
2956 		case '+':
2957 			*res += operand2;
2958 			break;
2959 		case '-':
2960 			*res -= operand2;
2961 			break;
2962 		case '*':
2963 			*res *= operand2;
2964 			break;
2965 		case '/':
2966 			if (operand2 == 0) {
2967 				mandoc_msg(MANDOCERR_DIVZERO,
2968 					ln, *pos, "%s", v);
2969 				*res = 0;
2970 				break;
2971 			}
2972 			*res /= operand2;
2973 			break;
2974 		case '%':
2975 			if (operand2 == 0) {
2976 				mandoc_msg(MANDOCERR_DIVZERO,
2977 					ln, *pos, "%s", v);
2978 				*res = 0;
2979 				break;
2980 			}
2981 			*res %= operand2;
2982 			break;
2983 		case '<':
2984 			*res = *res < operand2;
2985 			break;
2986 		case '>':
2987 			*res = *res > operand2;
2988 			break;
2989 		case 'l':
2990 			*res = *res <= operand2;
2991 			break;
2992 		case 'g':
2993 			*res = *res >= operand2;
2994 			break;
2995 		case '=':
2996 			*res = *res == operand2;
2997 			break;
2998 		case '!':
2999 			*res = *res != operand2;
3000 			break;
3001 		case '&':
3002 			*res = *res && operand2;
3003 			break;
3004 		case ':':
3005 			*res = *res || operand2;
3006 			break;
3007 		case 'i':
3008 			if (operand2 < *res)
3009 				*res = operand2;
3010 			break;
3011 		case 'a':
3012 			if (operand2 > *res)
3013 				*res = operand2;
3014 			break;
3015 		default:
3016 			abort();
3017 		}
3018 	}
3019 	return 1;
3020 }
3021 
3022 /* --- register management ------------------------------------------------ */
3023 
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name, without changing its step value.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3029 
3030 static void
3031 roff_setregn(struct roff *r, const char *name, size_t len,
3032     int val, char sign, int step)
3033 {
3034 	struct roffreg	*reg;
3035 
3036 	/* Search for an existing register with the same name. */
3037 	reg = r->regtab;
3038 
3039 	while (reg != NULL && (reg->key.sz != len ||
3040 	    strncmp(reg->key.p, name, len) != 0))
3041 		reg = reg->next;
3042 
3043 	if (NULL == reg) {
3044 		/* Create a new register. */
3045 		reg = mandoc_malloc(sizeof(struct roffreg));
3046 		reg->key.p = mandoc_strndup(name, len);
3047 		reg->key.sz = len;
3048 		reg->val = 0;
3049 		reg->step = 0;
3050 		reg->next = r->regtab;
3051 		r->regtab = reg;
3052 	}
3053 
3054 	if ('+' == sign)
3055 		reg->val += val;
3056 	else if ('-' == sign)
3057 		reg->val -= val;
3058 	else
3059 		reg->val = val;
3060 	if (step != INT_MIN)
3061 		reg->step = step;
3062 }
3063 
3064 /*
3065  * Handle some predefined read-only number registers.
3066  * For now, return -1 if the requested register is not predefined;
3067  * in case a predefined read-only register having the value -1
3068  * were to turn up, another special value would have to be chosen.
3069  */
3070 static int
3071 roff_getregro(const struct roff *r, const char *name)
3072 {
3073 
3074 	switch (*name) {
3075 	case '$':  /* Number of arguments of the last macro evaluated. */
3076 		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3077 	case 'A':  /* ASCII approximation mode is always off. */
3078 		return 0;
3079 	case 'g':  /* Groff compatibility mode is always on. */
3080 		return 1;
3081 	case 'H':  /* Fixed horizontal resolution. */
3082 		return 24;
3083 	case 'j':  /* Always adjust left margin only. */
3084 		return 0;
3085 	case 'T':  /* Some output device is always defined. */
3086 		return 1;
3087 	case 'V':  /* Fixed vertical resolution. */
3088 		return 40;
3089 	default:
3090 		return -1;
3091 	}
3092 }
3093 
/*
 * Return the value of the number register with the
 * NUL-terminated name, without applying its step.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3099 
3100 static int
3101 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3102 {
3103 	struct roffreg	*reg;
3104 	int		 val;
3105 
3106 	if ('.' == name[0] && 2 == len) {
3107 		val = roff_getregro(r, name + 1);
3108 		if (-1 != val)
3109 			return val;
3110 	}
3111 
3112 	for (reg = r->regtab; reg; reg = reg->next) {
3113 		if (len == reg->key.sz &&
3114 		    0 == strncmp(name, reg->key.p, len)) {
3115 			switch (sign) {
3116 			case '+':
3117 				reg->val += reg->step;
3118 				break;
3119 			case '-':
3120 				reg->val -= reg->step;
3121 				break;
3122 			default:
3123 				break;
3124 			}
3125 			return reg->val;
3126 		}
3127 	}
3128 
3129 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3130 	return 0;
3131 }
3132 
3133 static int
3134 roff_hasregn(const struct roff *r, const char *name, size_t len)
3135 {
3136 	struct roffreg	*reg;
3137 	int		 val;
3138 
3139 	if ('.' == name[0] && 2 == len) {
3140 		val = roff_getregro(r, name + 1);
3141 		if (-1 != val)
3142 			return 1;
3143 	}
3144 
3145 	for (reg = r->regtab; reg; reg = reg->next)
3146 		if (len == reg->key.sz &&
3147 		    0 == strncmp(name, reg->key.p, len))
3148 			return 1;
3149 
3150 	return 0;
3151 }
3152 
3153 static void
3154 roff_freereg(struct roffreg *reg)
3155 {
3156 	struct roffreg	*old_reg;
3157 
3158 	while (NULL != reg) {
3159 		free(reg->key.p);
3160 		old_reg = reg;
3161 		reg = reg->next;
3162 		free(old_reg);
3163 	}
3164 }
3165 
3166 static int
3167 roff_nr(ROFF_ARGS)
3168 {
3169 	char		*key, *val, *step;
3170 	size_t		 keysz;
3171 	int		 iv, is, len;
3172 	char		 sign;
3173 
3174 	key = val = buf->buf + pos;
3175 	if (*key == '\0')
3176 		return ROFF_IGN;
3177 
3178 	keysz = roff_getname(r, &val, ln, pos);
3179 	if (key[keysz] == '\\' || key[keysz] == '\t')
3180 		return ROFF_IGN;
3181 
3182 	sign = *val;
3183 	if (sign == '+' || sign == '-')
3184 		val++;
3185 
3186 	len = 0;
3187 	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3188 		return ROFF_IGN;
3189 
3190 	step = val + len;
3191 	while (isspace((unsigned char)*step))
3192 		step++;
3193 	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3194 		is = INT_MIN;
3195 
3196 	roff_setregn(r, key, keysz, iv, sign, is);
3197 	return ROFF_IGN;
3198 }
3199 
3200 static int
3201 roff_rr(ROFF_ARGS)
3202 {
3203 	struct roffreg	*reg, **prev;
3204 	char		*name, *cp;
3205 	size_t		 namesz;
3206 
3207 	name = cp = buf->buf + pos;
3208 	if (*name == '\0')
3209 		return ROFF_IGN;
3210 	namesz = roff_getname(r, &cp, ln, pos);
3211 	name[namesz] = '\0';
3212 
3213 	prev = &r->regtab;
3214 	while (1) {
3215 		reg = *prev;
3216 		if (reg == NULL || !strcmp(name, reg->key.p))
3217 			break;
3218 		prev = &reg->next;
3219 	}
3220 	if (reg != NULL) {
3221 		*prev = reg->next;
3222 		free(reg->key.p);
3223 		free(reg);
3224 	}
3225 	return ROFF_IGN;
3226 }
3227 
3228 /* --- handler functions for roff requests -------------------------------- */
3229 
3230 static int
3231 roff_rm(ROFF_ARGS)
3232 {
3233 	const char	 *name;
3234 	char		 *cp;
3235 	size_t		  namesz;
3236 
3237 	cp = buf->buf + pos;
3238 	while (*cp != '\0') {
3239 		name = cp;
3240 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3241 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3242 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3243 		if (name[namesz] == '\\' || name[namesz] == '\t')
3244 			break;
3245 	}
3246 	return ROFF_IGN;
3247 }
3248 
3249 static int
3250 roff_it(ROFF_ARGS)
3251 {
3252 	int		 iv;
3253 
3254 	/* Parse the number of lines. */
3255 
3256 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3257 		mandoc_msg(MANDOCERR_IT_NONUM,
3258 		    ln, ppos, "%s", buf->buf + 1);
3259 		return ROFF_IGN;
3260 	}
3261 
3262 	while (isspace((unsigned char)buf->buf[pos]))
3263 		pos++;
3264 
3265 	/*
3266 	 * Arm the input line trap.
3267 	 * Special-casing "an-trap" is an ugly workaround to cope
3268 	 * with DocBook stupidly fiddling with man(7) internals.
3269 	 */
3270 
3271 	roffit_lines = iv;
3272 	roffit_macro = mandoc_strdup(iv != 1 ||
3273 	    strcmp(buf->buf + pos, "an-trap") ?
3274 	    buf->buf + pos : "br");
3275 	return ROFF_IGN;
3276 }
3277 
3278 static int
3279 roff_Dd(ROFF_ARGS)
3280 {
3281 	int		 mask;
3282 	enum roff_tok	 t, te;
3283 
3284 	switch (tok) {
3285 	case ROFF_Dd:
3286 		tok = MDOC_Dd;
3287 		te = MDOC_MAX;
3288 		if (r->format == 0)
3289 			r->format = MPARSE_MDOC;
3290 		mask = MPARSE_MDOC | MPARSE_QUICK;
3291 		break;
3292 	case ROFF_TH:
3293 		tok = MAN_TH;
3294 		te = MAN_MAX;
3295 		if (r->format == 0)
3296 			r->format = MPARSE_MAN;
3297 		mask = MPARSE_QUICK;
3298 		break;
3299 	default:
3300 		abort();
3301 	}
3302 	if ((r->options & mask) == 0)
3303 		for (t = tok; t < te; t++)
3304 			roff_setstr(r, roff_name[t], NULL, 0);
3305 	return ROFF_CONT;
3306 }
3307 
3308 static int
3309 roff_TE(ROFF_ARGS)
3310 {
3311 	r->man->flags &= ~ROFF_NONOFILL;
3312 	if (r->tbl == NULL) {
3313 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3314 		return ROFF_IGN;
3315 	}
3316 	if (tbl_end(r->tbl, 0) == 0) {
3317 		r->tbl = NULL;
3318 		free(buf->buf);
3319 		buf->buf = mandoc_strdup(".sp");
3320 		buf->sz = 4;
3321 		*offs = 0;
3322 		return ROFF_REPARSE;
3323 	}
3324 	r->tbl = NULL;
3325 	return ROFF_IGN;
3326 }
3327 
3328 static int
3329 roff_T_(ROFF_ARGS)
3330 {
3331 
3332 	if (NULL == r->tbl)
3333 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3334 	else
3335 		tbl_restart(ln, ppos, r->tbl);
3336 
3337 	return ROFF_IGN;
3338 }
3339 
3340 /*
3341  * Handle in-line equation delimiters.
3342  */
3343 static int
3344 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3345 {
3346 	char		*cp1, *cp2;
3347 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3348 
3349 	/*
3350 	 * Outside equations, look for an opening delimiter.
3351 	 * If we are inside an equation, we already know it is
3352 	 * in-line, or this function wouldn't have been called;
3353 	 * so look for a closing delimiter.
3354 	 */
3355 
3356 	cp1 = buf->buf + pos;
3357 	cp2 = strchr(cp1, r->eqn == NULL ?
3358 	    r->last_eqn->odelim : r->last_eqn->cdelim);
3359 	if (cp2 == NULL)
3360 		return ROFF_CONT;
3361 
3362 	*cp2++ = '\0';
3363 	bef_pr = bef_nl = aft_nl = aft_pr = "";
3364 
3365 	/* Handle preceding text, protecting whitespace. */
3366 
3367 	if (*buf->buf != '\0') {
3368 		if (r->eqn == NULL)
3369 			bef_pr = "\\&";
3370 		bef_nl = "\n";
3371 	}
3372 
3373 	/*
3374 	 * Prepare replacing the delimiter with an equation macro
3375 	 * and drop leading white space from the equation.
3376 	 */
3377 
3378 	if (r->eqn == NULL) {
3379 		while (*cp2 == ' ')
3380 			cp2++;
3381 		mac = ".EQ";
3382 	} else
3383 		mac = ".EN";
3384 
3385 	/* Handle following text, protecting whitespace. */
3386 
3387 	if (*cp2 != '\0') {
3388 		aft_nl = "\n";
3389 		if (r->eqn != NULL)
3390 			aft_pr = "\\&";
3391 	}
3392 
3393 	/* Do the actual replacement. */
3394 
3395 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3396 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3397 	free(buf->buf);
3398 	buf->buf = cp1;
3399 
3400 	/* Toggle the in-line state of the eqn subsystem. */
3401 
3402 	r->eqn_inline = r->eqn == NULL;
3403 	return ROFF_REPARSE;
3404 }
3405 
3406 static int
3407 roff_EQ(ROFF_ARGS)
3408 {
3409 	struct roff_node	*n;
3410 
3411 	if (r->man->meta.macroset == MACROSET_MAN)
3412 		man_breakscope(r->man, ROFF_EQ);
3413 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3414 	if (ln > r->man->last->line)
3415 		n->flags |= NODE_LINE;
3416 	n->eqn = eqn_box_new();
3417 	roff_node_append(r->man, n);
3418 	r->man->next = ROFF_NEXT_SIBLING;
3419 
3420 	assert(r->eqn == NULL);
3421 	if (r->last_eqn == NULL)
3422 		r->last_eqn = eqn_alloc();
3423 	else
3424 		eqn_reset(r->last_eqn);
3425 	r->eqn = r->last_eqn;
3426 	r->eqn->node = n;
3427 
3428 	if (buf->buf[pos] != '\0')
3429 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3430 		    ".EQ %s", buf->buf + pos);
3431 
3432 	return ROFF_IGN;
3433 }
3434 
3435 static int
3436 roff_EN(ROFF_ARGS)
3437 {
3438 	if (r->eqn != NULL) {
3439 		eqn_parse(r->eqn);
3440 		r->eqn = NULL;
3441 	} else
3442 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3443 	if (buf->buf[pos] != '\0')
3444 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3445 		    "EN %s", buf->buf + pos);
3446 	return ROFF_IGN;
3447 }
3448 
3449 static int
3450 roff_TS(ROFF_ARGS)
3451 {
3452 	if (r->tbl != NULL) {
3453 		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3454 		tbl_end(r->tbl, 0);
3455 	}
3456 	r->man->flags |= ROFF_NONOFILL;
3457 	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3458 	if (r->last_tbl == NULL)
3459 		r->first_tbl = r->tbl;
3460 	r->last_tbl = r->tbl;
3461 	return ROFF_IGN;
3462 }
3463 
3464 static int
3465 roff_noarg(ROFF_ARGS)
3466 {
3467 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3468 		man_breakscope(r->man, tok);
3469 	if (tok == ROFF_brp)
3470 		tok = ROFF_br;
3471 	roff_elem_alloc(r->man, ln, ppos, tok);
3472 	if (buf->buf[pos] != '\0')
3473 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3474 		   "%s %s", roff_name[tok], buf->buf + pos);
3475 	if (tok == ROFF_nf)
3476 		r->man->flags |= ROFF_NOFILL;
3477 	else if (tok == ROFF_fi)
3478 		r->man->flags &= ~ROFF_NOFILL;
3479 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3480 	r->man->next = ROFF_NEXT_SIBLING;
3481 	return ROFF_IGN;
3482 }
3483 
/*
 * Handle requests that take at most one argument.
 * An element node is allocated for the request, and the first
 * space-delimited word of the argument text, if any, becomes
 * a word node below it; further words are reported as excess.
 * For ce and rj, the argument is evaluated as a number and
 * stored in roffce_lines, and the element is remembered in
 * roffce_node as long as that number is positive.
 */
static int
roff_onearg(ROFF_ARGS)
{
	struct roff_node	*n;
	char			*cp;
	int			 npos;

	/* Inside man(7) line scopes, these four requests break the scope. */

	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
	     tok == ROFF_ti))
		man_breakscope(r->man, tok);

	/*
	 * If a ce/rj element is still remembered, a new ce/rj
	 * request is appended as the next sibling of that element.
	 */

	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	roff_elem_alloc(r->man, ln, ppos, tok);
	n = r->man->last;

	/*
	 * NUL-terminate the first word in place, overwriting
	 * all the spaces following it, and complain if more
	 * text follows.
	 */

	cp = buf->buf + pos;
	if (*cp != '\0') {
		while (*cp != '\0' && *cp != ' ')
			cp++;
		while (*cp == ' ')
			*cp++ = '\0';
		if (*cp != '\0')
			mandoc_msg(MANDOCERR_ARG_EXCESS,
			    ln, (int)(cp - buf->buf),
			    "%s ... %s", roff_name[tok], cp);
		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
	}

	if (tok == ROFF_ce || tok == ROFF_rj) {
		/*
		 * If the last node is still the element itself,
		 * no argument word was added above: supply "1"
		 * and mark that word as not present in the source.
		 */
		if (r->man->last->type == ROFFT_ELEM) {
			roff_word_alloc(r->man, ln, pos, "1");
			r->man->last->flags |= NODE_NOSRC;
		}
		/* Evaluate the word; fall back to one line on failure. */
		npos = 0;
		if (roff_evalnum(r, ln, r->man->last->string, &npos,
		    &roffce_lines, 0) == 0) {
			mandoc_msg(MANDOCERR_CE_NONUM,
			    ln, pos, "ce %s", buf->buf + pos);
			roffce_lines = 1;
		}
		/*
		 * A count below one forgets the remembered element;
		 * otherwise, the parent of the word, that is,
		 * the new element, is remembered.
		 */
		if (roffce_lines < 1) {
			r->man->last = r->man->last->parent;
			roffce_node = NULL;
			roffce_lines = 0;
		} else
			roffce_node = r->man->last->parent;
	} else {
		/* All other requests are complete right away. */
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->last = n;
	}
	n->flags |= NODE_LINE;
	r->man->next = ROFF_NEXT_SIBLING;
	return ROFF_IGN;
}
3543 
3544 static int
3545 roff_manyarg(ROFF_ARGS)
3546 {
3547 	struct roff_node	*n;
3548 	char			*sp, *ep;
3549 
3550 	roff_elem_alloc(r->man, ln, ppos, tok);
3551 	n = r->man->last;
3552 
3553 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3554 		while (*ep != '\0' && *ep != ' ')
3555 			ep++;
3556 		while (*ep == ' ')
3557 			*ep++ = '\0';
3558 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3559 	}
3560 
3561 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3562 	r->man->last = n;
3563 	r->man->next = ROFF_NEXT_SIBLING;
3564 	return ROFF_IGN;
3565 }
3566 
3567 static int
3568 roff_als(ROFF_ARGS)
3569 {
3570 	char		*oldn, *newn, *end, *value;
3571 	size_t		 oldsz, newsz, valsz;
3572 
3573 	newn = oldn = buf->buf + pos;
3574 	if (*newn == '\0')
3575 		return ROFF_IGN;
3576 
3577 	newsz = roff_getname(r, &oldn, ln, pos);
3578 	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3579 		return ROFF_IGN;
3580 
3581 	end = oldn;
3582 	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3583 	if (oldsz == 0)
3584 		return ROFF_IGN;
3585 
3586 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3587 	    (int)oldsz, oldn);
3588 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3589 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3590 	free(value);
3591 	return ROFF_IGN;
3592 }
3593 
3594 /*
3595  * The .break request only makes sense inside conditionals,
3596  * and that case is already handled in roff_cond_sub().
3597  */
3598 static int
3599 roff_break(ROFF_ARGS)
3600 {
3601 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3602 	return ROFF_IGN;
3603 }
3604 
3605 static int
3606 roff_cc(ROFF_ARGS)
3607 {
3608 	const char	*p;
3609 
3610 	p = buf->buf + pos;
3611 
3612 	if (*p == '\0' || (r->control = *p++) == '.')
3613 		r->control = '\0';
3614 
3615 	if (*p != '\0')
3616 		mandoc_msg(MANDOCERR_ARG_EXCESS,
3617 		    ln, p - buf->buf, "cc ... %s", p);
3618 
3619 	return ROFF_IGN;
3620 }
3621 
3622 static int
3623 roff_char(ROFF_ARGS)
3624 {
3625 	const char	*p, *kp, *vp;
3626 	size_t		 ksz, vsz;
3627 	int		 font;
3628 
3629 	/* Parse the character to be replaced. */
3630 
3631 	kp = buf->buf + pos;
3632 	p = kp + 1;
3633 	if (*kp == '\0' || (*kp == '\\' &&
3634 	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3635 	    (*p != ' ' && *p != '\0')) {
3636 		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3637 		return ROFF_IGN;
3638 	}
3639 	ksz = p - kp;
3640 	while (*p == ' ')
3641 		p++;
3642 
3643 	/*
3644 	 * If the replacement string contains a font escape sequence,
3645 	 * we have to restore the font at the end.
3646 	 */
3647 
3648 	vp = p;
3649 	vsz = strlen(p);
3650 	font = 0;
3651 	while (*p != '\0') {
3652 		if (*p++ != '\\')
3653 			continue;
3654 		switch (mandoc_escape(&p, NULL, NULL)) {
3655 		case ESCAPE_FONT:
3656 		case ESCAPE_FONTROMAN:
3657 		case ESCAPE_FONTITALIC:
3658 		case ESCAPE_FONTBOLD:
3659 		case ESCAPE_FONTBI:
3660 		case ESCAPE_FONTCW:
3661 		case ESCAPE_FONTPREV:
3662 			font++;
3663 			break;
3664 		default:
3665 			break;
3666 		}
3667 	}
3668 	if (font > 1)
3669 		mandoc_msg(MANDOCERR_CHAR_FONT,
3670 		    ln, (int)(vp - buf->buf), "%s", vp);
3671 
3672 	/*
3673 	 * Approximate the effect of .char using the .tr tables.
3674 	 * XXX In groff, .char and .tr interact differently.
3675 	 */
3676 
3677 	if (ksz == 1) {
3678 		if (r->xtab == NULL)
3679 			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3680 		assert((unsigned int)*kp < 128);
3681 		free(r->xtab[(int)*kp].p);
3682 		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3683 		    "%s%s", vp, font ? "\fP" : "");
3684 	} else {
3685 		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3686 		if (font)
3687 			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3688 	}
3689 	return ROFF_IGN;
3690 }
3691 
3692 static int
3693 roff_ec(ROFF_ARGS)
3694 {
3695 	const char	*p;
3696 
3697 	p = buf->buf + pos;
3698 	if (*p == '\0')
3699 		r->escape = '\\';
3700 	else {
3701 		r->escape = *p;
3702 		if (*++p != '\0')
3703 			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3704 			    (int)(p - buf->buf), "ec ... %s", p);
3705 	}
3706 	return ROFF_IGN;
3707 }
3708 
3709 static int
3710 roff_eo(ROFF_ARGS)
3711 {
3712 	r->escape = '\0';
3713 	if (buf->buf[pos] != '\0')
3714 		mandoc_msg(MANDOCERR_ARG_SKIP,
3715 		    ln, pos, "eo %s", buf->buf + pos);
3716 	return ROFF_IGN;
3717 }
3718 
3719 static int
3720 roff_nop(ROFF_ARGS)
3721 {
3722 	while (buf->buf[pos] == ' ')
3723 		pos++;
3724 	*offs = pos;
3725 	return ROFF_RERUN;
3726 }
3727 
3728 static int
3729 roff_tr(ROFF_ARGS)
3730 {
3731 	const char	*p, *first, *second;
3732 	size_t		 fsz, ssz;
3733 	enum mandoc_esc	 esc;
3734 
3735 	p = buf->buf + pos;
3736 
3737 	if (*p == '\0') {
3738 		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3739 		return ROFF_IGN;
3740 	}
3741 
3742 	while (*p != '\0') {
3743 		fsz = ssz = 1;
3744 
3745 		first = p++;
3746 		if (*first == '\\') {
3747 			esc = mandoc_escape(&p, NULL, NULL);
3748 			if (esc == ESCAPE_ERROR) {
3749 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3750 				    (int)(p - buf->buf), "%s", first);
3751 				return ROFF_IGN;
3752 			}
3753 			fsz = (size_t)(p - first);
3754 		}
3755 
3756 		second = p++;
3757 		if (*second == '\\') {
3758 			esc = mandoc_escape(&p, NULL, NULL);
3759 			if (esc == ESCAPE_ERROR) {
3760 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3761 				    (int)(p - buf->buf), "%s", second);
3762 				return ROFF_IGN;
3763 			}
3764 			ssz = (size_t)(p - second);
3765 		} else if (*second == '\0') {
3766 			mandoc_msg(MANDOCERR_TR_ODD, ln,
3767 			    (int)(first - buf->buf), "tr %s", first);
3768 			second = " ";
3769 			p--;
3770 		}
3771 
3772 		if (fsz > 1) {
3773 			roff_setstrn(&r->xmbtab, first, fsz,
3774 			    second, ssz, 0);
3775 			continue;
3776 		}
3777 
3778 		if (r->xtab == NULL)
3779 			r->xtab = mandoc_calloc(128,
3780 			    sizeof(struct roffstr));
3781 
3782 		free(r->xtab[(int)*first].p);
3783 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3784 		r->xtab[(int)*first].sz = ssz;
3785 	}
3786 
3787 	return ROFF_IGN;
3788 }
3789 
3790 /*
3791  * Implementation of the .return request.
3792  * There is no need to call roff_userret() from here.
3793  * The read module will call that after rewinding the reader stack
3794  * to the place from where the current macro was called.
3795  */
3796 static int
3797 roff_return(ROFF_ARGS)
3798 {
3799 	if (r->mstackpos >= 0)
3800 		return ROFF_IGN | ROFF_USERRET;
3801 
3802 	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3803 	return ROFF_IGN;
3804 }
3805 
3806 static int
3807 roff_rn(ROFF_ARGS)
3808 {
3809 	const char	*value;
3810 	char		*oldn, *newn, *end;
3811 	size_t		 oldsz, newsz;
3812 	int		 deftype;
3813 
3814 	oldn = newn = buf->buf + pos;
3815 	if (*oldn == '\0')
3816 		return ROFF_IGN;
3817 
3818 	oldsz = roff_getname(r, &newn, ln, pos);
3819 	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3820 		return ROFF_IGN;
3821 
3822 	end = newn;
3823 	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3824 	if (newsz == 0)
3825 		return ROFF_IGN;
3826 
3827 	deftype = ROFFDEF_ANY;
3828 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3829 	switch (deftype) {
3830 	case ROFFDEF_USER:
3831 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3832 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3833 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3834 		break;
3835 	case ROFFDEF_PRE:
3836 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3837 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3838 		break;
3839 	case ROFFDEF_REN:
3840 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3841 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3842 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3843 		break;
3844 	case ROFFDEF_STD:
3845 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3846 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3847 		break;
3848 	default:
3849 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3850 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3851 		break;
3852 	}
3853 	return ROFF_IGN;
3854 }
3855 
3856 static int
3857 roff_shift(ROFF_ARGS)
3858 {
3859 	struct mctx	*ctx;
3860 	int		 levels, i;
3861 
3862 	levels = 1;
3863 	if (buf->buf[pos] != '\0' &&
3864 	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3865 		mandoc_msg(MANDOCERR_CE_NONUM,
3866 		    ln, pos, "shift %s", buf->buf + pos);
3867 		levels = 1;
3868 	}
3869 	if (r->mstackpos < 0) {
3870 		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3871 		return ROFF_IGN;
3872 	}
3873 	ctx = r->mstack + r->mstackpos;
3874 	if (levels > ctx->argc) {
3875 		mandoc_msg(MANDOCERR_SHIFT,
3876 		    ln, pos, "%d, but max is %d", levels, ctx->argc);
3877 		levels = ctx->argc;
3878 	}
3879 	if (levels == 0)
3880 		return ROFF_IGN;
3881 	for (i = 0; i < levels; i++)
3882 		free(ctx->argv[i]);
3883 	ctx->argc -= levels;
3884 	for (i = 0; i < ctx->argc; i++)
3885 		ctx->argv[i] = ctx->argv[i + levels];
3886 	return ROFF_IGN;
3887 }
3888 
3889 static int
3890 roff_so(ROFF_ARGS)
3891 {
3892 	char *name, *cp;
3893 
3894 	name = buf->buf + pos;
3895 	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3896 
3897 	/*
3898 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3899 	 * opening anything that's not in our cwd or anything beneath
3900 	 * it.  Thus, explicitly disallow traversing up the file-system
3901 	 * or using absolute paths.
3902 	 */
3903 
3904 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3905 		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3906 		buf->sz = mandoc_asprintf(&cp,
3907 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3908 		free(buf->buf);
3909 		buf->buf = cp;
3910 		*offs = 0;
3911 		return ROFF_REPARSE;
3912 	}
3913 
3914 	*offs = pos;
3915 	return ROFF_SO;
3916 }
3917 
3918 /* --- user defined strings and macros ------------------------------------ */
3919 
3920 static int
3921 roff_userdef(ROFF_ARGS)
3922 {
3923 	struct mctx	 *ctx;
3924 	char		 *arg, *ap, *dst, *src;
3925 	size_t		  sz;
3926 
3927 	/* If the macro is empty, ignore it altogether. */
3928 
3929 	if (*r->current_string == '\0')
3930 		return ROFF_IGN;
3931 
3932 	/* Initialize a new macro stack context. */
3933 
3934 	if (++r->mstackpos == r->mstacksz) {
3935 		r->mstack = mandoc_recallocarray(r->mstack,
3936 		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3937 		r->mstacksz += 8;
3938 	}
3939 	ctx = r->mstack + r->mstackpos;
3940 	ctx->argsz = 0;
3941 	ctx->argc = 0;
3942 	ctx->argv = NULL;
3943 
3944 	/*
3945 	 * Collect pointers to macro argument strings,
3946 	 * NUL-terminating them and escaping quotes.
3947 	 */
3948 
3949 	src = buf->buf + pos;
3950 	while (*src != '\0') {
3951 		if (ctx->argc == ctx->argsz) {
3952 			ctx->argsz += 8;
3953 			ctx->argv = mandoc_reallocarray(ctx->argv,
3954 			    ctx->argsz, sizeof(*ctx->argv));
3955 		}
3956 		arg = roff_getarg(r, &src, ln, &pos);
3957 		sz = 1;  /* For the terminating NUL. */
3958 		for (ap = arg; *ap != '\0'; ap++)
3959 			sz += *ap == '"' ? 4 : 1;
3960 		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3961 		for (ap = arg; *ap != '\0'; ap++) {
3962 			if (*ap == '"') {
3963 				memcpy(dst, "\\(dq", 4);
3964 				dst += 4;
3965 			} else
3966 				*dst++ = *ap;
3967 		}
3968 		*dst = '\0';
3969 		free(arg);
3970 	}
3971 
3972 	/* Replace the macro invocation by the macro definition. */
3973 
3974 	free(buf->buf);
3975 	buf->buf = mandoc_strdup(r->current_string);
3976 	buf->sz = strlen(buf->buf) + 1;
3977 	*offs = 0;
3978 
3979 	return buf->buf[buf->sz - 2] == '\n' ?
3980 	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3981 }
3982 
3983 /*
3984  * Calling a high-level macro that was renamed with .rn.
3985  * r->current_string has already been set up by roff_parse().
3986  */
3987 static int
3988 roff_renamed(ROFF_ARGS)
3989 {
3990 	char	*nbuf;
3991 
3992 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3993 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3994 	free(buf->buf);
3995 	buf->buf = nbuf;
3996 	*offs = 0;
3997 	return ROFF_CONT;
3998 }
3999 
/*
 * Measure the length in bytes of the roff identifier at *cpp
 * and advance the pointer to the next word.
 *
 * The identifier ends at the end of the string, at a space or
 * tab character, or at a backslash that is not followed by
 * another backslash.  On return, *cpp points behind the
 * identifier, behind one directly following space or tab
 * character, if any, and behind all space characters after that.
 * The arguments ln and pos are only used for reporting an escape
 * sequence inside the name; r is not referenced in this function.
 * Returns the length of the identifier, or 0 without changing
 * *cpp if the string is empty.
 */
static size_t
roff_getname(struct roff *r, char **cpp, int ln, int pos)
{
	char	 *name, *cp;
	size_t	  namesz;

	name = *cpp;
	if (*name == '\0')
		return 0;

	/* Advance cp to the byte after the end of the name. */

	for (cp = name; 1; cp++) {
		/* Tentative length: all bytes before the current one. */
		namesz = cp - name;
		if (*cp == '\0')
			break;
		if (*cp == ' ' || *cp == '\t') {
			/* Consume the terminating blank. */
			cp++;
			break;
		}
		if (*cp != '\\')
			continue;
		/* Stop in front of \{ and \} without consuming them. */
		if (cp[1] == '{' || cp[1] == '}')
			break;
		/* A doubled backslash is accepted as part of the name. */
		if (*++cp == '\\')
			continue;
		/*
		 * Any other escape sequence ends the name in front of
		 * the backslash.  It is reported and skipped.
		 */
		mandoc_msg(MANDOCERR_NAMESC, ln, pos,
		    "%.*s", (int)(cp - name + 1), name);
		mandoc_escape((const char **)&cp, NULL, NULL);
		break;
	}

	/* Read past spaces. */

	while (*cp == ' ')
		cp++;

	*cpp = cp;
	return namesz;
}
4044 
4045 /*
4046  * Store *string into the user-defined string called *name.
4047  * To clear an existing entry, call with (*r, *name, NULL, 0).
4048  * append == 0: replace mode
4049  * append == 1: single-line append mode
4050  * append == 2: multiline append mode, append '\n' after each call
4051  */
4052 static void
4053 roff_setstr(struct roff *r, const char *name, const char *string,
4054 	int append)
4055 {
4056 	size_t	 namesz;
4057 
4058 	namesz = strlen(name);
4059 	roff_setstrn(&r->strtab, name, namesz, string,
4060 	    string ? strlen(string) : 0, append);
4061 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4062 }
4063 
4064 static void
4065 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4066 		const char *string, size_t stringsz, int append)
4067 {
4068 	struct roffkv	*n;
4069 	char		*c;
4070 	int		 i;
4071 	size_t		 oldch, newch;
4072 
4073 	/* Search for an existing string with the same name. */
4074 	n = *r;
4075 
4076 	while (n && (namesz != n->key.sz ||
4077 			strncmp(n->key.p, name, namesz)))
4078 		n = n->next;
4079 
4080 	if (NULL == n) {
4081 		/* Create a new string table entry. */
4082 		n = mandoc_malloc(sizeof(struct roffkv));
4083 		n->key.p = mandoc_strndup(name, namesz);
4084 		n->key.sz = namesz;
4085 		n->val.p = NULL;
4086 		n->val.sz = 0;
4087 		n->next = *r;
4088 		*r = n;
4089 	} else if (0 == append) {
4090 		free(n->val.p);
4091 		n->val.p = NULL;
4092 		n->val.sz = 0;
4093 	}
4094 
4095 	if (NULL == string)
4096 		return;
4097 
4098 	/*
4099 	 * One additional byte for the '\n' in multiline mode,
4100 	 * and one for the terminating '\0'.
4101 	 */
4102 	newch = stringsz + (1 < append ? 2u : 1u);
4103 
4104 	if (NULL == n->val.p) {
4105 		n->val.p = mandoc_malloc(newch);
4106 		*n->val.p = '\0';
4107 		oldch = 0;
4108 	} else {
4109 		oldch = n->val.sz;
4110 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4111 	}
4112 
4113 	/* Skip existing content in the destination buffer. */
4114 	c = n->val.p + (int)oldch;
4115 
4116 	/* Append new content to the destination buffer. */
4117 	i = 0;
4118 	while (i < (int)stringsz) {
4119 		/*
4120 		 * Rudimentary roff copy mode:
4121 		 * Handle escaped backslashes.
4122 		 */
4123 		if ('\\' == string[i] && '\\' == string[i + 1])
4124 			i++;
4125 		*c++ = string[i++];
4126 	}
4127 
4128 	/* Append terminating bytes. */
4129 	if (1 < append)
4130 		*c++ = '\n';
4131 
4132 	*c = '\0';
4133 	n->val.sz = (int)(c - n->val.p);
4134 }
4135 
/*
 * Look up the name given by the first len bytes of *name.
 * The tables are searched in this order: user-defined strings,
 * renamed macros, predefined strings, and the names of the
 * standard mdoc(7) and man(7) macros.
 *
 * On entry, *deftype is a bit mask of the ROFFDEF_* kinds of
 * definitions the caller is interested in.  As soon as a match
 * of a requested kind is found, *deftype is set to that kind
 * and the value is returned; for standard macros, the returned
 * value is NULL.  A match of a kind that was not requested is
 * merely noted, and the search continues with the next table.
 *
 * If no requested kind matched, NULL is returned, and *deftype
 * is set to ROFFDEF_UNDEF if the name was found nowhere while
 * a specific mask including ROFFDEF_REN was requested, or to 0
 * otherwise.
 */
static const char *
roff_getstrn(struct roff *r, const char *name, size_t len,
    int *deftype)
{
	const struct roffkv	*n;
	int			 found, i;
	enum roff_tok		 tok;

	found = 0;

	/* User-defined strings; entries without a value do not count. */

	for (n = r->strtab; n != NULL; n = n->next) {
		if (strncmp(name, n->key.p, len) != 0 ||
		    n->key.p[len] != '\0' || n->val.p == NULL)
			continue;
		if (*deftype & ROFFDEF_USER) {
			*deftype = ROFFDEF_USER;
			return n->val.p;
		} else {
			found = 1;
			break;
		}
	}

	/* Renamed macros; entries without a value do not count. */

	for (n = r->rentab; n != NULL; n = n->next) {
		if (strncmp(name, n->key.p, len) != 0 ||
		    n->key.p[len] != '\0' || n->val.p == NULL)
			continue;
		if (*deftype & ROFFDEF_REN) {
			*deftype = ROFFDEF_REN;
			return n->val.p;
		} else {
			found = 1;
			break;
		}
	}

	/* Predefined strings. */

	for (i = 0; i < PREDEFS_MAX; i++) {
		if (strncmp(name, predefs[i].name, len) != 0 ||
		    predefs[i].name[len] != '\0')
			continue;
		if (*deftype & ROFFDEF_PRE) {
			*deftype = ROFFDEF_PRE;
			return predefs[i].str;
		} else {
			found = 1;
			break;
		}
	}

	/* Standard mdoc(7) macros, unless the macro set is man(7). */

	if (r->man->meta.macroset != MACROSET_MAN) {
		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
			if (strncmp(name, roff_name[tok], len) != 0 ||
			    roff_name[tok][len] != '\0')
				continue;
			if (*deftype & ROFFDEF_STD) {
				*deftype = ROFFDEF_STD;
				return NULL;
			} else {
				found = 1;
				break;
			}
		}
	}

	/* Standard man(7) macros, unless the macro set is mdoc(7). */

	if (r->man->meta.macroset != MACROSET_MDOC) {
		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
			if (strncmp(name, roff_name[tok], len) != 0 ||
			    roff_name[tok][len] != '\0')
				continue;
			if (*deftype & ROFFDEF_STD) {
				*deftype = ROFFDEF_STD;
				return NULL;
			} else {
				found = 1;
				break;
			}
		}
	}

	/*
	 * The name is not defined in any form.
	 * Callers asking for ROFFDEF_ANY merely want to know that,
	 * so nothing is changed for them.
	 */

	if (found == 0 && *deftype != ROFFDEF_ANY) {
		if (*deftype & ROFFDEF_REN) {
			/*
			 * This might still be a request,
			 * so do not treat it as undefined yet.
			 */
			*deftype = ROFFDEF_UNDEF;
			return NULL;
		}

		/* Using an undefined string defines it to be empty. */

		roff_setstrn(&r->strtab, name, len, "", 0, 0);
		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
	}

	*deftype = 0;
	return NULL;
}
4229 
4230 static void
4231 roff_freestr(struct roffkv *r)
4232 {
4233 	struct roffkv	 *n, *nn;
4234 
4235 	for (n = r; n; n = nn) {
4236 		free(n->key.p);
4237 		free(n->val.p);
4238 		nn = n->next;
4239 		free(n);
4240 	}
4241 }
4242 
4243 /* --- accessors and utility functions ------------------------------------ */
4244 
4245 /*
4246  * Duplicate an input string, making the appropriate character
4247  * conversations (as stipulated by `tr') along the way.
4248  * Returns a heap-allocated string with all the replacements made.
4249  */
4250 char *
4251 roff_strdup(const struct roff *r, const char *p)
4252 {
4253 	const struct roffkv *cp;
4254 	char		*res;
4255 	const char	*pp;
4256 	size_t		 ssz, sz;
4257 	enum mandoc_esc	 esc;
4258 
4259 	if (NULL == r->xmbtab && NULL == r->xtab)
4260 		return mandoc_strdup(p);
4261 	else if ('\0' == *p)
4262 		return mandoc_strdup("");
4263 
4264 	/*
4265 	 * Step through each character looking for term matches
4266 	 * (remember that a `tr' can be invoked with an escape, which is
4267 	 * a glyph but the escape is multi-character).
4268 	 * We only do this if the character hash has been initialised
4269 	 * and the string is >0 length.
4270 	 */
4271 
4272 	res = NULL;
4273 	ssz = 0;
4274 
4275 	while ('\0' != *p) {
4276 		assert((unsigned int)*p < 128);
4277 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4278 			sz = r->xtab[(int)*p].sz;
4279 			res = mandoc_realloc(res, ssz + sz + 1);
4280 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4281 			ssz += sz;
4282 			p++;
4283 			continue;
4284 		} else if ('\\' != *p) {
4285 			res = mandoc_realloc(res, ssz + 2);
4286 			res[ssz++] = *p++;
4287 			continue;
4288 		}
4289 
4290 		/* Search for term matches. */
4291 		for (cp = r->xmbtab; cp; cp = cp->next)
4292 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4293 				break;
4294 
4295 		if (NULL != cp) {
4296 			/*
4297 			 * A match has been found.
4298 			 * Append the match to the array and move
4299 			 * forward by its keysize.
4300 			 */
4301 			res = mandoc_realloc(res,
4302 			    ssz + cp->val.sz + 1);
4303 			memcpy(res + ssz, cp->val.p, cp->val.sz);
4304 			ssz += cp->val.sz;
4305 			p += (int)cp->key.sz;
4306 			continue;
4307 		}
4308 
4309 		/*
4310 		 * Handle escapes carefully: we need to copy
4311 		 * over just the escape itself, or else we might
4312 		 * do replacements within the escape itself.
4313 		 * Make sure to pass along the bogus string.
4314 		 */
4315 		pp = p++;
4316 		esc = mandoc_escape(&p, NULL, NULL);
4317 		if (ESCAPE_ERROR == esc) {
4318 			sz = strlen(pp);
4319 			res = mandoc_realloc(res, ssz + sz + 1);
4320 			memcpy(res + ssz, pp, sz);
4321 			break;
4322 		}
4323 		/*
4324 		 * We bail out on bad escapes.
4325 		 * No need to warn: we already did so when
4326 		 * roff_expand() was called.
4327 		 */
4328 		sz = (int)(p - pp);
4329 		res = mandoc_realloc(res, ssz + sz + 1);
4330 		memcpy(res + ssz, pp, sz);
4331 		ssz += sz;
4332 	}
4333 
4334 	res[(int)ssz] = '\0';
4335 	return res;
4336 }
4337 
4338 int
4339 roff_getformat(const struct roff *r)
4340 {
4341 
4342 	return r->format;
4343 }
4344 
4345 /*
4346  * Find out whether a line is a macro line or not.
4347  * If it is, adjust the current position and return one; if it isn't,
4348  * return zero and don't change the current position.
4349  * If the control character has been set with `.cc', then let that grain
4350  * precedence.
4351  * This is slighly contrary to groff, where using the non-breaking
4352  * control character when `cc' has been invoked will cause the
4353  * non-breaking macro contents to be printed verbatim.
4354  */
4355 int
4356 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4357 {
4358 	int		pos;
4359 
4360 	pos = *ppos;
4361 
4362 	if (r->control != '\0' && cp[pos] == r->control)
4363 		pos++;
4364 	else if (r->control != '\0')
4365 		return 0;
4366 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4367 		pos += 2;
4368 	else if ('.' == cp[pos] || '\'' == cp[pos])
4369 		pos++;
4370 	else
4371 		return 0;
4372 
4373 	while (' ' == cp[pos] || '\t' == cp[pos])
4374 		pos++;
4375 
4376 	*ppos = pos;
4377 	return 1;
4378 }
4379