xref: /openbsd-src/usr.bin/mandoc/roff.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: roff.c,v 1.243 2020/02/27 21:38:27 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stddef.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 #include "mandoc_aux.h"
30 #include "mandoc_ohash.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "mandoc_parse.h"
34 #include "libmandoc.h"
35 #include "roff_int.h"
36 #include "tbl_parse.h"
37 #include "eqn_parse.h"
38 
39 /*
40  * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
41  * that an escape sequence resulted from copy-in processing and
42  * needs to be checked or interpolated.  As it is used nowhere
43  * else, it is defined here rather than in a header file.
44  */
45 #define	ASCII_ESC	27  /* numeric code of the ASCII ESC control character */
46 
47 /* Maximum number of string expansions per line, to break infinite loops. */
48 #define	EXPAND_LIMIT	1000
49 
50 /* Types of definitions of macros and strings. */
/*
 * Each type occupies its own bit, so several types can be combined
 * into one mask with bitwise OR, as ROFFDEF_ANY does below.
 * Bit 0 is not used by any of these constants.
 */
51 #define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
52 #define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
53 #define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
54 #define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
/* Mask of all four kinds of existing definitions; excludes ROFFDEF_UNDEF. */
55 #define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
56 			 ROFFDEF_REN | ROFFDEF_STD)
57 #define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
58 
59 /* --- data types --------------------------------------------------------- */
60 
61 /*
62  * An incredibly-simple string buffer.
63  */
/*
 * Serves as the key of struct roffkv and struct roffreg, as the value
 * of struct roffkv, and as the element type of the xtab array in
 * struct roff.
 */
64 struct	roffstr {
65 	char		*p; /* nil-terminated buffer */
66 	size_t		 sz; /* saved strlen(p) */
67 };
68 
69 /*
70  * A key-value roffstr pair as part of a singly-linked list.
71  */
/*
 * struct roff keeps three such lists: strtab, rentab, and xmbtab.
 */
72 struct	roffkv {
73 	struct roffstr	 key; /* name that is looked up */
74 	struct roffstr	 val; /* string stored under that name */
75 	struct roffkv	*next; /* next in list */
76 };
77 
78 /*
79  * A single number register as part of a singly-linked list.
80  */
/*
 * The list head is the regtab member of struct roff.
 */
81 struct	roffreg {
82 	struct roffstr	 key; /* register name */
83 	int		 val; /* value of the register */
84 	int		 step; /* presumably the auto-increment step; confirm in roff_getregn() */
85 	struct roffreg	*next; /* next in list */
86 };
87 
88 /*
89  * Association of request and macro names with token IDs.
90  */
/*
 * Entries are allocated by roffhash_alloc() with enough room after
 * the structure to hold the name and its terminating NUL byte, and
 * the hash table is keyed on the name member.
 */
91 struct	roffreq {
92 	enum roff_tok	 tok; /* token ID belonging to the name */
93 	char		 name[]; /* flexible array member: NUL-terminated name */
94 };
95 
96 /*
97  * A macro processing context.
98  * More than one is needed when macro calls are nested.
99  */
/*
 * Contexts live in the mstack array of struct roff; roff_free()
 * releases the argv array of every context in that array.
 */
100 struct	mctx {
101 	char		**argv; /* presumably the arguments of the macro call; confirm */
102 	int		 argc; /* presumably the number of argv entries in use; confirm */
103 	int		 argsz; /* presumably the allocated size of argv; confirm */
104 };
105 
/*
 * The complete state of one roff parser.
 * Objects are created by roff_alloc(), returned to their initial
 * state by roff_reset(), and destroyed by roff_free().
 * For both stacks, a position of -1 means that the stack is empty:
 * roff_alloc() starts with -1 and roff_free1() unwinds the macro
 * stack for as long as mstackpos >= 0.
 */
106 struct	roff {
107 	struct roff_man	*man; /* mdoc or man parser */
108 	struct roffnode	*last; /* leaf of stack */
109 	struct mctx	*mstack; /* stack of macro contexts */
110 	int		*rstack; /* stack of inverted `ie' values */
111 	struct ohash	*reqtab; /* request lookup table */
112 	struct roffreg	*regtab; /* number registers */
113 	struct roffkv	*strtab; /* user-defined strings & macros */
114 	struct roffkv	*rentab; /* renamed strings & macros */
115 	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
116 	struct roffstr	*xtab; /* single-byte trans table (`tr') */
117 	const char	*current_string; /* value of last called user macro */
118 	struct tbl_node	*first_tbl; /* first table parsed */
119 	struct tbl_node	*last_tbl; /* last table parsed */
120 	struct tbl_node	*tbl; /* current table being parsed */
121 	struct eqn_node	*last_eqn; /* equation parser */
122 	struct eqn_node	*eqn; /* active equation parser */
123 	int		 eqn_inline; /* current equation is inline */
124 	int		 options; /* parse options */
125 	int		 mstacksz; /* current size of mstack */
126 	int		 mstackpos; /* position in mstack */
127 	int		 rstacksz; /* current size limit of rstack */
128 	int		 rstackpos; /* position in rstack */
129 	int		 format; /* current file in mdoc or man format */
130 	char		 control; /* control character */
131 	char		 escape; /* escape character */
132 };
133 
134 /*
135  * A macro definition, condition, or ignored block.
136  */
/*
 * Nodes form a stack linked through the parent pointers; the
 * innermost node is the last member of struct roff.
 * roffnode_push() creates nodes and roffnode_pop() releases them,
 * including the heap-allocated name and end strings.
 */
137 struct	roffnode {
138 	enum roff_tok	 tok; /* type of node */
139 	struct roffnode	*parent; /* up one in stack */
140 	int		 line; /* parse line */
141 	int		 col; /* parse col */
142 	char		*name; /* node name, e.g. macro name */
143 	char		*end; /* custom end macro of the block */
144 	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
145 	int		 rule; /* content is: 1=evaluated 0=skipped */
146 };
147 
/*
 * The parameter list shared by all request handler functions.
 * It is spelled out once here so that the handler prototypes,
 * the handler definitions, and the roffproc function pointer type
 * cannot get out of sync.
 */
148 #define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
149 			 enum roff_tok tok, /* tok of macro */ \
150 			 struct buf *buf, /* input buffer */ \
151 			 int ln, /* parse line */ \
152 			 int ppos, /* original pos in buffer */ \
153 			 int pos, /* current pos in buffer */ \
154 			 int *offs /* reset offset of buffer data */
155 
/* Pointer to a request handler, as stored in struct roffmac. */
156 typedef	int (*roffproc)(ROFF_ARGS);
157 
/*
 * One entry of the roffs[] dispatch table: the handlers for one request.
 * In that table, text and sub are NULL for every request that does not
 * use the block or conditional handlers.
 */
158 struct	roffmac {
159 	roffproc	 proc; /* process new macro */
160 	roffproc	 text; /* process as child text of macro */
161 	roffproc	 sub; /* process as child of macro */
162 	int		 flags; /* 0 or ROFFMAC_STRUCT */
163 #define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
164 };
165 
/*
 * One predefined string: an input name and the text it stands for.
 * The predefs[] array below consists of elements of this type.
 */
166 struct	predef {
167 	const char	*name; /* predefined input name */
168 	const char	*str; /* replacement symbol */
169 };
170 
/*
 * Expands to one initializer for a struct predef, including the
 * trailing comma.  Presumably invoked once per line by "predefs.in",
 * which is included into the predefs[] initializer; confirm there.
 */
171 #define	PREDEF(__name, __str) \
172 	{ (__name), (__str) },
173 
174 /* --- function prototypes ------------------------------------------------ */
175 
176 static	int		 roffnode_cleanscope(struct roff *);
177 static	int		 roffnode_pop(struct roff *);
178 static	void		 roffnode_push(struct roff *, enum roff_tok,
179 				const char *, int, int);
180 static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
181 static	int		 roff_als(ROFF_ARGS);
182 static	int		 roff_block(ROFF_ARGS);
183 static	int		 roff_block_text(ROFF_ARGS);
184 static	int		 roff_block_sub(ROFF_ARGS);
185 static	int		 roff_break(ROFF_ARGS);
186 static	int		 roff_cblock(ROFF_ARGS);
187 static	int		 roff_cc(ROFF_ARGS);
188 static	int		 roff_ccond(struct roff *, int, int);
189 static	int		 roff_char(ROFF_ARGS);
190 static	int		 roff_cond(ROFF_ARGS);
191 static	int		 roff_cond_text(ROFF_ARGS);
192 static	int		 roff_cond_sub(ROFF_ARGS);
193 static	int		 roff_ds(ROFF_ARGS);
194 static	int		 roff_ec(ROFF_ARGS);
195 static	int		 roff_eo(ROFF_ARGS);
196 static	int		 roff_eqndelim(struct roff *, struct buf *, int);
197 static	int		 roff_evalcond(struct roff *r, int, char *, int *);
198 static	int		 roff_evalnum(struct roff *, int,
199 				const char *, int *, int *, int);
200 static	int		 roff_evalpar(struct roff *, int,
201 				const char *, int *, int *, int);
202 static	int		 roff_evalstrcond(const char *, int *);
203 static	int		 roff_expand(struct roff *, struct buf *,
204 				int, int, char);
205 static	void		 roff_free1(struct roff *);
206 static	void		 roff_freereg(struct roffreg *);
207 static	void		 roff_freestr(struct roffkv *);
208 static	size_t		 roff_getname(struct roff *, char **, int, int);
209 static	int		 roff_getnum(const char *, int *, int *, int);
210 static	int		 roff_getop(const char *, int *, char *);
211 static	int		 roff_getregn(struct roff *,
212 				const char *, size_t, char);
213 static	int		 roff_getregro(const struct roff *,
214 				const char *name);
215 static	const char	*roff_getstrn(struct roff *,
216 				const char *, size_t, int *);
217 static	int		 roff_hasregn(const struct roff *,
218 				const char *, size_t);
219 static	int		 roff_insec(ROFF_ARGS);
220 static	int		 roff_it(ROFF_ARGS);
221 static	int		 roff_line_ignore(ROFF_ARGS);
222 static	void		 roff_man_alloc1(struct roff_man *);
223 static	void		 roff_man_free1(struct roff_man *);
224 static	int		 roff_manyarg(ROFF_ARGS);
225 static	int		 roff_noarg(ROFF_ARGS);
226 static	int		 roff_nop(ROFF_ARGS);
227 static	int		 roff_nr(ROFF_ARGS);
228 static	int		 roff_onearg(ROFF_ARGS);
229 static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
230 				int, int);
231 static	int		 roff_parsetext(struct roff *, struct buf *,
232 				int, int *);
233 static	int		 roff_renamed(ROFF_ARGS);
234 static	int		 roff_return(ROFF_ARGS);
235 static	int		 roff_rm(ROFF_ARGS);
236 static	int		 roff_rn(ROFF_ARGS);
237 static	int		 roff_rr(ROFF_ARGS);
238 static	void		 roff_setregn(struct roff *, const char *,
239 				size_t, int, char, int);
240 static	void		 roff_setstr(struct roff *,
241 				const char *, const char *, int);
242 static	void		 roff_setstrn(struct roffkv **, const char *,
243 				size_t, const char *, size_t, int);
244 static	int		 roff_shift(ROFF_ARGS);
245 static	int		 roff_so(ROFF_ARGS);
246 static	int		 roff_tr(ROFF_ARGS);
247 static	int		 roff_Dd(ROFF_ARGS);
248 static	int		 roff_TE(ROFF_ARGS);
249 static	int		 roff_TS(ROFF_ARGS);
250 static	int		 roff_EQ(ROFF_ARGS);
251 static	int		 roff_EN(ROFF_ARGS);
252 static	int		 roff_T_(ROFF_ARGS);
253 static	int		 roff_unsupp(ROFF_ARGS);
254 static	int		 roff_userdef(ROFF_ARGS);
255 
256 /* --- constant data ------------------------------------------------------ */
257 
/* Flag bits for the number parsing and evaluation functions. */
258 #define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
259 #define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
260 
/*
 * Names of all requests and macros, indexed by token ID:
 * roffhash_alloc() reads roff_name[tok] for every enum roff_tok value
 * in its range and skips NULL entries, so NULL marks a token that
 * has no name to look up.
 * The position of each string is significant; do not reorder, insert,
 * or delete entries without changing the token numbering accordingly.
 * The table appears to be grouped as follows: roff requests first,
 * then the mdoc(7) macros beginning with the second "Dd", then the
 * man(7) macros beginning with the second "TH".
 * The beginning of the table parallels the roffs[] array, where the
 * two NULL entries following "." line up with the handlers
 * roff_renamed and roff_userdef.
 */
261 const char *__roff_name[MAN_MAX + 1] = {
262 	"br",		"ce",		"fi",		"ft",
263 	"ll",		"mc",		"nf",
264 	"po",		"rj",		"sp",
265 	"ta",		"ti",		NULL,
266 	"ab",		"ad",		"af",		"aln",
267 	"als",		"am",		"am1",		"ami",
268 	"ami1",		"as",		"as1",		"asciify",
269 	"backtrace",	"bd",		"bleedat",	"blm",
270         "box",		"boxa",		"bp",		"BP",
271 	"break",	"breakchar",	"brnl",		"brp",
272 	"brpnl",	"c2",		"cc",
273 	"cf",		"cflags",	"ch",		"char",
274 	"chop",		"class",	"close",	"CL",
275 	"color",	"composite",	"continue",	"cp",
276 	"cropat",	"cs",		"cu",		"da",
277 	"dch",		"Dd",		"de",		"de1",
278 	"defcolor",	"dei",		"dei1",		"device",
279 	"devicem",	"di",		"do",		"ds",
280 	"ds1",		"dwh",		"dt",		"ec",
281 	"ecr",		"ecs",		"el",		"em",
282 	"EN",		"eo",		"EP",		"EQ",
283 	"errprint",	"ev",		"evc",		"ex",
284 	"fallback",	"fam",		"fc",		"fchar",
285 	"fcolor",	"fdeferlig",	"feature",	"fkern",
286 	"fl",		"flig",		"fp",		"fps",
287 	"fschar",	"fspacewidth",	"fspecial",	"ftr",
288 	"fzoom",	"gcolor",	"hc",		"hcode",
289 	"hidechar",	"hla",		"hlm",		"hpf",
290 	"hpfa",		"hpfcode",	"hw",		"hy",
291 	"hylang",	"hylen",	"hym",		"hypp",
292 	"hys",		"ie",		"if",		"ig",
293 	"index",	"it",		"itc",		"IX",
294 	"kern",		"kernafter",	"kernbefore",	"kernpair",
295 	"lc",		"lc_ctype",	"lds",		"length",
296 	"letadj",	"lf",		"lg",		"lhang",
297 	"linetabs",	"lnr",		"lnrf",		"lpfx",
298 	"ls",		"lsm",		"lt",
299 	"mediasize",	"minss",	"mk",		"mso",
300 	"na",		"ne",		"nh",		"nhychar",
301 	"nm",		"nn",		"nop",		"nr",
302 	"nrf",		"nroff",	"ns",		"nx",
303 	"open",		"opena",	"os",		"output",
304 	"padj",		"papersize",	"pc",		"pev",
305 	"pi",		"PI",		"pl",		"pm",
306 	"pn",		"pnr",		"ps",
307 	"psbb",		"pshape",	"pso",		"ptr",
308 	"pvs",		"rchar",	"rd",		"recursionlimit",
309 	"return",	"rfschar",	"rhang",
310 	"rm",		"rn",		"rnn",		"rr",
311 	"rs",		"rt",		"schar",	"sentchar",
312 	"shc",		"shift",	"sizes",	"so",
313 	"spacewidth",	"special",	"spreadwarn",	"ss",
314 	"sty",		"substring",	"sv",		"sy",
315 	"T&",		"tc",		"TE",
316 	"TH",		"tkf",		"tl",
317 	"tm",		"tm1",		"tmc",		"tr",
318 	"track",	"transchar",	"trf",		"trimat",
319 	"trin",		"trnt",		"troff",	"TS",
320 	"uf",		"ul",		"unformat",	"unwatch",
321 	"unwatchn",	"vpt",		"vs",		"warn",
322 	"warnscale",	"watch",	"watchlength",	"watchn",
323 	"wh",		"while",	"write",	"writec",
324 	"writem",	"xflag",	".",		NULL,
325 	NULL,		"text",
326 	"Dd",		"Dt",		"Os",		"Sh",
327 	"Ss",		"Pp",		"D1",		"Dl",
328 	"Bd",		"Ed",		"Bl",		"El",
329 	"It",		"Ad",		"An",		"Ap",
330 	"Ar",		"Cd",		"Cm",		"Dv",
331 	"Er",		"Ev",		"Ex",		"Fa",
332 	"Fd",		"Fl",		"Fn",		"Ft",
333 	"Ic",		"In",		"Li",		"Nd",
334 	"Nm",		"Op",		"Ot",		"Pa",
335 	"Rv",		"St",		"Va",		"Vt",
336 	"Xr",		"%A",		"%B",		"%D",
337 	"%I",		"%J",		"%N",		"%O",
338 	"%P",		"%R",		"%T",		"%V",
339 	"Ac",		"Ao",		"Aq",		"At",
340 	"Bc",		"Bf",		"Bo",		"Bq",
341 	"Bsx",		"Bx",		"Db",		"Dc",
342 	"Do",		"Dq",		"Ec",		"Ef",
343 	"Em",		"Eo",		"Fx",		"Ms",
344 	"No",		"Ns",		"Nx",		"Ox",
345 	"Pc",		"Pf",		"Po",		"Pq",
346 	"Qc",		"Ql",		"Qo",		"Qq",
347 	"Re",		"Rs",		"Sc",		"So",
348 	"Sq",		"Sm",		"Sx",		"Sy",
349 	"Tn",		"Ux",		"Xc",		"Xo",
350 	"Fo",		"Fc",		"Oo",		"Oc",
351 	"Bk",		"Ek",		"Bt",		"Hf",
352 	"Fr",		"Ud",		"Lb",		"Lp",
353 	"Lk",		"Mt",		"Brq",		"Bro",
354 	"Brc",		"%C",		"Es",		"En",
355 	"Dx",		"%Q",		"%U",		"Ta",
356 	"Tg",		NULL,
357 	"TH",		"SH",		"SS",		"TP",
358 	"TQ",
359 	"LP",		"PP",		"P",		"IP",
360 	"HP",		"SM",		"SB",		"BI",
361 	"IB",		"BR",		"RB",		"R",
362 	"B",		"I",		"IR",		"RI",
363 	"RE",		"RS",		"DT",		"UC",
364 	"PD",		"AT",		"in",
365 	"SY",		"YS",		"OP",
366 	"EX",		"EE",		"UR",
367 	"UE",		"MT",		"ME",		NULL
368 };
/* Read-only view of the table above; this is what roffhash_alloc() reads. */
369 const	char *const *roff_name = __roff_name;
370 
/*
 * Dispatch table of the roff requests: one struct roffmac per token,
 * in token order, with the request name given in the comment at the
 * end of each line.  The entries are in the same order as the
 * beginning of the __roff_name[] table and must stay that way.
 * Each entry gives the handler for the request itself (proc),
 * the handlers used for text and for requests inside its scope
 * (text, sub), and the flags.
 */
371 static	struct roffmac	 roffs[TOKEN_NONE] = {
372 	{ roff_noarg, NULL, NULL, 0 },  /* br */
373 	{ roff_onearg, NULL, NULL, 0 },  /* ce */
374 	{ roff_noarg, NULL, NULL, 0 },  /* fi */
375 	{ roff_onearg, NULL, NULL, 0 },  /* ft */
376 	{ roff_onearg, NULL, NULL, 0 },  /* ll */
377 	{ roff_onearg, NULL, NULL, 0 },  /* mc */
378 	{ roff_noarg, NULL, NULL, 0 },  /* nf */
379 	{ roff_onearg, NULL, NULL, 0 },  /* po */
380 	{ roff_onearg, NULL, NULL, 0 },  /* rj */
381 	{ roff_onearg, NULL, NULL, 0 },  /* sp */
382 	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
383 	{ roff_onearg, NULL, NULL, 0 },  /* ti */
384 	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
385 	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
386 	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
387 	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
388 	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
389 	{ roff_als, NULL, NULL, 0 },  /* als */
390 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
391 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
392 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
393 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
394 	{ roff_ds, NULL, NULL, 0 },  /* as */
395 	{ roff_ds, NULL, NULL, 0 },  /* as1 */
396 	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
397 	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
398 	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
399 	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
400 	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
401 	{ roff_unsupp, NULL, NULL, 0 },  /* box */
402 	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
403 	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
404 	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
405 	{ roff_break, NULL, NULL, 0 },  /* break */
406 	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
407 	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
408 	{ roff_noarg, NULL, NULL, 0 },  /* brp */
409 	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
410 	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
411 	{ roff_cc, NULL, NULL, 0 },  /* cc */
412 	{ roff_insec, NULL, NULL, 0 },  /* cf */
413 	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
414 	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
415 	{ roff_char, NULL, NULL, 0 },  /* char */
416 	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
417 	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
418 	{ roff_insec, NULL, NULL, 0 },  /* close */
419 	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
420 	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
421 	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
422 	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
423 	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
424 	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
425 	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
426 	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
427 	{ roff_unsupp, NULL, NULL, 0 },  /* da */
428 	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
429 	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
430 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
431 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
432 	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
433 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
434 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
435 	{ roff_unsupp, NULL, NULL, 0 },  /* device */
436 	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
437 	{ roff_unsupp, NULL, NULL, 0 },  /* di */
438 	{ roff_unsupp, NULL, NULL, 0 },  /* do */
439 	{ roff_ds, NULL, NULL, 0 },  /* ds */
440 	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
441 	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
442 	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
443 	{ roff_ec, NULL, NULL, 0 },  /* ec */
444 	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
445 	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
446 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
447 	{ roff_unsupp, NULL, NULL, 0 },  /* em */
448 	{ roff_EN, NULL, NULL, 0 },  /* EN */
449 	{ roff_eo, NULL, NULL, 0 },  /* eo */
450 	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
451 	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
452 	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
453 	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
454 	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
455 	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
456 	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
457 	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
458 	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
459 	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
460 	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
461 	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
462 	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
463 	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
464 	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
465 	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
466 	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
467 	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
468 	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
469 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
470 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
471 	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
472 	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
473 	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
474 	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
475 	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
476 	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
477 	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
478 	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
479 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
480 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
481 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
482 	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
483 	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
484 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
485 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
486 	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
487 	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
488 	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
489 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
490 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
491 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
492 	{ roff_unsupp, NULL, NULL, 0 },  /* index */
493 	{ roff_it, NULL, NULL, 0 },  /* it */
494 	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
495 	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
496 	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
497 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
498 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
499 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
500 	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
501 	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
502 	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
503 	{ roff_unsupp, NULL, NULL, 0 },  /* length */
504 	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
505 	{ roff_insec, NULL, NULL, 0 },  /* lf */
506 	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
507 	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
508 	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
509 	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
510 	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
511 	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
512 	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
513 	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
514 	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
515 	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
516 	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
517 	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
518 	{ roff_insec, NULL, NULL, 0 },  /* mso */
519 	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
520 	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
521 	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
522 	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
523 	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
524 	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
525 	{ roff_nop, NULL, NULL, 0 },  /* nop */
526 	{ roff_nr, NULL, NULL, 0 },  /* nr */
527 	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
528 	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
529 	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
530 	{ roff_insec, NULL, NULL, 0 },  /* nx */
531 	{ roff_insec, NULL, NULL, 0 },  /* open */
532 	{ roff_insec, NULL, NULL, 0 },  /* opena */
533 	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
534 	{ roff_unsupp, NULL, NULL, 0 },  /* output */
535 	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
536 	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
537 	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
538 	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
539 	{ roff_insec, NULL, NULL, 0 },  /* pi */
540 	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
541 	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
542 	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
543 	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
544 	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
545 	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
546 	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
547 	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
548 	{ roff_insec, NULL, NULL, 0 },  /* pso */
549 	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
550 	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
551 	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
552 	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
553 	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
554 	{ roff_return, NULL, NULL, 0 },  /* return */
555 	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
556 	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
557 	{ roff_rm, NULL, NULL, 0 },  /* rm */
558 	{ roff_rn, NULL, NULL, 0 },  /* rn */
559 	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
560 	{ roff_rr, NULL, NULL, 0 },  /* rr */
561 	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
562 	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
563 	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
564 	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
565 	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
566 	{ roff_shift, NULL, NULL, 0 },  /* shift */
567 	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
568 	{ roff_so, NULL, NULL, 0 },  /* so */
569 	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
570 	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
571 	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
572 	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
573 	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
574 	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
575 	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
576 	{ roff_insec, NULL, NULL, 0 },  /* sy */
577 	{ roff_T_, NULL, NULL, 0 },  /* T& */
578 	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
579 	{ roff_TE, NULL, NULL, 0 },  /* TE */
580 	{ roff_Dd, NULL, NULL, 0 },  /* TH */
581 	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
582 	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
583 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
584 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
585 	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
586 	{ roff_tr, NULL, NULL, 0 },  /* tr */
587 	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
588 	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
589 	{ roff_insec, NULL, NULL, 0 },  /* trf */
590 	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
591 	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
592 	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
593 	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
594 	{ roff_TS, NULL, NULL, 0 },  /* TS */
595 	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
596 	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
597 	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
598 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
599 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
600 	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
601 	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
602 	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
603 	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
604 	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
605 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
606 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
607 	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
608 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
609 	{ roff_insec, NULL, NULL, 0 },  /* write */
610 	{ roff_insec, NULL, NULL, 0 },  /* writec */
611 	{ roff_insec, NULL, NULL, 0 },  /* writem */
612 	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
613 	{ roff_cblock, NULL, NULL, 0 },  /* . */
614 	{ roff_renamed, NULL, NULL, 0 },  /* no name of its own: renamed requests and macros */
615 	{ roff_userdef, NULL, NULL, 0 }  /* no name of its own: user-defined macros */
616 };
617 
618 /* Array of injected predefined strings. */
/*
 * The initializers come from "predefs.in", which is not part of this
 * file.  NOTE(review): PREDEFS_MAX is a hand-maintained count; if
 * predefs.in supplies fewer entries, the remaining elements are
 * zero-initialized (NULL name and str), and if it supplies more,
 * compilation fails.  Confirm that both stay in sync.
 */
619 #define	PREDEFS_MAX	 38
620 static	const struct predef predefs[PREDEFS_MAX] = {
621 #include "predefs.in"
622 };
623 
/*
 * State of pending `ce' and `it' style requests.
 * These variables have file scope rather than being members of
 * struct roff, so all parser objects share them.
 * roff_reset() sets all four back to 0 or NULL.
 */
624 static	int	 roffce_lines;	/* number of input lines to center */
625 static	struct roff_node *roffce_node;  /* active request */
626 static	int	 roffit_lines;  /* number of lines to delay */
627 static	char	*roffit_macro;  /* nil-terminated macro line */
628 
629 
630 /* --- request table ------------------------------------------------------ */
631 
632 struct ohash *
633 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
634 {
635 	struct ohash	*htab;
636 	struct roffreq	*req;
637 	enum roff_tok	 tok;
638 	size_t		 sz;
639 	unsigned int	 slot;
640 
641 	htab = mandoc_malloc(sizeof(*htab));
642 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
643 
644 	for (tok = mintok; tok < maxtok; tok++) {
645 		if (roff_name[tok] == NULL)
646 			continue;
647 		sz = strlen(roff_name[tok]);
648 		req = mandoc_malloc(sizeof(*req) + sz + 1);
649 		req->tok = tok;
650 		memcpy(req->name, roff_name[tok], sz + 1);
651 		slot = ohash_qlookup(htab, req->name);
652 		ohash_insert(htab, slot, req);
653 	}
654 	return htab;
655 }
656 
/*
 * Release a table built by roffhash_alloc(): first every entry,
 * then the internals of the hash table, then the table object.
 * A NULL pointer is accepted and nothing happens in that case.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
671 
672 enum roff_tok
673 roffhash_find(struct ohash *htab, const char *name, size_t sz)
674 {
675 	struct roffreq	*req;
676 	const char	*end;
677 
678 	if (sz) {
679 		end = name + sz;
680 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
681 	} else
682 		req = ohash_find(htab, ohash_qlookup(htab, name));
683 	return req == NULL ? TOKEN_NONE : req->tok;
684 }
685 
686 /* --- stack of request blocks -------------------------------------------- */
687 
688 /*
689  * Pop the current node off of the stack of roff instructions currently
690  * pending.  Return 1 if it is a loop or 0 otherwise.
691  */
692 static int
693 roffnode_pop(struct roff *r)
694 {
695 	struct roffnode	*p;
696 	int		 inloop;
697 
698 	p = r->last;
699 	inloop = p->tok == ROFF_while;
700 	r->last = p->parent;
701 	free(p->name);
702 	free(p->end);
703 	free(p);
704 	return inloop;
705 }
706 
707 /*
708  * Push a roff node onto the instruction stack.  This must later be
709  * removed with roffnode_pop().
710  */
711 static void
712 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
713 		int line, int col)
714 {
715 	struct roffnode	*p;
716 
717 	p = mandoc_calloc(1, sizeof(struct roffnode));
718 	p->tok = tok;
719 	if (name)
720 		p->name = mandoc_strdup(name);
721 	p->parent = r->last;
722 	p->line = line;
723 	p->col = col;
724 	p->rule = p->parent ? p->parent->rule : 0;
725 
726 	r->last = p;
727 }
728 
729 /* --- roff parser state data management ---------------------------------- */
730 
731 static void
732 roff_free1(struct roff *r)
733 {
734 	int		 i;
735 
736 	tbl_free(r->first_tbl);
737 	r->first_tbl = r->last_tbl = r->tbl = NULL;
738 
739 	eqn_free(r->last_eqn);
740 	r->last_eqn = r->eqn = NULL;
741 
742 	while (r->mstackpos >= 0)
743 		roff_userret(r);
744 
745 	while (r->last)
746 		roffnode_pop(r);
747 
748 	free (r->rstack);
749 	r->rstack = NULL;
750 	r->rstacksz = 0;
751 	r->rstackpos = -1;
752 
753 	roff_freereg(r->regtab);
754 	r->regtab = NULL;
755 
756 	roff_freestr(r->strtab);
757 	roff_freestr(r->rentab);
758 	roff_freestr(r->xmbtab);
759 	r->strtab = r->rentab = r->xmbtab = NULL;
760 
761 	if (r->xtab)
762 		for (i = 0; i < 128; i++)
763 			free(r->xtab[i].p);
764 	free(r->xtab);
765 	r->xtab = NULL;
766 }
767 
768 void
769 roff_reset(struct roff *r)
770 {
771 	roff_free1(r);
772 	r->options |= MPARSE_COMMENT;
773 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
774 	r->control = '\0';
775 	r->escape = '\\';
776 	roffce_lines = 0;
777 	roffce_node = NULL;
778 	roffit_lines = 0;
779 	roffit_macro = NULL;
780 }
781 
782 void
783 roff_free(struct roff *r)
784 {
785 	int		 i;
786 
787 	roff_free1(r);
788 	for (i = 0; i < r->mstacksz; i++)
789 		free(r->mstack[i].argv);
790 	free(r->mstack);
791 	roffhash_free(r->reqtab);
792 	free(r);
793 }
794 
795 struct roff *
796 roff_alloc(int options)
797 {
798 	struct roff	*r;
799 
800 	r = mandoc_calloc(1, sizeof(struct roff));
801 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
802 	r->options = options | MPARSE_COMMENT;
803 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
804 	r->mstackpos = -1;
805 	r->rstackpos = -1;
806 	r->escape = '\\';
807 	return r;
808 }
809 
810 /* --- syntax tree state data management ---------------------------------- */
811 
812 static void
813 roff_man_free1(struct roff_man *man)
814 {
815 	if (man->meta.first != NULL)
816 		roff_node_delete(man, man->meta.first);
817 	free(man->meta.msec);
818 	free(man->meta.vol);
819 	free(man->meta.os);
820 	free(man->meta.arch);
821 	free(man->meta.title);
822 	free(man->meta.name);
823 	free(man->meta.date);
824 	free(man->meta.sodest);
825 }
826 
827 void
828 roff_state_reset(struct roff_man *man)
829 {
830 	man->last = man->meta.first;
831 	man->last_es = NULL;
832 	man->flags = 0;
833 	man->lastsec = man->lastnamed = SEC_NONE;
834 	man->next = ROFF_NEXT_CHILD;
835 	roff_setreg(man->roff, "nS", 0, '=');
836 }
837 
838 static void
839 roff_man_alloc1(struct roff_man *man)
840 {
841 	memset(&man->meta, 0, sizeof(man->meta));
842 	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
843 	man->meta.first->type = ROFFT_ROOT;
844 	man->meta.macroset = MACROSET_NONE;
845 	roff_state_reset(man);
846 }
847 
/*
 * Recycle a syntax tree object for another document:
 * discard the old contents, then set up fresh, empty ones.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Tear down first so that nothing from the old tree leaks. */
	roff_man_free1(man);

	/* Rebuild the empty root node and the initial parse state. */
	roff_man_alloc1(man);
}
854 
/*
 * Destroy a syntax tree object: first everything it owns,
 * then the object itself.
 */
void
roff_man_free(struct roff_man *man)
{
	/* Contents before container. */
	roff_man_free1(man);

	free(man);
}
861 
862 struct roff_man *
863 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
864 {
865 	struct roff_man *man;
866 
867 	man = mandoc_calloc(1, sizeof(*man));
868 	man->roff = roff;
869 	man->os_s = os_s;
870 	man->quick = quick;
871 	roff_man_alloc1(man);
872 	roff->man = man;
873 	return man;
874 }
875 
876 /* --- syntax tree handling ----------------------------------------------- */
877 
878 struct roff_node *
879 roff_node_alloc(struct roff_man *man, int line, int pos,
880 	enum roff_type type, int tok)
881 {
882 	struct roff_node	*n;
883 
884 	n = mandoc_calloc(1, sizeof(*n));
885 	n->line = line;
886 	n->pos = pos;
887 	n->tok = tok;
888 	n->type = type;
889 	n->sec = man->lastsec;
890 
891 	if (man->flags & MDOC_SYNOPSIS)
892 		n->flags |= NODE_SYNPRETTY;
893 	else
894 		n->flags &= ~NODE_SYNPRETTY;
895 	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
896 		n->flags |= NODE_NOFILL;
897 	else
898 		n->flags &= ~NODE_NOFILL;
899 	if (man->flags & MDOC_NEWLINE)
900 		n->flags |= NODE_LINE;
901 	man->flags &= ~MDOC_NEWLINE;
902 
903 	return n;
904 }
905 
906 void
907 roff_node_append(struct roff_man *man, struct roff_node *n)
908 {
909 
910 	switch (man->next) {
911 	case ROFF_NEXT_SIBLING:
912 		if (man->last->next != NULL) {
913 			n->next = man->last->next;
914 			man->last->next->prev = n;
915 		} else
916 			man->last->parent->last = n;
917 		man->last->next = n;
918 		n->prev = man->last;
919 		n->parent = man->last->parent;
920 		break;
921 	case ROFF_NEXT_CHILD:
922 		if (man->last->child != NULL) {
923 			n->next = man->last->child;
924 			man->last->child->prev = n;
925 		} else
926 			man->last->last = n;
927 		man->last->child = n;
928 		n->parent = man->last;
929 		break;
930 	default:
931 		abort();
932 	}
933 	man->last = n;
934 
935 	switch (n->type) {
936 	case ROFFT_HEAD:
937 		n->parent->head = n;
938 		break;
939 	case ROFFT_BODY:
940 		if (n->end != ENDBODY_NOT)
941 			return;
942 		n->parent->body = n;
943 		break;
944 	case ROFFT_TAIL:
945 		n->parent->tail = n;
946 		break;
947 	default:
948 		return;
949 	}
950 
951 	/*
952 	 * Copy over the normalised-data pointer of our parent.  Not
953 	 * everybody has one, but copying a null pointer is fine.
954 	 */
955 
956 	n->norm = n->parent->norm;
957 	assert(n->parent->type == ROFFT_BLOCK);
958 }
959 
960 void
961 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
962 {
963 	struct roff_node	*n;
964 
965 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
966 	n->string = roff_strdup(man->roff, word);
967 	roff_node_append(man, n);
968 	n->flags |= NODE_VALID | NODE_ENDED;
969 	man->next = ROFF_NEXT_SIBLING;
970 }
971 
972 void
973 roff_word_append(struct roff_man *man, const char *word)
974 {
975 	struct roff_node	*n;
976 	char			*addstr, *newstr;
977 
978 	n = man->last;
979 	addstr = roff_strdup(man->roff, word);
980 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
981 	free(addstr);
982 	free(n->string);
983 	n->string = newstr;
984 	man->next = ROFF_NEXT_SIBLING;
985 }
986 
987 void
988 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
989 {
990 	struct roff_node	*n;
991 
992 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
993 	roff_node_append(man, n);
994 	man->next = ROFF_NEXT_CHILD;
995 }
996 
997 struct roff_node *
998 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
999 {
1000 	struct roff_node	*n;
1001 
1002 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1003 	roff_node_append(man, n);
1004 	man->next = ROFF_NEXT_CHILD;
1005 	return n;
1006 }
1007 
1008 struct roff_node *
1009 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1010 {
1011 	struct roff_node	*n;
1012 
1013 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1014 	roff_node_append(man, n);
1015 	man->next = ROFF_NEXT_CHILD;
1016 	return n;
1017 }
1018 
1019 struct roff_node *
1020 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1021 {
1022 	struct roff_node	*n;
1023 
1024 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1025 	roff_node_append(man, n);
1026 	man->next = ROFF_NEXT_CHILD;
1027 	return n;
1028 }
1029 
1030 static void
1031 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1032 {
1033 	struct roff_node	*n;
1034 	struct tbl_span		*span;
1035 
1036 	if (man->meta.macroset == MACROSET_MAN)
1037 		man_breakscope(man, ROFF_TS);
1038 	while ((span = tbl_span(tbl)) != NULL) {
1039 		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1040 		n->span = span;
1041 		roff_node_append(man, n);
1042 		n->flags |= NODE_VALID | NODE_ENDED;
1043 		man->next = ROFF_NEXT_SIBLING;
1044 	}
1045 }
1046 
1047 void
1048 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1049 {
1050 
1051 	/* Adjust siblings. */
1052 
1053 	if (n->prev)
1054 		n->prev->next = n->next;
1055 	if (n->next)
1056 		n->next->prev = n->prev;
1057 
1058 	/* Adjust parent. */
1059 
1060 	if (n->parent != NULL) {
1061 		if (n->parent->child == n)
1062 			n->parent->child = n->next;
1063 		if (n->parent->last == n)
1064 			n->parent->last = n->prev;
1065 	}
1066 
1067 	/* Adjust parse point. */
1068 
1069 	if (man == NULL)
1070 		return;
1071 	if (man->last == n) {
1072 		if (n->prev == NULL) {
1073 			man->last = n->parent;
1074 			man->next = ROFF_NEXT_CHILD;
1075 		} else {
1076 			man->last = n->prev;
1077 			man->next = ROFF_NEXT_SIBLING;
1078 		}
1079 	}
1080 	if (man->meta.first == n)
1081 		man->meta.first = NULL;
1082 }
1083 
1084 void
1085 roff_node_relink(struct roff_man *man, struct roff_node *n)
1086 {
1087 	roff_node_unlink(man, n);
1088 	n->prev = n->next = NULL;
1089 	roff_node_append(man, n);
1090 }
1091 
1092 void
1093 roff_node_free(struct roff_node *n)
1094 {
1095 
1096 	if (n->args != NULL)
1097 		mdoc_argv_free(n->args);
1098 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1099 		free(n->norm);
1100 	eqn_box_free(n->eqn);
1101 	free(n->string);
1102 	free(n);
1103 }
1104 
1105 void
1106 roff_node_delete(struct roff_man *man, struct roff_node *n)
1107 {
1108 
1109 	while (n->child != NULL)
1110 		roff_node_delete(man, n->child);
1111 	roff_node_unlink(man, n);
1112 	roff_node_free(n);
1113 }
1114 
1115 int
1116 roff_node_transparent(struct roff_node *n)
1117 {
1118 	if (n == NULL)
1119 		return 0;
1120 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1121 		return 1;
1122 	switch (n->tok) {
1123 	case ROFF_ft:
1124 	case ROFF_ll:
1125 	case ROFF_mc:
1126 	case ROFF_po:
1127 	case ROFF_ta:
1128 	case MDOC_Db:
1129 	case MDOC_Es:
1130 	case MDOC_Sm:
1131 	case MDOC_Tg:
1132 	case MAN_DT:
1133 	case MAN_UC:
1134 	case MAN_PD:
1135 	case MAN_AT:
1136 		return 1;
1137 	default:
1138 		return 0;
1139 	}
1140 }
1141 
1142 struct roff_node *
1143 roff_node_child(struct roff_node *n)
1144 {
1145 	for (n = n->child; roff_node_transparent(n); n = n->next)
1146 		continue;
1147 	return n;
1148 }
1149 
1150 struct roff_node *
1151 roff_node_prev(struct roff_node *n)
1152 {
1153 	do {
1154 		n = n->prev;
1155 	} while (roff_node_transparent(n));
1156 	return n;
1157 }
1158 
1159 struct roff_node *
1160 roff_node_next(struct roff_node *n)
1161 {
1162 	do {
1163 		n = n->next;
1164 	} while (roff_node_transparent(n));
1165 	return n;
1166 }
1167 
1168 void
1169 deroff(char **dest, const struct roff_node *n)
1170 {
1171 	char	*cp;
1172 	size_t	 sz;
1173 
1174 	if (n->string == NULL) {
1175 		for (n = n->child; n != NULL; n = n->next)
1176 			deroff(dest, n);
1177 		return;
1178 	}
1179 
1180 	/* Skip leading whitespace. */
1181 
1182 	for (cp = n->string; *cp != '\0'; cp++) {
1183 		if (cp[0] == '\\' && cp[1] != '\0' &&
1184 		    strchr(" %&0^|~", cp[1]) != NULL)
1185 			cp++;
1186 		else if ( ! isspace((unsigned char)*cp))
1187 			break;
1188 	}
1189 
1190 	/* Skip trailing backslash. */
1191 
1192 	sz = strlen(cp);
1193 	if (sz > 0 && cp[sz - 1] == '\\')
1194 		sz--;
1195 
1196 	/* Skip trailing whitespace. */
1197 
1198 	for (; sz; sz--)
1199 		if ( ! isspace((unsigned char)cp[sz-1]))
1200 			break;
1201 
1202 	/* Skip empty strings. */
1203 
1204 	if (sz == 0)
1205 		return;
1206 
1207 	if (*dest == NULL) {
1208 		*dest = mandoc_strndup(cp, sz);
1209 		return;
1210 	}
1211 
1212 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1213 	free(*dest);
1214 	*dest = cp;
1215 }
1216 
1217 /* --- main functions of the roff parser ---------------------------------- */
1218 
1219 /*
1220  * In the current line, expand escape sequences that produce parsable
1221  * input text.  Also check the syntax of the remaining escape sequences,
1222  * which typically produce output glyphs or change formatter state.
1223  */
1224 static int
1225 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1226 {
1227 	struct mctx	*ctx;	/* current macro call context */
1228 	char		 ubuf[24]; /* buffer to print the number */
1229 	struct roff_node *n;	/* used for header comments */
1230 	const char	*start;	/* start of the string to process */
1231 	char		*stesc;	/* start of an escape sequence ('\\') */
1232 	const char	*esct;	/* type of esccape sequence */
1233 	char		*ep;	/* end of comment string */
1234 	const char	*stnam;	/* start of the name, after "[(*" */
1235 	const char	*cp;	/* end of the name, e.g. before ']' */
1236 	const char	*res;	/* the string to be substituted */
1237 	char		*nbuf;	/* new buffer to copy buf->buf to */
1238 	size_t		 maxl;  /* expected length of the escape name */
1239 	size_t		 naml;	/* actual length of the escape name */
1240 	size_t		 asz;	/* length of the replacement */
1241 	size_t		 rsz;	/* length of the rest of the string */
1242 	int		 inaml;	/* length returned from mandoc_escape() */
1243 	int		 expand_count;	/* to avoid infinite loops */
1244 	int		 npos;	/* position in numeric expression */
1245 	int		 arg_complete; /* argument not interrupted by eol */
1246 	int		 quote_args; /* true for \\$@, false for \\$* */
1247 	int		 done;	/* no more input available */
1248 	int		 deftype; /* type of definition to paste */
1249 	int		 rcsid;	/* kind of RCS id seen */
1250 	enum mandocerr	 err;	/* for escape sequence problems */
1251 	char		 sign;	/* increment number register */
1252 	char		 term;	/* character terminating the escape */
1253 
1254 	/* Search forward for comments. */
1255 
1256 	done = 0;
1257 	start = buf->buf + pos;
1258 	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1259 		if (stesc[0] != newesc || stesc[1] == '\0')
1260 			continue;
1261 		stesc++;
1262 		if (*stesc != '"' && *stesc != '#')
1263 			continue;
1264 
1265 		/* Comment found, look for RCS id. */
1266 
1267 		rcsid = 0;
1268 		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1269 			rcsid = 1 << MANDOC_OS_OPENBSD;
1270 			cp += 8;
1271 		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1272 			rcsid = 1 << MANDOC_OS_NETBSD;
1273 			cp += 7;
1274 		}
1275 		if (cp != NULL &&
1276 		    isalnum((unsigned char)*cp) == 0 &&
1277 		    strchr(cp, '$') != NULL) {
1278 			if (r->man->meta.rcsids & rcsid)
1279 				mandoc_msg(MANDOCERR_RCS_REP, ln,
1280 				    (int)(stesc - buf->buf) + 1,
1281 				    "%s", stesc + 1);
1282 			r->man->meta.rcsids |= rcsid;
1283 		}
1284 
1285 		/* Handle trailing whitespace. */
1286 
1287 		ep = strchr(stesc--, '\0') - 1;
1288 		if (*ep == '\n') {
1289 			done = 1;
1290 			ep--;
1291 		}
1292 		if (*ep == ' ' || *ep == '\t')
1293 			mandoc_msg(MANDOCERR_SPACE_EOL,
1294 			    ln, (int)(ep - buf->buf), NULL);
1295 
1296 		/*
1297 		 * Save comments preceding the title macro
1298 		 * in the syntax tree.
1299 		 */
1300 
1301 		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
1302 			while (*ep == ' ' || *ep == '\t')
1303 				ep--;
1304 			ep[1] = '\0';
1305 			n = roff_node_alloc(r->man,
1306 			    ln, stesc + 1 - buf->buf,
1307 			    ROFFT_COMMENT, TOKEN_NONE);
1308 			n->string = mandoc_strdup(stesc + 2);
1309 			roff_node_append(r->man, n);
1310 			n->flags |= NODE_VALID | NODE_ENDED;
1311 			r->man->next = ROFF_NEXT_SIBLING;
1312 		}
1313 
1314 		/* Line continuation with comment. */
1315 
1316 		if (stesc[1] == '#') {
1317 			*stesc = '\0';
1318 			return ROFF_IGN | ROFF_APPEND;
1319 		}
1320 
1321 		/* Discard normal comments. */
1322 
1323 		while (stesc > start && stesc[-1] == ' ' &&
1324 		    (stesc == start + 1 || stesc[-2] != '\\'))
1325 			stesc--;
1326 		*stesc = '\0';
1327 		break;
1328 	}
1329 	if (stesc == start)
1330 		return ROFF_CONT;
1331 	stesc--;
1332 
1333 	/* Notice the end of the input. */
1334 
1335 	if (*stesc == '\n') {
1336 		*stesc-- = '\0';
1337 		done = 1;
1338 	}
1339 
1340 	expand_count = 0;
1341 	while (stesc >= start) {
1342 		if (*stesc != newesc) {
1343 
1344 			/*
1345 			 * If we have a non-standard escape character,
1346 			 * escape literal backslashes because all
1347 			 * processing in subsequent functions uses
1348 			 * the standard escaping rules.
1349 			 */
1350 
1351 			if (newesc != ASCII_ESC && *stesc == '\\') {
1352 				*stesc = '\0';
1353 				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1354 				    buf->buf, stesc + 1) + 1;
1355 				start = nbuf + pos;
1356 				stesc = nbuf + (stesc - buf->buf);
1357 				free(buf->buf);
1358 				buf->buf = nbuf;
1359 			}
1360 
1361 			/* Search backwards for the next escape. */
1362 
1363 			stesc--;
1364 			continue;
1365 		}
1366 
1367 		/* If it is escaped, skip it. */
1368 
1369 		for (cp = stesc - 1; cp >= start; cp--)
1370 			if (*cp != r->escape)
1371 				break;
1372 
1373 		if ((stesc - cp) % 2 == 0) {
1374 			while (stesc > cp)
1375 				*stesc-- = '\\';
1376 			continue;
1377 		} else if (stesc[1] != '\0') {
1378 			*stesc = '\\';
1379 		} else {
1380 			*stesc-- = '\0';
1381 			if (done)
1382 				continue;
1383 			else
1384 				return ROFF_IGN | ROFF_APPEND;
1385 		}
1386 
1387 		/* Decide whether to expand or to check only. */
1388 
1389 		term = '\0';
1390 		cp = stesc + 1;
1391 		if (*cp == 'E')
1392 			cp++;
1393 		esct = cp;
1394 		switch (*esct) {
1395 		case '*':
1396 		case '$':
1397 			res = NULL;
1398 			break;
1399 		case 'B':
1400 		case 'w':
1401 			term = cp[1];
1402 			/* FALLTHROUGH */
1403 		case 'n':
1404 			sign = cp[1];
1405 			if (sign == '+' || sign == '-')
1406 				cp++;
1407 			res = ubuf;
1408 			break;
1409 		default:
1410 			err = MANDOCERR_OK;
1411 			switch(mandoc_escape(&cp, &stnam, &inaml)) {
1412 			case ESCAPE_SPECIAL:
1413 				if (mchars_spec2cp(stnam, inaml) >= 0)
1414 					break;
1415 				/* FALLTHROUGH */
1416 			case ESCAPE_ERROR:
1417 				err = MANDOCERR_ESC_BAD;
1418 				break;
1419 			case ESCAPE_UNDEF:
1420 				err = MANDOCERR_ESC_UNDEF;
1421 				break;
1422 			case ESCAPE_UNSUPP:
1423 				err = MANDOCERR_ESC_UNSUPP;
1424 				break;
1425 			default:
1426 				break;
1427 			}
1428 			if (err != MANDOCERR_OK)
1429 				mandoc_msg(err, ln, (int)(stesc - buf->buf),
1430 				    "%.*s", (int)(cp - stesc), stesc);
1431 			stesc--;
1432 			continue;
1433 		}
1434 
1435 		if (EXPAND_LIMIT < ++expand_count) {
1436 			mandoc_msg(MANDOCERR_ROFFLOOP,
1437 			    ln, (int)(stesc - buf->buf), NULL);
1438 			return ROFF_IGN;
1439 		}
1440 
1441 		/*
1442 		 * The third character decides the length
1443 		 * of the name of the string or register.
1444 		 * Save a pointer to the name.
1445 		 */
1446 
1447 		if (term == '\0') {
1448 			switch (*++cp) {
1449 			case '\0':
1450 				maxl = 0;
1451 				break;
1452 			case '(':
1453 				cp++;
1454 				maxl = 2;
1455 				break;
1456 			case '[':
1457 				cp++;
1458 				term = ']';
1459 				maxl = 0;
1460 				break;
1461 			default:
1462 				maxl = 1;
1463 				break;
1464 			}
1465 		} else {
1466 			cp += 2;
1467 			maxl = 0;
1468 		}
1469 		stnam = cp;
1470 
1471 		/* Advance to the end of the name. */
1472 
1473 		naml = 0;
1474 		arg_complete = 1;
1475 		while (maxl == 0 || naml < maxl) {
1476 			if (*cp == '\0') {
1477 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
1478 				    (int)(stesc - buf->buf), "%s", stesc);
1479 				arg_complete = 0;
1480 				break;
1481 			}
1482 			if (maxl == 0 && *cp == term) {
1483 				cp++;
1484 				break;
1485 			}
1486 			if (*cp++ != '\\' || *esct != 'w') {
1487 				naml++;
1488 				continue;
1489 			}
1490 			switch (mandoc_escape(&cp, NULL, NULL)) {
1491 			case ESCAPE_SPECIAL:
1492 			case ESCAPE_UNICODE:
1493 			case ESCAPE_NUMBERED:
1494 			case ESCAPE_UNDEF:
1495 			case ESCAPE_OVERSTRIKE:
1496 				naml++;
1497 				break;
1498 			default:
1499 				break;
1500 			}
1501 		}
1502 
1503 		/*
1504 		 * Retrieve the replacement string; if it is
1505 		 * undefined, resume searching for escapes.
1506 		 */
1507 
1508 		switch (*esct) {
1509 		case '*':
1510 			if (arg_complete) {
1511 				deftype = ROFFDEF_USER | ROFFDEF_PRE;
1512 				res = roff_getstrn(r, stnam, naml, &deftype);
1513 
1514 				/*
1515 				 * If not overriden, let \*(.T
1516 				 * through to the formatters.
1517 				 */
1518 
1519 				if (res == NULL && naml == 2 &&
1520 				    stnam[0] == '.' && stnam[1] == 'T') {
1521 					roff_setstrn(&r->strtab,
1522 					    ".T", 2, NULL, 0, 0);
1523 					stesc--;
1524 					continue;
1525 				}
1526 			}
1527 			break;
1528 		case '$':
1529 			if (r->mstackpos < 0) {
1530 				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1531 				    (int)(stesc - buf->buf), "%.3s", stesc);
1532 				break;
1533 			}
1534 			ctx = r->mstack + r->mstackpos;
1535 			npos = esct[1] - '1';
1536 			if (npos >= 0 && npos <= 8) {
1537 				res = npos < ctx->argc ?
1538 				    ctx->argv[npos] : "";
1539 				break;
1540 			}
1541 			if (esct[1] == '*')
1542 				quote_args = 0;
1543 			else if (esct[1] == '@')
1544 				quote_args = 1;
1545 			else {
1546 				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1547 				    (int)(stesc - buf->buf), "%.3s", stesc);
1548 				break;
1549 			}
1550 			asz = 0;
1551 			for (npos = 0; npos < ctx->argc; npos++) {
1552 				if (npos)
1553 					asz++;  /* blank */
1554 				if (quote_args)
1555 					asz += 2;  /* quotes */
1556 				asz += strlen(ctx->argv[npos]);
1557 			}
1558 			if (asz != 3) {
1559 				rsz = buf->sz - (stesc - buf->buf) - 3;
1560 				if (asz < 3)
1561 					memmove(stesc + asz, stesc + 3, rsz);
1562 				buf->sz += asz - 3;
1563 				nbuf = mandoc_realloc(buf->buf, buf->sz);
1564 				start = nbuf + pos;
1565 				stesc = nbuf + (stesc - buf->buf);
1566 				buf->buf = nbuf;
1567 				if (asz > 3)
1568 					memmove(stesc + asz, stesc + 3, rsz);
1569 			}
1570 			for (npos = 0; npos < ctx->argc; npos++) {
1571 				if (npos)
1572 					*stesc++ = ' ';
1573 				if (quote_args)
1574 					*stesc++ = '"';
1575 				cp = ctx->argv[npos];
1576 				while (*cp != '\0')
1577 					*stesc++ = *cp++;
1578 				if (quote_args)
1579 					*stesc++ = '"';
1580 			}
1581 			continue;
1582 		case 'B':
1583 			npos = 0;
1584 			ubuf[0] = arg_complete &&
1585 			    roff_evalnum(r, ln, stnam, &npos,
1586 			      NULL, ROFFNUM_SCALE) &&
1587 			    stnam + npos + 1 == cp ? '1' : '0';
1588 			ubuf[1] = '\0';
1589 			break;
1590 		case 'n':
1591 			if (arg_complete)
1592 				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1593 				    roff_getregn(r, stnam, naml, sign));
1594 			else
1595 				ubuf[0] = '\0';
1596 			break;
1597 		case 'w':
1598 			/* use even incomplete args */
1599 			(void)snprintf(ubuf, sizeof(ubuf), "%d",
1600 			    24 * (int)naml);
1601 			break;
1602 		}
1603 
1604 		if (res == NULL) {
1605 			if (*esct == '*')
1606 				mandoc_msg(MANDOCERR_STR_UNDEF,
1607 				    ln, (int)(stesc - buf->buf),
1608 				    "%.*s", (int)naml, stnam);
1609 			res = "";
1610 		} else if (buf->sz + strlen(res) > SHRT_MAX) {
1611 			mandoc_msg(MANDOCERR_ROFFLOOP,
1612 			    ln, (int)(stesc - buf->buf), NULL);
1613 			return ROFF_IGN;
1614 		}
1615 
1616 		/* Replace the escape sequence by the string. */
1617 
1618 		*stesc = '\0';
1619 		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1620 		    buf->buf, res, cp) + 1;
1621 
1622 		/* Prepare for the next replacement. */
1623 
1624 		start = nbuf + pos;
1625 		stesc = nbuf + (stesc - buf->buf) + strlen(res);
1626 		free(buf->buf);
1627 		buf->buf = nbuf;
1628 	}
1629 	return ROFF_CONT;
1630 }
1631 
1632 /*
1633  * Parse a quoted or unquoted roff-style request or macro argument.
1634  * Return a pointer to the parsed argument, which is either the original
1635  * pointer or advanced by one byte in case the argument is quoted.
1636  * NUL-terminate the argument in place.
1637  * Collapse pairs of quotes inside quoted arguments.
1638  * Advance the argument pointer to the next argument,
1639  * or to the NUL byte terminating the argument line.
1640  */
1641 char *
1642 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1643 {
1644 	struct buf	 buf;
1645 	char		*cp, *start;
1646 	int		 newesc, pairs, quoted, white;
1647 
1648 	/* Quoting can only start with a new word. */
1649 	start = *cpp;
1650 	quoted = 0;
1651 	if ('"' == *start) {
1652 		quoted = 1;
1653 		start++;
1654 	}
1655 
1656 	newesc = pairs = white = 0;
1657 	for (cp = start; '\0' != *cp; cp++) {
1658 
1659 		/*
1660 		 * Move the following text left
1661 		 * after quoted quotes and after "\\" and "\t".
1662 		 */
1663 		if (pairs)
1664 			cp[-pairs] = cp[0];
1665 
1666 		if ('\\' == cp[0]) {
1667 			/*
1668 			 * In copy mode, translate double to single
1669 			 * backslashes and backslash-t to literal tabs.
1670 			 */
1671 			switch (cp[1]) {
1672 			case 'a':
1673 			case 't':
1674 				cp[-pairs] = '\t';
1675 				pairs++;
1676 				cp++;
1677 				break;
1678 			case '\\':
1679 				newesc = 1;
1680 				cp[-pairs] = ASCII_ESC;
1681 				pairs++;
1682 				cp++;
1683 				break;
1684 			case ' ':
1685 				/* Skip escaped blanks. */
1686 				if (0 == quoted)
1687 					cp++;
1688 				break;
1689 			default:
1690 				break;
1691 			}
1692 		} else if (0 == quoted) {
1693 			if (' ' == cp[0]) {
1694 				/* Unescaped blanks end unquoted args. */
1695 				white = 1;
1696 				break;
1697 			}
1698 		} else if ('"' == cp[0]) {
1699 			if ('"' == cp[1]) {
1700 				/* Quoted quotes collapse. */
1701 				pairs++;
1702 				cp++;
1703 			} else {
1704 				/* Unquoted quotes end quoted args. */
1705 				quoted = 2;
1706 				break;
1707 			}
1708 		}
1709 	}
1710 
1711 	/* Quoted argument without a closing quote. */
1712 	if (1 == quoted)
1713 		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1714 
1715 	/* NUL-terminate this argument and move to the next one. */
1716 	if (pairs)
1717 		cp[-pairs] = '\0';
1718 	if ('\0' != *cp) {
1719 		*cp++ = '\0';
1720 		while (' ' == *cp)
1721 			cp++;
1722 	}
1723 	*pos += (int)(cp - start) + (quoted ? 1 : 0);
1724 	*cpp = cp;
1725 
1726 	if ('\0' == *cp && (white || ' ' == cp[-1]))
1727 		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1728 
1729 	start = mandoc_strdup(start);
1730 	if (newesc == 0)
1731 		return start;
1732 
1733 	buf.buf = start;
1734 	buf.sz = strlen(start) + 1;
1735 	buf.next = NULL;
1736 	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1737 		free(buf.buf);
1738 		buf.buf = mandoc_strdup("");
1739 	}
1740 	return buf.buf;
1741 }
1742 
1743 
1744 /*
1745  * Process text streams.
1746  */
1747 static int
1748 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1749 {
1750 	size_t		 sz;
1751 	const char	*start;
1752 	char		*p;
1753 	int		 isz;
1754 	enum mandoc_esc	 esc;
1755 
1756 	/* Spring the input line trap. */
1757 
1758 	if (roffit_lines == 1) {
1759 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1760 		free(buf->buf);
1761 		buf->buf = p;
1762 		buf->sz = isz + 1;
1763 		*offs = 0;
1764 		free(roffit_macro);
1765 		roffit_lines = 0;
1766 		return ROFF_REPARSE;
1767 	} else if (roffit_lines > 1)
1768 		--roffit_lines;
1769 
1770 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1771 		if (roffce_lines < 1) {
1772 			r->man->last = roffce_node;
1773 			r->man->next = ROFF_NEXT_SIBLING;
1774 			roffce_lines = 0;
1775 			roffce_node = NULL;
1776 		} else
1777 			roffce_lines--;
1778 	}
1779 
1780 	/* Convert all breakable hyphens into ASCII_HYPH. */
1781 
1782 	start = p = buf->buf + pos;
1783 
1784 	while (*p != '\0') {
1785 		sz = strcspn(p, "-\\");
1786 		p += sz;
1787 
1788 		if (*p == '\0')
1789 			break;
1790 
1791 		if (*p == '\\') {
1792 			/* Skip over escapes. */
1793 			p++;
1794 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1795 			if (esc == ESCAPE_ERROR)
1796 				break;
1797 			while (*p == '-')
1798 				p++;
1799 			continue;
1800 		} else if (p == start) {
1801 			p++;
1802 			continue;
1803 		}
1804 
1805 		if (isalpha((unsigned char)p[-1]) &&
1806 		    isalpha((unsigned char)p[1]))
1807 			*p = ASCII_HYPH;
1808 		p++;
1809 	}
1810 	return ROFF_CONT;
1811 }
1812 
1813 int
1814 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1815 {
1816 	enum roff_tok	 t;
1817 	int		 e;
1818 	int		 pos;	/* parse point */
1819 	int		 spos;	/* saved parse point for messages */
1820 	int		 ppos;	/* original offset in buf->buf */
1821 	int		 ctl;	/* macro line (boolean) */
1822 
1823 	ppos = pos = *offs;
1824 
1825 	/* Handle in-line equation delimiters. */
1826 
1827 	if (r->tbl == NULL &&
1828 	    r->last_eqn != NULL && r->last_eqn->delim &&
1829 	    (r->eqn == NULL || r->eqn_inline)) {
1830 		e = roff_eqndelim(r, buf, pos);
1831 		if (e == ROFF_REPARSE)
1832 			return e;
1833 		assert(e == ROFF_CONT);
1834 	}
1835 
1836 	/* Expand some escape sequences. */
1837 
1838 	e = roff_expand(r, buf, ln, pos, r->escape);
1839 	if ((e & ROFF_MASK) == ROFF_IGN)
1840 		return e;
1841 	assert(e == ROFF_CONT);
1842 
1843 	ctl = roff_getcontrol(r, buf->buf, &pos);
1844 
1845 	/*
1846 	 * First, if a scope is open and we're not a macro, pass the
1847 	 * text through the macro's filter.
1848 	 * Equations process all content themselves.
1849 	 * Tables process almost all content themselves, but we want
1850 	 * to warn about macros before passing it there.
1851 	 */
1852 
1853 	if (r->last != NULL && ! ctl) {
1854 		t = r->last->tok;
1855 		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1856 		if ((e & ROFF_MASK) == ROFF_IGN)
1857 			return e;
1858 		e &= ~ROFF_MASK;
1859 	} else
1860 		e = ROFF_IGN;
1861 	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1862 		eqn_read(r->eqn, buf->buf + ppos);
1863 		return e;
1864 	}
1865 	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1866 		tbl_read(r->tbl, ln, buf->buf, ppos);
1867 		roff_addtbl(r->man, ln, r->tbl);
1868 		return e;
1869 	}
1870 	if ( ! ctl) {
1871 		r->options &= ~MPARSE_COMMENT;
1872 		return roff_parsetext(r, buf, pos, offs) | e;
1873 	}
1874 
1875 	/* Skip empty request lines. */
1876 
1877 	if (buf->buf[pos] == '"') {
1878 		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1879 		return ROFF_IGN;
1880 	} else if (buf->buf[pos] == '\0')
1881 		return ROFF_IGN;
1882 
1883 	/*
1884 	 * If a scope is open, go to the child handler for that macro,
1885 	 * as it may want to preprocess before doing anything with it.
1886 	 * Don't do so if an equation is open.
1887 	 */
1888 
1889 	if (r->last) {
1890 		t = r->last->tok;
1891 		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1892 	}
1893 
1894 	/* No scope is open.  This is a new request or macro. */
1895 
1896 	r->options &= ~MPARSE_COMMENT;
1897 	spos = pos;
1898 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1899 
1900 	/* Tables ignore most macros. */
1901 
1902 	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1903 	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1904 		mandoc_msg(MANDOCERR_TBLMACRO,
1905 		    ln, pos, "%s", buf->buf + spos);
1906 		if (t != TOKEN_NONE)
1907 			return ROFF_IGN;
1908 		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1909 			pos++;
1910 		while (buf->buf[pos] == ' ')
1911 			pos++;
1912 		tbl_read(r->tbl, ln, buf->buf, pos);
1913 		roff_addtbl(r->man, ln, r->tbl);
1914 		return ROFF_IGN;
1915 	}
1916 
1917 	/* For now, let high level macros abort .ce mode. */
1918 
1919 	if (ctl && roffce_node != NULL &&
1920 	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1921 	     t == ROFF_TH || t == ROFF_TS)) {
1922 		r->man->last = roffce_node;
1923 		r->man->next = ROFF_NEXT_SIBLING;
1924 		roffce_lines = 0;
1925 		roffce_node = NULL;
1926 	}
1927 
1928 	/*
1929 	 * This is neither a roff request nor a user-defined macro.
1930 	 * Let the standard macro set parsers handle it.
1931 	 */
1932 
1933 	if (t == TOKEN_NONE)
1934 		return ROFF_CONT;
1935 
1936 	/* Execute a roff request or a user defined macro. */
1937 
1938 	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1939 }
1940 
1941 /*
1942  * Internal interface function to tell the roff parser that execution
1943  * of the current macro ended.  This is required because macro
1944  * definitions usually do not end with a .return request.
1945  */
1946 void
1947 roff_userret(struct roff *r)
1948 {
1949 	struct mctx	*ctx;
1950 	int		 i;
1951 
1952 	assert(r->mstackpos >= 0);
1953 	ctx = r->mstack + r->mstackpos;
1954 	for (i = 0; i < ctx->argc; i++)
1955 		free(ctx->argv[i]);
1956 	ctx->argc = 0;
1957 	r->mstackpos--;
1958 }
1959 
1960 void
1961 roff_endparse(struct roff *r)
1962 {
1963 	if (r->last != NULL)
1964 		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1965 		    r->last->col, "%s", roff_name[r->last->tok]);
1966 
1967 	if (r->eqn != NULL) {
1968 		mandoc_msg(MANDOCERR_BLK_NOEND,
1969 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1970 		eqn_parse(r->eqn);
1971 		r->eqn = NULL;
1972 	}
1973 
1974 	if (r->tbl != NULL) {
1975 		tbl_end(r->tbl, 1);
1976 		r->tbl = NULL;
1977 	}
1978 }
1979 
1980 /*
1981  * Parse a roff node's type from the input buffer.  This must be in the
1982  * form of ".foo xxx" in the usual way.
1983  */
1984 static enum roff_tok
1985 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1986 {
1987 	char		*cp;
1988 	const char	*mac;
1989 	size_t		 maclen;
1990 	int		 deftype;
1991 	enum roff_tok	 t;
1992 
1993 	cp = buf + *pos;
1994 
1995 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1996 		return TOKEN_NONE;
1997 
1998 	mac = cp;
1999 	maclen = roff_getname(r, &cp, ln, ppos);
2000 
2001 	deftype = ROFFDEF_USER | ROFFDEF_REN;
2002 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2003 	switch (deftype) {
2004 	case ROFFDEF_USER:
2005 		t = ROFF_USERDEF;
2006 		break;
2007 	case ROFFDEF_REN:
2008 		t = ROFF_RENAMED;
2009 		break;
2010 	default:
2011 		t = roffhash_find(r->reqtab, mac, maclen);
2012 		break;
2013 	}
2014 	if (t != TOKEN_NONE)
2015 		*pos = cp - buf;
2016 	else if (deftype == ROFFDEF_UNDEF) {
2017 		/* Using an undefined macro defines it to be empty. */
2018 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2019 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2020 	}
2021 	return t;
2022 }
2023 
2024 /* --- handling of request blocks ----------------------------------------- */
2025 
2026 static int
2027 roff_cblock(ROFF_ARGS)
2028 {
2029 
2030 	/*
2031 	 * A block-close `..' should only be invoked as a child of an
2032 	 * ignore macro, otherwise raise a warning and just ignore it.
2033 	 */
2034 
2035 	if (r->last == NULL) {
2036 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2037 		return ROFF_IGN;
2038 	}
2039 
2040 	switch (r->last->tok) {
2041 	case ROFF_am:
2042 		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
2043 	case ROFF_ami:
2044 	case ROFF_de:
2045 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
2046 	case ROFF_dei:
2047 	case ROFF_ig:
2048 		break;
2049 	default:
2050 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2051 		return ROFF_IGN;
2052 	}
2053 
2054 	if (buf->buf[pos] != '\0')
2055 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2056 		    ".. %s", buf->buf + pos);
2057 
2058 	roffnode_pop(r);
2059 	roffnode_cleanscope(r);
2060 	return ROFF_IGN;
2061 
2062 }
2063 
2064 /*
2065  * Pop all nodes ending at the end of the current input line.
2066  * Return the number of loops ended.
2067  */
2068 static int
2069 roffnode_cleanscope(struct roff *r)
2070 {
2071 	int inloop;
2072 
2073 	inloop = 0;
2074 	while (r->last != NULL) {
2075 		if (--r->last->endspan != 0)
2076 			break;
2077 		inloop += roffnode_pop(r);
2078 	}
2079 	return inloop;
2080 }
2081 
2082 /*
2083  * Handle the closing \} of a conditional block.
2084  * Apart from generating warnings, this only pops nodes.
2085  * Return the number of loops ended.
2086  */
2087 static int
2088 roff_ccond(struct roff *r, int ln, int ppos)
2089 {
2090 	if (NULL == r->last) {
2091 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2092 		return 0;
2093 	}
2094 
2095 	switch (r->last->tok) {
2096 	case ROFF_el:
2097 	case ROFF_ie:
2098 	case ROFF_if:
2099 	case ROFF_while:
2100 		break;
2101 	default:
2102 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2103 		return 0;
2104 	}
2105 
2106 	if (r->last->endspan > -1) {
2107 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2108 		return 0;
2109 	}
2110 
2111 	return roffnode_pop(r) + roffnode_cleanscope(r);
2112 }
2113 
2114 static int
2115 roff_block(ROFF_ARGS)
2116 {
2117 	const char	*name, *value;
2118 	char		*call, *cp, *iname, *rname;
2119 	size_t		 csz, namesz, rsz;
2120 	int		 deftype;
2121 
2122 	/* Ignore groff compatibility mode for now. */
2123 
2124 	if (tok == ROFF_de1)
2125 		tok = ROFF_de;
2126 	else if (tok == ROFF_dei1)
2127 		tok = ROFF_dei;
2128 	else if (tok == ROFF_am1)
2129 		tok = ROFF_am;
2130 	else if (tok == ROFF_ami1)
2131 		tok = ROFF_ami;
2132 
2133 	/* Parse the macro name argument. */
2134 
2135 	cp = buf->buf + pos;
2136 	if (tok == ROFF_ig) {
2137 		iname = NULL;
2138 		namesz = 0;
2139 	} else {
2140 		iname = cp;
2141 		namesz = roff_getname(r, &cp, ln, ppos);
2142 		iname[namesz] = '\0';
2143 	}
2144 
2145 	/* Resolve the macro name argument if it is indirect. */
2146 
2147 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2148 		deftype = ROFFDEF_USER;
2149 		name = roff_getstrn(r, iname, namesz, &deftype);
2150 		if (name == NULL) {
2151 			mandoc_msg(MANDOCERR_STR_UNDEF,
2152 			    ln, (int)(iname - buf->buf),
2153 			    "%.*s", (int)namesz, iname);
2154 			namesz = 0;
2155 		} else
2156 			namesz = strlen(name);
2157 	} else
2158 		name = iname;
2159 
2160 	if (namesz == 0 && tok != ROFF_ig) {
2161 		mandoc_msg(MANDOCERR_REQ_EMPTY,
2162 		    ln, ppos, "%s", roff_name[tok]);
2163 		return ROFF_IGN;
2164 	}
2165 
2166 	roffnode_push(r, tok, name, ln, ppos);
2167 
2168 	/*
2169 	 * At the beginning of a `de' macro, clear the existing string
2170 	 * with the same name, if there is one.  New content will be
2171 	 * appended from roff_block_text() in multiline mode.
2172 	 */
2173 
2174 	if (tok == ROFF_de || tok == ROFF_dei) {
2175 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2176 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2177 	} else if (tok == ROFF_am || tok == ROFF_ami) {
2178 		deftype = ROFFDEF_ANY;
2179 		value = roff_getstrn(r, iname, namesz, &deftype);
2180 		switch (deftype) {  /* Before appending, ... */
2181 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2182 			roff_setstrn(&r->strtab, name, namesz,
2183 			    value, strlen(value), 0);
2184 			break;
2185 		case ROFFDEF_REN: /* call original standard macro. */
2186 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2187 			    (int)strlen(value), value);
2188 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2189 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2190 			free(call);
2191 			break;
2192 		case ROFFDEF_STD:  /* rename and call standard macro. */
2193 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2194 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2195 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2196 			    (int)rsz, rname);
2197 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2198 			free(call);
2199 			free(rname);
2200 			break;
2201 		default:
2202 			break;
2203 		}
2204 	}
2205 
2206 	if (*cp == '\0')
2207 		return ROFF_IGN;
2208 
2209 	/* Get the custom end marker. */
2210 
2211 	iname = cp;
2212 	namesz = roff_getname(r, &cp, ln, ppos);
2213 
2214 	/* Resolve the end marker if it is indirect. */
2215 
2216 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2217 		deftype = ROFFDEF_USER;
2218 		name = roff_getstrn(r, iname, namesz, &deftype);
2219 		if (name == NULL) {
2220 			mandoc_msg(MANDOCERR_STR_UNDEF,
2221 			    ln, (int)(iname - buf->buf),
2222 			    "%.*s", (int)namesz, iname);
2223 			namesz = 0;
2224 		} else
2225 			namesz = strlen(name);
2226 	} else
2227 		name = iname;
2228 
2229 	if (namesz)
2230 		r->last->end = mandoc_strndup(name, namesz);
2231 
2232 	if (*cp != '\0')
2233 		mandoc_msg(MANDOCERR_ARG_EXCESS,
2234 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2235 
2236 	return ROFF_IGN;
2237 }
2238 
2239 static int
2240 roff_block_sub(ROFF_ARGS)
2241 {
2242 	enum roff_tok	t;
2243 	int		i, j;
2244 
2245 	/*
2246 	 * First check whether a custom macro exists at this level.  If
2247 	 * it does, then check against it.  This is some of groff's
2248 	 * stranger behaviours.  If we encountered a custom end-scope
2249 	 * tag and that tag also happens to be a "real" macro, then we
2250 	 * need to try interpreting it again as a real macro.  If it's
2251 	 * not, then return ignore.  Else continue.
2252 	 */
2253 
2254 	if (r->last->end) {
2255 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2256 			if (buf->buf[i] != r->last->end[j])
2257 				break;
2258 
2259 		if (r->last->end[j] == '\0' &&
2260 		    (buf->buf[i] == '\0' ||
2261 		     buf->buf[i] == ' ' ||
2262 		     buf->buf[i] == '\t')) {
2263 			roffnode_pop(r);
2264 			roffnode_cleanscope(r);
2265 
2266 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2267 				i++;
2268 
2269 			pos = i;
2270 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2271 			    TOKEN_NONE)
2272 				return ROFF_RERUN;
2273 			return ROFF_IGN;
2274 		}
2275 	}
2276 
2277 	/*
2278 	 * If we have no custom end-query or lookup failed, then try
2279 	 * pulling it out of the hashtable.
2280 	 */
2281 
2282 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2283 
2284 	if (t != ROFF_cblock) {
2285 		if (tok != ROFF_ig)
2286 			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2287 		return ROFF_IGN;
2288 	}
2289 
2290 	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2291 }
2292 
2293 static int
2294 roff_block_text(ROFF_ARGS)
2295 {
2296 
2297 	if (tok != ROFF_ig)
2298 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2299 
2300 	return ROFF_IGN;
2301 }
2302 
2303 static int
2304 roff_cond_sub(ROFF_ARGS)
2305 {
2306 	struct roffnode	*bl;
2307 	char		*ep;
2308 	int		 endloop, irc, rr;
2309 	enum roff_tok	 t;
2310 
2311 	irc = ROFF_IGN;
2312 	rr = r->last->rule;
2313 	endloop = tok != ROFF_while ? ROFF_IGN :
2314 	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2315 	if (roffnode_cleanscope(r))
2316 		irc |= endloop;
2317 
2318 	/*
2319 	 * If `\}' occurs on a macro line without a preceding macro,
2320 	 * drop the line completely.
2321 	 */
2322 
2323 	ep = buf->buf + pos;
2324 	if (ep[0] == '\\' && ep[1] == '}')
2325 		rr = 0;
2326 
2327 	/*
2328 	 * The closing delimiter `\}' rewinds the conditional scope
2329 	 * but is otherwise ignored when interpreting the line.
2330 	 */
2331 
2332 	while ((ep = strchr(ep, '\\')) != NULL) {
2333 		switch (ep[1]) {
2334 		case '}':
2335 			memmove(ep, ep + 2, strlen(ep + 2) + 1);
2336 			if (roff_ccond(r, ln, ep - buf->buf))
2337 				irc |= endloop;
2338 			break;
2339 		case '\0':
2340 			++ep;
2341 			break;
2342 		default:
2343 			ep += 2;
2344 			break;
2345 		}
2346 	}
2347 
2348 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2349 
2350 	/* For now, let high level macros abort .ce mode. */
2351 
2352 	if (roffce_node != NULL &&
2353 	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2354              t == ROFF_TH || t == ROFF_TS)) {
2355 		r->man->last = roffce_node;
2356 		r->man->next = ROFF_NEXT_SIBLING;
2357 		roffce_lines = 0;
2358 		roffce_node = NULL;
2359 	}
2360 
2361 	/*
2362 	 * Fully handle known macros when they are structurally
2363 	 * required or when the conditional evaluated to true.
2364 	 */
2365 
2366 	if (t == ROFF_break) {
2367 		if (irc & ROFF_LOOPMASK)
2368 			irc = ROFF_IGN | ROFF_LOOPEXIT;
2369 		else if (rr) {
2370 			for (bl = r->last; bl != NULL; bl = bl->parent) {
2371 				bl->rule = 0;
2372 				if (bl->tok == ROFF_while)
2373 					break;
2374 			}
2375 		}
2376 	} else if (t != TOKEN_NONE &&
2377 	    (rr || roffs[t].flags & ROFFMAC_STRUCT))
2378 		irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2379 	else
2380 		irc |= rr ? ROFF_CONT : ROFF_IGN;
2381 	return irc;
2382 }
2383 
2384 static int
2385 roff_cond_text(ROFF_ARGS)
2386 {
2387 	char		*ep;
2388 	int		 endloop, irc, rr;
2389 
2390 	irc = ROFF_IGN;
2391 	rr = r->last->rule;
2392 	endloop = tok != ROFF_while ? ROFF_IGN :
2393 	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2394 	if (roffnode_cleanscope(r))
2395 		irc |= endloop;
2396 
2397 	/*
2398 	 * If `\}' occurs on a text line with neither preceding
2399 	 * nor following characters, drop the line completely.
2400 	 */
2401 
2402 	ep = buf->buf + pos;
2403 	if (strcmp(ep, "\\}") == 0)
2404 		rr = 0;
2405 
2406 	/*
2407 	 * The closing delimiter `\}' rewinds the conditional scope
2408 	 * but is otherwise ignored when interpreting the line.
2409 	 */
2410 
2411 	while ((ep = strchr(ep, '\\')) != NULL) {
2412 		switch (ep[1]) {
2413 		case '}':
2414 			memmove(ep, ep + 2, strlen(ep + 2) + 1);
2415 			if (roff_ccond(r, ln, ep - buf->buf))
2416 				irc |= endloop;
2417 			break;
2418 		case '\0':
2419 			++ep;
2420 			break;
2421 		default:
2422 			ep += 2;
2423 			break;
2424 		}
2425 	}
2426 	if (rr)
2427 		irc |= ROFF_CONT;
2428 	return irc;
2429 }
2430 
2431 /* --- handling of numeric and conditional expressions -------------------- */
2432 
2433 /*
2434  * Parse a single signed integer number.  Stop at the first non-digit.
2435  * If there is at least one digit, return success and advance the
2436  * parse point, else return failure and let the parse point unchanged.
2437  * Ignore overflows, treat them just like the C language.
2438  */
2439 static int
2440 roff_getnum(const char *v, int *pos, int *res, int flags)
2441 {
2442 	int	 myres, scaled, n, p;
2443 
2444 	if (NULL == res)
2445 		res = &myres;
2446 
2447 	p = *pos;
2448 	n = v[p] == '-';
2449 	if (n || v[p] == '+')
2450 		p++;
2451 
2452 	if (flags & ROFFNUM_WHITE)
2453 		while (isspace((unsigned char)v[p]))
2454 			p++;
2455 
2456 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2457 		*res = 10 * *res + v[p] - '0';
2458 	if (p == *pos + n)
2459 		return 0;
2460 
2461 	if (n)
2462 		*res = -*res;
2463 
2464 	/* Each number may be followed by one optional scaling unit. */
2465 
2466 	switch (v[p]) {
2467 	case 'f':
2468 		scaled = *res * 65536;
2469 		break;
2470 	case 'i':
2471 		scaled = *res * 240;
2472 		break;
2473 	case 'c':
2474 		scaled = *res * 240 / 2.54;
2475 		break;
2476 	case 'v':
2477 	case 'P':
2478 		scaled = *res * 40;
2479 		break;
2480 	case 'm':
2481 	case 'n':
2482 		scaled = *res * 24;
2483 		break;
2484 	case 'p':
2485 		scaled = *res * 10 / 3;
2486 		break;
2487 	case 'u':
2488 		scaled = *res;
2489 		break;
2490 	case 'M':
2491 		scaled = *res * 6 / 25;
2492 		break;
2493 	default:
2494 		scaled = *res;
2495 		p--;
2496 		break;
2497 	}
2498 	if (flags & ROFFNUM_SCALE)
2499 		*res = scaled;
2500 
2501 	*pos = p + 1;
2502 	return 1;
2503 }
2504 
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * Return 1 if the string between its first and second occurrence
 * equals the string between its second and third occurrence,
 * or 0 otherwise, including when a delimiter is missing.
 * Advance the parse point behind the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*left, *right, *stop;
	char		 delim;
	int		 equal;

	delim = v[*pos];
	left = v + *pos + 1;	/* scans the first string */
	equal = 0;

	stop = strchr(left, delim);	/* the middle delimiter */
	if (stop != NULL) {
		for (right = stop + 1; ; left++, right++) {
			stop = right;
			if (*right == '\0')	/* no final delimiter */
				break;
			if (*left != *right) {	/* mismatch */
				stop = strchr(right, delim);
				break;
			}
			if (*right == delim) {	/* both strings ended */
				equal = 1;
				break;
			}
		}
	}

	/* Step over the final delimiter or go to the end of the line. */

	if (stop == NULL)
		stop = strchr(left, '\0');
	else if (*stop != '\0')
		stop++;
	*pos = (int)(stop - v);
	return equal;
}
2547 
2548 /*
2549  * Evaluate an optionally negated single character, numerical,
2550  * or string condition.
2551  */
2552 static int
2553 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2554 {
2555 	const char	*start, *end;
2556 	char		*cp, *name;
2557 	size_t		 sz;
2558 	int		 deftype, len, number, savepos, istrue, wanttrue;
2559 
2560 	if ('!' == v[*pos]) {
2561 		wanttrue = 0;
2562 		(*pos)++;
2563 	} else
2564 		wanttrue = 1;
2565 
2566 	switch (v[*pos]) {
2567 	case '\0':
2568 		return 0;
2569 	case 'n':
2570 	case 'o':
2571 		(*pos)++;
2572 		return wanttrue;
2573 	case 'e':
2574 	case 't':
2575 	case 'v':
2576 		(*pos)++;
2577 		return !wanttrue;
2578 	case 'c':
2579 		do {
2580 			(*pos)++;
2581 		} while (v[*pos] == ' ');
2582 
2583 		/*
2584 		 * Quirk for groff compatibility:
2585 		 * The horizontal tab is neither available nor unavailable.
2586 		 */
2587 
2588 		if (v[*pos] == '\t') {
2589 			(*pos)++;
2590 			return 0;
2591 		}
2592 
2593 		/* Printable ASCII characters are available. */
2594 
2595 		if (v[*pos] != '\\') {
2596 			(*pos)++;
2597 			return wanttrue;
2598 		}
2599 
2600 		end = v + ++*pos;
2601 		switch (mandoc_escape(&end, &start, &len)) {
2602 		case ESCAPE_SPECIAL:
2603 			istrue = mchars_spec2cp(start, len) != -1;
2604 			break;
2605 		case ESCAPE_UNICODE:
2606 			istrue = 1;
2607 			break;
2608 		case ESCAPE_NUMBERED:
2609 			istrue = mchars_num2char(start, len) != -1;
2610 			break;
2611 		default:
2612 			istrue = !wanttrue;
2613 			break;
2614 		}
2615 		*pos = end - v;
2616 		return istrue == wanttrue;
2617 	case 'd':
2618 	case 'r':
2619 		cp = v + *pos + 1;
2620 		while (*cp == ' ')
2621 			cp++;
2622 		name = cp;
2623 		sz = roff_getname(r, &cp, ln, cp - v);
2624 		if (sz == 0)
2625 			istrue = 0;
2626 		else if (v[*pos] == 'r')
2627 			istrue = roff_hasregn(r, name, sz);
2628 		else {
2629 			deftype = ROFFDEF_ANY;
2630 		        roff_getstrn(r, name, sz, &deftype);
2631 			istrue = !!deftype;
2632 		}
2633 		*pos = (name + sz) - v;
2634 		return istrue == wanttrue;
2635 	default:
2636 		break;
2637 	}
2638 
2639 	savepos = *pos;
2640 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2641 		return (number > 0) == wanttrue;
2642 	else if (*pos == savepos)
2643 		return roff_evalstrcond(v, pos) == wanttrue;
2644 	else
2645 		return 0;
2646 }
2647 
2648 static int
2649 roff_line_ignore(ROFF_ARGS)
2650 {
2651 
2652 	return ROFF_IGN;
2653 }
2654 
2655 static int
2656 roff_insec(ROFF_ARGS)
2657 {
2658 
2659 	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2660 	return ROFF_IGN;
2661 }
2662 
2663 static int
2664 roff_unsupp(ROFF_ARGS)
2665 {
2666 
2667 	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2668 	return ROFF_IGN;
2669 }
2670 
2671 static int
2672 roff_cond(ROFF_ARGS)
2673 {
2674 	int	 irc;
2675 
2676 	roffnode_push(r, tok, NULL, ln, ppos);
2677 
2678 	/*
2679 	 * An `.el' has no conditional body: it will consume the value
2680 	 * of the current rstack entry set in prior `ie' calls or
2681 	 * defaults to DENY.
2682 	 *
2683 	 * If we're not an `el', however, then evaluate the conditional.
2684 	 */
2685 
2686 	r->last->rule = tok == ROFF_el ?
2687 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2688 	    roff_evalcond(r, ln, buf->buf, &pos);
2689 
2690 	/*
2691 	 * An if-else will put the NEGATION of the current evaluated
2692 	 * conditional into the stack of rules.
2693 	 */
2694 
2695 	if (tok == ROFF_ie) {
2696 		if (r->rstackpos + 1 == r->rstacksz) {
2697 			r->rstacksz += 16;
2698 			r->rstack = mandoc_reallocarray(r->rstack,
2699 			    r->rstacksz, sizeof(int));
2700 		}
2701 		r->rstack[++r->rstackpos] = !r->last->rule;
2702 	}
2703 
2704 	/* If the parent has false as its rule, then so do we. */
2705 
2706 	if (r->last->parent && !r->last->parent->rule)
2707 		r->last->rule = 0;
2708 
2709 	/*
2710 	 * Determine scope.
2711 	 * If there is nothing on the line after the conditional,
2712 	 * not even whitespace, use next-line scope.
2713 	 * Except that .while does not support next-line scope.
2714 	 */
2715 
2716 	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2717 		r->last->endspan = 2;
2718 		goto out;
2719 	}
2720 
2721 	while (buf->buf[pos] == ' ')
2722 		pos++;
2723 
2724 	/* An opening brace requests multiline scope. */
2725 
2726 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2727 		r->last->endspan = -1;
2728 		pos += 2;
2729 		while (buf->buf[pos] == ' ')
2730 			pos++;
2731 		goto out;
2732 	}
2733 
2734 	/*
2735 	 * Anything else following the conditional causes
2736 	 * single-line scope.  Warn if the scope contains
2737 	 * nothing but trailing whitespace.
2738 	 */
2739 
2740 	if (buf->buf[pos] == '\0')
2741 		mandoc_msg(MANDOCERR_COND_EMPTY,
2742 		    ln, ppos, "%s", roff_name[tok]);
2743 
2744 	r->last->endspan = 1;
2745 
2746 out:
2747 	*offs = pos;
2748 	irc = ROFF_RERUN;
2749 	if (tok == ROFF_while)
2750 		irc |= ROFF_WHILE;
2751 	return irc;
2752 }
2753 
2754 static int
2755 roff_ds(ROFF_ARGS)
2756 {
2757 	char		*string;
2758 	const char	*name;
2759 	size_t		 namesz;
2760 
2761 	/* Ignore groff compatibility mode for now. */
2762 
2763 	if (tok == ROFF_ds1)
2764 		tok = ROFF_ds;
2765 	else if (tok == ROFF_as1)
2766 		tok = ROFF_as;
2767 
2768 	/*
2769 	 * The first word is the name of the string.
2770 	 * If it is empty or terminated by an escape sequence,
2771 	 * abort the `ds' request without defining anything.
2772 	 */
2773 
2774 	name = string = buf->buf + pos;
2775 	if (*name == '\0')
2776 		return ROFF_IGN;
2777 
2778 	namesz = roff_getname(r, &string, ln, pos);
2779 	switch (name[namesz]) {
2780 	case '\\':
2781 		return ROFF_IGN;
2782 	case '\t':
2783 		string = buf->buf + pos + namesz;
2784 		break;
2785 	default:
2786 		break;
2787 	}
2788 
2789 	/* Read past the initial double-quote, if any. */
2790 	if (*string == '"')
2791 		string++;
2792 
2793 	/* The rest is the value. */
2794 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2795 	    ROFF_as == tok);
2796 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2797 	return ROFF_IGN;
2798 }
2799 
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store a one-character code for it
 * in *res, advance the parse point behind it, and return the code.
 * Two-character operators are coded as follows:
 * <= as l, <> as !, <? as i, >= as g, >? as a, == as =.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then contains the character at the parse point.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 oplen;

	cp = v + *pos;
	*res = cp[0];
	oplen = 1;

	switch (cp[0]) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			oplen = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			oplen = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			oplen = 2;
		}
		break;
	case '=':
		if (cp[1] == '=')
			oplen = 2;
		break;
	default:
		return 0;
	}

	*pos += oplen;
	return *res;
}
2863 
2864 /*
2865  * Evaluate either a parenthesized numeric expression
2866  * or a single signed integer number.
2867  */
2868 static int
2869 roff_evalpar(struct roff *r, int ln,
2870 	const char *v, int *pos, int *res, int flags)
2871 {
2872 
2873 	if ('(' != v[*pos])
2874 		return roff_getnum(v, pos, res, flags);
2875 
2876 	(*pos)++;
2877 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2878 		return 0;
2879 
2880 	/*
2881 	 * Omission of the closing parenthesis
2882 	 * is an error in validation mode,
2883 	 * but ignored in evaluation mode.
2884 	 */
2885 
2886 	if (')' == v[*pos])
2887 		(*pos)++;
2888 	else if (NULL == res)
2889 		return 0;
2890 
2891 	return 1;
2892 }
2893 
2894 /*
2895  * Evaluate a complete numeric expression.
2896  * Proceed left to right, there is no concept of precedence.
2897  */
2898 static int
2899 roff_evalnum(struct roff *r, int ln, const char *v,
2900 	int *pos, int *res, int flags)
2901 {
2902 	int		 mypos, operand2;
2903 	char		 operator;
2904 
2905 	if (NULL == pos) {
2906 		mypos = 0;
2907 		pos = &mypos;
2908 	}
2909 
2910 	if (flags & ROFFNUM_WHITE)
2911 		while (isspace((unsigned char)v[*pos]))
2912 			(*pos)++;
2913 
2914 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2915 		return 0;
2916 
2917 	while (1) {
2918 		if (flags & ROFFNUM_WHITE)
2919 			while (isspace((unsigned char)v[*pos]))
2920 				(*pos)++;
2921 
2922 		if ( ! roff_getop(v, pos, &operator))
2923 			break;
2924 
2925 		if (flags & ROFFNUM_WHITE)
2926 			while (isspace((unsigned char)v[*pos]))
2927 				(*pos)++;
2928 
2929 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2930 			return 0;
2931 
2932 		if (flags & ROFFNUM_WHITE)
2933 			while (isspace((unsigned char)v[*pos]))
2934 				(*pos)++;
2935 
2936 		if (NULL == res)
2937 			continue;
2938 
2939 		switch (operator) {
2940 		case '+':
2941 			*res += operand2;
2942 			break;
2943 		case '-':
2944 			*res -= operand2;
2945 			break;
2946 		case '*':
2947 			*res *= operand2;
2948 			break;
2949 		case '/':
2950 			if (operand2 == 0) {
2951 				mandoc_msg(MANDOCERR_DIVZERO,
2952 					ln, *pos, "%s", v);
2953 				*res = 0;
2954 				break;
2955 			}
2956 			*res /= operand2;
2957 			break;
2958 		case '%':
2959 			if (operand2 == 0) {
2960 				mandoc_msg(MANDOCERR_DIVZERO,
2961 					ln, *pos, "%s", v);
2962 				*res = 0;
2963 				break;
2964 			}
2965 			*res %= operand2;
2966 			break;
2967 		case '<':
2968 			*res = *res < operand2;
2969 			break;
2970 		case '>':
2971 			*res = *res > operand2;
2972 			break;
2973 		case 'l':
2974 			*res = *res <= operand2;
2975 			break;
2976 		case 'g':
2977 			*res = *res >= operand2;
2978 			break;
2979 		case '=':
2980 			*res = *res == operand2;
2981 			break;
2982 		case '!':
2983 			*res = *res != operand2;
2984 			break;
2985 		case '&':
2986 			*res = *res && operand2;
2987 			break;
2988 		case ':':
2989 			*res = *res || operand2;
2990 			break;
2991 		case 'i':
2992 			if (operand2 < *res)
2993 				*res = operand2;
2994 			break;
2995 		case 'a':
2996 			if (operand2 > *res)
2997 				*res = operand2;
2998 			break;
2999 		default:
3000 			abort();
3001 		}
3002 	}
3003 	return 1;
3004 }
3005 
3006 /* --- register management ------------------------------------------------ */
3007 
/*
 * Set a register given a NUL-terminated name,
 * leaving the step size of the register alone.
 * See roff_setregn() for the meaning of sign.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3013 
3014 static void
3015 roff_setregn(struct roff *r, const char *name, size_t len,
3016     int val, char sign, int step)
3017 {
3018 	struct roffreg	*reg;
3019 
3020 	/* Search for an existing register with the same name. */
3021 	reg = r->regtab;
3022 
3023 	while (reg != NULL && (reg->key.sz != len ||
3024 	    strncmp(reg->key.p, name, len) != 0))
3025 		reg = reg->next;
3026 
3027 	if (NULL == reg) {
3028 		/* Create a new register. */
3029 		reg = mandoc_malloc(sizeof(struct roffreg));
3030 		reg->key.p = mandoc_strndup(name, len);
3031 		reg->key.sz = len;
3032 		reg->val = 0;
3033 		reg->step = 0;
3034 		reg->next = r->regtab;
3035 		r->regtab = reg;
3036 	}
3037 
3038 	if ('+' == sign)
3039 		reg->val += val;
3040 	else if ('-' == sign)
3041 		reg->val -= val;
3042 	else
3043 		reg->val = val;
3044 	if (step != INT_MIN)
3045 		reg->step = step;
3046 }
3047 
3048 /*
3049  * Handle some predefined read-only number registers.
3050  * For now, return -1 if the requested register is not predefined;
3051  * in case a predefined read-only register having the value -1
3052  * were to turn up, another special value would have to be chosen.
3053  */
3054 static int
3055 roff_getregro(const struct roff *r, const char *name)
3056 {
3057 
3058 	switch (*name) {
3059 	case '$':  /* Number of arguments of the last macro evaluated. */
3060 		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3061 	case 'A':  /* ASCII approximation mode is always off. */
3062 		return 0;
3063 	case 'g':  /* Groff compatibility mode is always on. */
3064 		return 1;
3065 	case 'H':  /* Fixed horizontal resolution. */
3066 		return 24;
3067 	case 'j':  /* Always adjust left margin only. */
3068 		return 0;
3069 	case 'T':  /* Some output device is always defined. */
3070 		return 1;
3071 	case 'V':  /* Fixed vertical resolution. */
3072 		return 40;
3073 	default:
3074 		return -1;
3075 	}
3076 }
3077 
/*
 * Get the value of a register given a NUL-terminated name,
 * without applying the step size of the register.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3083 
3084 static int
3085 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3086 {
3087 	struct roffreg	*reg;
3088 	int		 val;
3089 
3090 	if ('.' == name[0] && 2 == len) {
3091 		val = roff_getregro(r, name + 1);
3092 		if (-1 != val)
3093 			return val;
3094 	}
3095 
3096 	for (reg = r->regtab; reg; reg = reg->next) {
3097 		if (len == reg->key.sz &&
3098 		    0 == strncmp(name, reg->key.p, len)) {
3099 			switch (sign) {
3100 			case '+':
3101 				reg->val += reg->step;
3102 				break;
3103 			case '-':
3104 				reg->val -= reg->step;
3105 				break;
3106 			default:
3107 				break;
3108 			}
3109 			return reg->val;
3110 		}
3111 	}
3112 
3113 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3114 	return 0;
3115 }
3116 
3117 static int
3118 roff_hasregn(const struct roff *r, const char *name, size_t len)
3119 {
3120 	struct roffreg	*reg;
3121 	int		 val;
3122 
3123 	if ('.' == name[0] && 2 == len) {
3124 		val = roff_getregro(r, name + 1);
3125 		if (-1 != val)
3126 			return 1;
3127 	}
3128 
3129 	for (reg = r->regtab; reg; reg = reg->next)
3130 		if (len == reg->key.sz &&
3131 		    0 == strncmp(name, reg->key.p, len))
3132 			return 1;
3133 
3134 	return 0;
3135 }
3136 
3137 static void
3138 roff_freereg(struct roffreg *reg)
3139 {
3140 	struct roffreg	*old_reg;
3141 
3142 	while (NULL != reg) {
3143 		free(reg->key.p);
3144 		old_reg = reg;
3145 		reg = reg->next;
3146 		free(old_reg);
3147 	}
3148 }
3149 
3150 static int
3151 roff_nr(ROFF_ARGS)
3152 {
3153 	char		*key, *val, *step;
3154 	size_t		 keysz;
3155 	int		 iv, is, len;
3156 	char		 sign;
3157 
3158 	key = val = buf->buf + pos;
3159 	if (*key == '\0')
3160 		return ROFF_IGN;
3161 
3162 	keysz = roff_getname(r, &val, ln, pos);
3163 	if (key[keysz] == '\\' || key[keysz] == '\t')
3164 		return ROFF_IGN;
3165 
3166 	sign = *val;
3167 	if (sign == '+' || sign == '-')
3168 		val++;
3169 
3170 	len = 0;
3171 	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3172 		return ROFF_IGN;
3173 
3174 	step = val + len;
3175 	while (isspace((unsigned char)*step))
3176 		step++;
3177 	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3178 		is = INT_MIN;
3179 
3180 	roff_setregn(r, key, keysz, iv, sign, is);
3181 	return ROFF_IGN;
3182 }
3183 
3184 static int
3185 roff_rr(ROFF_ARGS)
3186 {
3187 	struct roffreg	*reg, **prev;
3188 	char		*name, *cp;
3189 	size_t		 namesz;
3190 
3191 	name = cp = buf->buf + pos;
3192 	if (*name == '\0')
3193 		return ROFF_IGN;
3194 	namesz = roff_getname(r, &cp, ln, pos);
3195 	name[namesz] = '\0';
3196 
3197 	prev = &r->regtab;
3198 	while (1) {
3199 		reg = *prev;
3200 		if (reg == NULL || !strcmp(name, reg->key.p))
3201 			break;
3202 		prev = &reg->next;
3203 	}
3204 	if (reg != NULL) {
3205 		*prev = reg->next;
3206 		free(reg->key.p);
3207 		free(reg);
3208 	}
3209 	return ROFF_IGN;
3210 }
3211 
3212 /* --- handler functions for roff requests -------------------------------- */
3213 
3214 static int
3215 roff_rm(ROFF_ARGS)
3216 {
3217 	const char	 *name;
3218 	char		 *cp;
3219 	size_t		  namesz;
3220 
3221 	cp = buf->buf + pos;
3222 	while (*cp != '\0') {
3223 		name = cp;
3224 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3225 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3226 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3227 		if (name[namesz] == '\\' || name[namesz] == '\t')
3228 			break;
3229 	}
3230 	return ROFF_IGN;
3231 }
3232 
3233 static int
3234 roff_it(ROFF_ARGS)
3235 {
3236 	int		 iv;
3237 
3238 	/* Parse the number of lines. */
3239 
3240 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3241 		mandoc_msg(MANDOCERR_IT_NONUM,
3242 		    ln, ppos, "%s", buf->buf + 1);
3243 		return ROFF_IGN;
3244 	}
3245 
3246 	while (isspace((unsigned char)buf->buf[pos]))
3247 		pos++;
3248 
3249 	/*
3250 	 * Arm the input line trap.
3251 	 * Special-casing "an-trap" is an ugly workaround to cope
3252 	 * with DocBook stupidly fiddling with man(7) internals.
3253 	 */
3254 
3255 	roffit_lines = iv;
3256 	roffit_macro = mandoc_strdup(iv != 1 ||
3257 	    strcmp(buf->buf + pos, "an-trap") ?
3258 	    buf->buf + pos : "br");
3259 	return ROFF_IGN;
3260 }
3261 
3262 static int
3263 roff_Dd(ROFF_ARGS)
3264 {
3265 	int		 mask;
3266 	enum roff_tok	 t, te;
3267 
3268 	switch (tok) {
3269 	case ROFF_Dd:
3270 		tok = MDOC_Dd;
3271 		te = MDOC_MAX;
3272 		if (r->format == 0)
3273 			r->format = MPARSE_MDOC;
3274 		mask = MPARSE_MDOC | MPARSE_QUICK;
3275 		break;
3276 	case ROFF_TH:
3277 		tok = MAN_TH;
3278 		te = MAN_MAX;
3279 		if (r->format == 0)
3280 			r->format = MPARSE_MAN;
3281 		mask = MPARSE_QUICK;
3282 		break;
3283 	default:
3284 		abort();
3285 	}
3286 	if ((r->options & mask) == 0)
3287 		for (t = tok; t < te; t++)
3288 			roff_setstr(r, roff_name[t], NULL, 0);
3289 	return ROFF_CONT;
3290 }
3291 
3292 static int
3293 roff_TE(ROFF_ARGS)
3294 {
3295 	r->man->flags &= ~ROFF_NONOFILL;
3296 	if (r->tbl == NULL) {
3297 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3298 		return ROFF_IGN;
3299 	}
3300 	if (tbl_end(r->tbl, 0) == 0) {
3301 		r->tbl = NULL;
3302 		free(buf->buf);
3303 		buf->buf = mandoc_strdup(".sp");
3304 		buf->sz = 4;
3305 		*offs = 0;
3306 		return ROFF_REPARSE;
3307 	}
3308 	r->tbl = NULL;
3309 	return ROFF_IGN;
3310 }
3311 
3312 static int
3313 roff_T_(ROFF_ARGS)
3314 {
3315 
3316 	if (NULL == r->tbl)
3317 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3318 	else
3319 		tbl_restart(ln, ppos, r->tbl);
3320 
3321 	return ROFF_IGN;
3322 }
3323 
3324 /*
3325  * Handle in-line equation delimiters.
3326  */
3327 static int
3328 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3329 {
3330 	char		*cp1, *cp2;
3331 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3332 
3333 	/*
3334 	 * Outside equations, look for an opening delimiter.
3335 	 * If we are inside an equation, we already know it is
3336 	 * in-line, or this function wouldn't have been called;
3337 	 * so look for a closing delimiter.
3338 	 */
3339 
3340 	cp1 = buf->buf + pos;
3341 	cp2 = strchr(cp1, r->eqn == NULL ?
3342 	    r->last_eqn->odelim : r->last_eqn->cdelim);
3343 	if (cp2 == NULL)
3344 		return ROFF_CONT;
3345 
3346 	*cp2++ = '\0';
3347 	bef_pr = bef_nl = aft_nl = aft_pr = "";
3348 
3349 	/* Handle preceding text, protecting whitespace. */
3350 
3351 	if (*buf->buf != '\0') {
3352 		if (r->eqn == NULL)
3353 			bef_pr = "\\&";
3354 		bef_nl = "\n";
3355 	}
3356 
3357 	/*
3358 	 * Prepare replacing the delimiter with an equation macro
3359 	 * and drop leading white space from the equation.
3360 	 */
3361 
3362 	if (r->eqn == NULL) {
3363 		while (*cp2 == ' ')
3364 			cp2++;
3365 		mac = ".EQ";
3366 	} else
3367 		mac = ".EN";
3368 
3369 	/* Handle following text, protecting whitespace. */
3370 
3371 	if (*cp2 != '\0') {
3372 		aft_nl = "\n";
3373 		if (r->eqn != NULL)
3374 			aft_pr = "\\&";
3375 	}
3376 
3377 	/* Do the actual replacement. */
3378 
3379 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3380 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3381 	free(buf->buf);
3382 	buf->buf = cp1;
3383 
3384 	/* Toggle the in-line state of the eqn subsystem. */
3385 
3386 	r->eqn_inline = r->eqn == NULL;
3387 	return ROFF_REPARSE;
3388 }
3389 
3390 static int
3391 roff_EQ(ROFF_ARGS)
3392 {
3393 	struct roff_node	*n;
3394 
3395 	if (r->man->meta.macroset == MACROSET_MAN)
3396 		man_breakscope(r->man, ROFF_EQ);
3397 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3398 	if (ln > r->man->last->line)
3399 		n->flags |= NODE_LINE;
3400 	n->eqn = eqn_box_new();
3401 	roff_node_append(r->man, n);
3402 	r->man->next = ROFF_NEXT_SIBLING;
3403 
3404 	assert(r->eqn == NULL);
3405 	if (r->last_eqn == NULL)
3406 		r->last_eqn = eqn_alloc();
3407 	else
3408 		eqn_reset(r->last_eqn);
3409 	r->eqn = r->last_eqn;
3410 	r->eqn->node = n;
3411 
3412 	if (buf->buf[pos] != '\0')
3413 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3414 		    ".EQ %s", buf->buf + pos);
3415 
3416 	return ROFF_IGN;
3417 }
3418 
3419 static int
3420 roff_EN(ROFF_ARGS)
3421 {
3422 	if (r->eqn != NULL) {
3423 		eqn_parse(r->eqn);
3424 		r->eqn = NULL;
3425 	} else
3426 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3427 	if (buf->buf[pos] != '\0')
3428 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3429 		    "EN %s", buf->buf + pos);
3430 	return ROFF_IGN;
3431 }
3432 
3433 static int
3434 roff_TS(ROFF_ARGS)
3435 {
3436 	if (r->tbl != NULL) {
3437 		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3438 		tbl_end(r->tbl, 0);
3439 	}
3440 	r->man->flags |= ROFF_NONOFILL;
3441 	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3442 	if (r->last_tbl == NULL)
3443 		r->first_tbl = r->tbl;
3444 	r->last_tbl = r->tbl;
3445 	return ROFF_IGN;
3446 }
3447 
3448 static int
3449 roff_noarg(ROFF_ARGS)
3450 {
3451 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3452 		man_breakscope(r->man, tok);
3453 	if (tok == ROFF_brp)
3454 		tok = ROFF_br;
3455 	roff_elem_alloc(r->man, ln, ppos, tok);
3456 	if (buf->buf[pos] != '\0')
3457 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3458 		   "%s %s", roff_name[tok], buf->buf + pos);
3459 	if (tok == ROFF_nf)
3460 		r->man->flags |= ROFF_NOFILL;
3461 	else if (tok == ROFF_fi)
3462 		r->man->flags &= ~ROFF_NOFILL;
3463 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3464 	r->man->next = ROFF_NEXT_SIBLING;
3465 	return ROFF_IGN;
3466 }
3467 
3468 static int
3469 roff_onearg(ROFF_ARGS)
3470 {
3471 	struct roff_node	*n;
3472 	char			*cp;
3473 	int			 npos;
3474 
3475 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3476 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3477 	     tok == ROFF_ti))
3478 		man_breakscope(r->man, tok);
3479 
3480 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3481 		r->man->last = roffce_node;
3482 		r->man->next = ROFF_NEXT_SIBLING;
3483 	}
3484 
3485 	roff_elem_alloc(r->man, ln, ppos, tok);
3486 	n = r->man->last;
3487 
3488 	cp = buf->buf + pos;
3489 	if (*cp != '\0') {
3490 		while (*cp != '\0' && *cp != ' ')
3491 			cp++;
3492 		while (*cp == ' ')
3493 			*cp++ = '\0';
3494 		if (*cp != '\0')
3495 			mandoc_msg(MANDOCERR_ARG_EXCESS,
3496 			    ln, (int)(cp - buf->buf),
3497 			    "%s ... %s", roff_name[tok], cp);
3498 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3499 	}
3500 
3501 	if (tok == ROFF_ce || tok == ROFF_rj) {
3502 		if (r->man->last->type == ROFFT_ELEM) {
3503 			roff_word_alloc(r->man, ln, pos, "1");
3504 			r->man->last->flags |= NODE_NOSRC;
3505 		}
3506 		npos = 0;
3507 		if (roff_evalnum(r, ln, r->man->last->string, &npos,
3508 		    &roffce_lines, 0) == 0) {
3509 			mandoc_msg(MANDOCERR_CE_NONUM,
3510 			    ln, pos, "ce %s", buf->buf + pos);
3511 			roffce_lines = 1;
3512 		}
3513 		if (roffce_lines < 1) {
3514 			r->man->last = r->man->last->parent;
3515 			roffce_node = NULL;
3516 			roffce_lines = 0;
3517 		} else
3518 			roffce_node = r->man->last->parent;
3519 	} else {
3520 		n->flags |= NODE_VALID | NODE_ENDED;
3521 		r->man->last = n;
3522 	}
3523 	n->flags |= NODE_LINE;
3524 	r->man->next = ROFF_NEXT_SIBLING;
3525 	return ROFF_IGN;
3526 }
3527 
3528 static int
3529 roff_manyarg(ROFF_ARGS)
3530 {
3531 	struct roff_node	*n;
3532 	char			*sp, *ep;
3533 
3534 	roff_elem_alloc(r->man, ln, ppos, tok);
3535 	n = r->man->last;
3536 
3537 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3538 		while (*ep != '\0' && *ep != ' ')
3539 			ep++;
3540 		while (*ep == ' ')
3541 			*ep++ = '\0';
3542 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3543 	}
3544 
3545 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3546 	r->man->last = n;
3547 	r->man->next = ROFF_NEXT_SIBLING;
3548 	return ROFF_IGN;
3549 }
3550 
3551 static int
3552 roff_als(ROFF_ARGS)
3553 {
3554 	char		*oldn, *newn, *end, *value;
3555 	size_t		 oldsz, newsz, valsz;
3556 
3557 	newn = oldn = buf->buf + pos;
3558 	if (*newn == '\0')
3559 		return ROFF_IGN;
3560 
3561 	newsz = roff_getname(r, &oldn, ln, pos);
3562 	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3563 		return ROFF_IGN;
3564 
3565 	end = oldn;
3566 	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3567 	if (oldsz == 0)
3568 		return ROFF_IGN;
3569 
3570 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3571 	    (int)oldsz, oldn);
3572 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3573 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3574 	free(value);
3575 	return ROFF_IGN;
3576 }
3577 
3578 /*
3579  * The .break request only makes sense inside conditionals,
3580  * and that case is already handled in roff_cond_sub().
3581  */
3582 static int
3583 roff_break(ROFF_ARGS)
3584 {
3585 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3586 	return ROFF_IGN;
3587 }
3588 
3589 static int
3590 roff_cc(ROFF_ARGS)
3591 {
3592 	const char	*p;
3593 
3594 	p = buf->buf + pos;
3595 
3596 	if (*p == '\0' || (r->control = *p++) == '.')
3597 		r->control = '\0';
3598 
3599 	if (*p != '\0')
3600 		mandoc_msg(MANDOCERR_ARG_EXCESS,
3601 		    ln, p - buf->buf, "cc ... %s", p);
3602 
3603 	return ROFF_IGN;
3604 }
3605 
3606 static int
3607 roff_char(ROFF_ARGS)
3608 {
3609 	const char	*p, *kp, *vp;
3610 	size_t		 ksz, vsz;
3611 	int		 font;
3612 
3613 	/* Parse the character to be replaced. */
3614 
3615 	kp = buf->buf + pos;
3616 	p = kp + 1;
3617 	if (*kp == '\0' || (*kp == '\\' &&
3618 	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3619 	    (*p != ' ' && *p != '\0')) {
3620 		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3621 		return ROFF_IGN;
3622 	}
3623 	ksz = p - kp;
3624 	while (*p == ' ')
3625 		p++;
3626 
3627 	/*
3628 	 * If the replacement string contains a font escape sequence,
3629 	 * we have to restore the font at the end.
3630 	 */
3631 
3632 	vp = p;
3633 	vsz = strlen(p);
3634 	font = 0;
3635 	while (*p != '\0') {
3636 		if (*p++ != '\\')
3637 			continue;
3638 		switch (mandoc_escape(&p, NULL, NULL)) {
3639 		case ESCAPE_FONT:
3640 		case ESCAPE_FONTROMAN:
3641 		case ESCAPE_FONTITALIC:
3642 		case ESCAPE_FONTBOLD:
3643 		case ESCAPE_FONTBI:
3644 		case ESCAPE_FONTCW:
3645 		case ESCAPE_FONTPREV:
3646 			font++;
3647 			break;
3648 		default:
3649 			break;
3650 		}
3651 	}
3652 	if (font > 1)
3653 		mandoc_msg(MANDOCERR_CHAR_FONT,
3654 		    ln, (int)(vp - buf->buf), "%s", vp);
3655 
3656 	/*
3657 	 * Approximate the effect of .char using the .tr tables.
3658 	 * XXX In groff, .char and .tr interact differently.
3659 	 */
3660 
3661 	if (ksz == 1) {
3662 		if (r->xtab == NULL)
3663 			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3664 		assert((unsigned int)*kp < 128);
3665 		free(r->xtab[(int)*kp].p);
3666 		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3667 		    "%s%s", vp, font ? "\fP" : "");
3668 	} else {
3669 		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3670 		if (font)
3671 			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3672 	}
3673 	return ROFF_IGN;
3674 }
3675 
3676 static int
3677 roff_ec(ROFF_ARGS)
3678 {
3679 	const char	*p;
3680 
3681 	p = buf->buf + pos;
3682 	if (*p == '\0')
3683 		r->escape = '\\';
3684 	else {
3685 		r->escape = *p;
3686 		if (*++p != '\0')
3687 			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3688 			    (int)(p - buf->buf), "ec ... %s", p);
3689 	}
3690 	return ROFF_IGN;
3691 }
3692 
3693 static int
3694 roff_eo(ROFF_ARGS)
3695 {
3696 	r->escape = '\0';
3697 	if (buf->buf[pos] != '\0')
3698 		mandoc_msg(MANDOCERR_ARG_SKIP,
3699 		    ln, pos, "eo %s", buf->buf + pos);
3700 	return ROFF_IGN;
3701 }
3702 
3703 static int
3704 roff_nop(ROFF_ARGS)
3705 {
3706 	while (buf->buf[pos] == ' ')
3707 		pos++;
3708 	*offs = pos;
3709 	return ROFF_RERUN;
3710 }
3711 
3712 static int
3713 roff_tr(ROFF_ARGS)
3714 {
3715 	const char	*p, *first, *second;
3716 	size_t		 fsz, ssz;
3717 	enum mandoc_esc	 esc;
3718 
3719 	p = buf->buf + pos;
3720 
3721 	if (*p == '\0') {
3722 		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3723 		return ROFF_IGN;
3724 	}
3725 
3726 	while (*p != '\0') {
3727 		fsz = ssz = 1;
3728 
3729 		first = p++;
3730 		if (*first == '\\') {
3731 			esc = mandoc_escape(&p, NULL, NULL);
3732 			if (esc == ESCAPE_ERROR) {
3733 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3734 				    (int)(p - buf->buf), "%s", first);
3735 				return ROFF_IGN;
3736 			}
3737 			fsz = (size_t)(p - first);
3738 		}
3739 
3740 		second = p++;
3741 		if (*second == '\\') {
3742 			esc = mandoc_escape(&p, NULL, NULL);
3743 			if (esc == ESCAPE_ERROR) {
3744 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3745 				    (int)(p - buf->buf), "%s", second);
3746 				return ROFF_IGN;
3747 			}
3748 			ssz = (size_t)(p - second);
3749 		} else if (*second == '\0') {
3750 			mandoc_msg(MANDOCERR_TR_ODD, ln,
3751 			    (int)(first - buf->buf), "tr %s", first);
3752 			second = " ";
3753 			p--;
3754 		}
3755 
3756 		if (fsz > 1) {
3757 			roff_setstrn(&r->xmbtab, first, fsz,
3758 			    second, ssz, 0);
3759 			continue;
3760 		}
3761 
3762 		if (r->xtab == NULL)
3763 			r->xtab = mandoc_calloc(128,
3764 			    sizeof(struct roffstr));
3765 
3766 		free(r->xtab[(int)*first].p);
3767 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3768 		r->xtab[(int)*first].sz = ssz;
3769 	}
3770 
3771 	return ROFF_IGN;
3772 }
3773 
3774 /*
3775  * Implementation of the .return request.
3776  * There is no need to call roff_userret() from here.
3777  * The read module will call that after rewinding the reader stack
3778  * to the place from where the current macro was called.
3779  */
3780 static int
3781 roff_return(ROFF_ARGS)
3782 {
3783 	if (r->mstackpos >= 0)
3784 		return ROFF_IGN | ROFF_USERRET;
3785 
3786 	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3787 	return ROFF_IGN;
3788 }
3789 
3790 static int
3791 roff_rn(ROFF_ARGS)
3792 {
3793 	const char	*value;
3794 	char		*oldn, *newn, *end;
3795 	size_t		 oldsz, newsz;
3796 	int		 deftype;
3797 
3798 	oldn = newn = buf->buf + pos;
3799 	if (*oldn == '\0')
3800 		return ROFF_IGN;
3801 
3802 	oldsz = roff_getname(r, &newn, ln, pos);
3803 	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3804 		return ROFF_IGN;
3805 
3806 	end = newn;
3807 	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3808 	if (newsz == 0)
3809 		return ROFF_IGN;
3810 
3811 	deftype = ROFFDEF_ANY;
3812 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3813 	switch (deftype) {
3814 	case ROFFDEF_USER:
3815 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3816 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3817 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3818 		break;
3819 	case ROFFDEF_PRE:
3820 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3821 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3822 		break;
3823 	case ROFFDEF_REN:
3824 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3825 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3826 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3827 		break;
3828 	case ROFFDEF_STD:
3829 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3830 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3831 		break;
3832 	default:
3833 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3834 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3835 		break;
3836 	}
3837 	return ROFF_IGN;
3838 }
3839 
3840 static int
3841 roff_shift(ROFF_ARGS)
3842 {
3843 	struct mctx	*ctx;
3844 	int		 levels, i;
3845 
3846 	levels = 1;
3847 	if (buf->buf[pos] != '\0' &&
3848 	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3849 		mandoc_msg(MANDOCERR_CE_NONUM,
3850 		    ln, pos, "shift %s", buf->buf + pos);
3851 		levels = 1;
3852 	}
3853 	if (r->mstackpos < 0) {
3854 		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3855 		return ROFF_IGN;
3856 	}
3857 	ctx = r->mstack + r->mstackpos;
3858 	if (levels > ctx->argc) {
3859 		mandoc_msg(MANDOCERR_SHIFT,
3860 		    ln, pos, "%d, but max is %d", levels, ctx->argc);
3861 		levels = ctx->argc;
3862 	}
3863 	if (levels == 0)
3864 		return ROFF_IGN;
3865 	for (i = 0; i < levels; i++)
3866 		free(ctx->argv[i]);
3867 	ctx->argc -= levels;
3868 	for (i = 0; i < ctx->argc; i++)
3869 		ctx->argv[i] = ctx->argv[i + levels];
3870 	return ROFF_IGN;
3871 }
3872 
3873 static int
3874 roff_so(ROFF_ARGS)
3875 {
3876 	char *name, *cp;
3877 
3878 	name = buf->buf + pos;
3879 	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3880 
3881 	/*
3882 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3883 	 * opening anything that's not in our cwd or anything beneath
3884 	 * it.  Thus, explicitly disallow traversing up the file-system
3885 	 * or using absolute paths.
3886 	 */
3887 
3888 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3889 		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3890 		buf->sz = mandoc_asprintf(&cp,
3891 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3892 		free(buf->buf);
3893 		buf->buf = cp;
3894 		*offs = 0;
3895 		return ROFF_REPARSE;
3896 	}
3897 
3898 	*offs = pos;
3899 	return ROFF_SO;
3900 }
3901 
3902 /* --- user defined strings and macros ------------------------------------ */
3903 
3904 static int
3905 roff_userdef(ROFF_ARGS)
3906 {
3907 	struct mctx	 *ctx;
3908 	char		 *arg, *ap, *dst, *src;
3909 	size_t		  sz;
3910 
3911 	/* If the macro is empty, ignore it altogether. */
3912 
3913 	if (*r->current_string == '\0')
3914 		return ROFF_IGN;
3915 
3916 	/* Initialize a new macro stack context. */
3917 
3918 	if (++r->mstackpos == r->mstacksz) {
3919 		r->mstack = mandoc_recallocarray(r->mstack,
3920 		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3921 		r->mstacksz += 8;
3922 	}
3923 	ctx = r->mstack + r->mstackpos;
3924 	ctx->argsz = 0;
3925 	ctx->argc = 0;
3926 	ctx->argv = NULL;
3927 
3928 	/*
3929 	 * Collect pointers to macro argument strings,
3930 	 * NUL-terminating them and escaping quotes.
3931 	 */
3932 
3933 	src = buf->buf + pos;
3934 	while (*src != '\0') {
3935 		if (ctx->argc == ctx->argsz) {
3936 			ctx->argsz += 8;
3937 			ctx->argv = mandoc_reallocarray(ctx->argv,
3938 			    ctx->argsz, sizeof(*ctx->argv));
3939 		}
3940 		arg = roff_getarg(r, &src, ln, &pos);
3941 		sz = 1;  /* For the terminating NUL. */
3942 		for (ap = arg; *ap != '\0'; ap++)
3943 			sz += *ap == '"' ? 4 : 1;
3944 		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3945 		for (ap = arg; *ap != '\0'; ap++) {
3946 			if (*ap == '"') {
3947 				memcpy(dst, "\\(dq", 4);
3948 				dst += 4;
3949 			} else
3950 				*dst++ = *ap;
3951 		}
3952 		*dst = '\0';
3953 		free(arg);
3954 	}
3955 
3956 	/* Replace the macro invocation by the macro definition. */
3957 
3958 	free(buf->buf);
3959 	buf->buf = mandoc_strdup(r->current_string);
3960 	buf->sz = strlen(buf->buf) + 1;
3961 	*offs = 0;
3962 
3963 	return buf->buf[buf->sz - 2] == '\n' ?
3964 	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3965 }
3966 
3967 /*
3968  * Calling a high-level macro that was renamed with .rn.
3969  * r->current_string has already been set up by roff_parse().
3970  */
3971 static int
3972 roff_renamed(ROFF_ARGS)
3973 {
3974 	char	*nbuf;
3975 
3976 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3977 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3978 	free(buf->buf);
3979 	buf->buf = nbuf;
3980 	*offs = 0;
3981 	return ROFF_CONT;
3982 }
3983 
3984 /*
3985  * Measure the length in bytes of the roff identifier at *cpp
3986  * and advance the pointer to the next word.
3987  */
3988 static size_t
3989 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3990 {
3991 	char	 *name, *cp;
3992 	size_t	  namesz;
3993 
3994 	name = *cpp;
3995 	if (*name == '\0')
3996 		return 0;
3997 
3998 	/* Advance cp to the byte after the end of the name. */
3999 
4000 	for (cp = name; 1; cp++) {
4001 		namesz = cp - name;
4002 		if (*cp == '\0')
4003 			break;
4004 		if (*cp == ' ' || *cp == '\t') {
4005 			cp++;
4006 			break;
4007 		}
4008 		if (*cp != '\\')
4009 			continue;
4010 		if (cp[1] == '{' || cp[1] == '}')
4011 			break;
4012 		if (*++cp == '\\')
4013 			continue;
4014 		mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4015 		    "%.*s", (int)(cp - name + 1), name);
4016 		mandoc_escape((const char **)&cp, NULL, NULL);
4017 		break;
4018 	}
4019 
4020 	/* Read past spaces. */
4021 
4022 	while (*cp == ' ')
4023 		cp++;
4024 
4025 	*cpp = cp;
4026 	return namesz;
4027 }
4028 
4029 /*
4030  * Store *string into the user-defined string called *name.
4031  * To clear an existing entry, call with (*r, *name, NULL, 0).
4032  * append == 0: replace mode
4033  * append == 1: single-line append mode
4034  * append == 2: multiline append mode, append '\n' after each call
4035  */
4036 static void
4037 roff_setstr(struct roff *r, const char *name, const char *string,
4038 	int append)
4039 {
4040 	size_t	 namesz;
4041 
4042 	namesz = strlen(name);
4043 	roff_setstrn(&r->strtab, name, namesz, string,
4044 	    string ? strlen(string) : 0, append);
4045 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4046 }
4047 
4048 static void
4049 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4050 		const char *string, size_t stringsz, int append)
4051 {
4052 	struct roffkv	*n;
4053 	char		*c;
4054 	int		 i;
4055 	size_t		 oldch, newch;
4056 
4057 	/* Search for an existing string with the same name. */
4058 	n = *r;
4059 
4060 	while (n && (namesz != n->key.sz ||
4061 			strncmp(n->key.p, name, namesz)))
4062 		n = n->next;
4063 
4064 	if (NULL == n) {
4065 		/* Create a new string table entry. */
4066 		n = mandoc_malloc(sizeof(struct roffkv));
4067 		n->key.p = mandoc_strndup(name, namesz);
4068 		n->key.sz = namesz;
4069 		n->val.p = NULL;
4070 		n->val.sz = 0;
4071 		n->next = *r;
4072 		*r = n;
4073 	} else if (0 == append) {
4074 		free(n->val.p);
4075 		n->val.p = NULL;
4076 		n->val.sz = 0;
4077 	}
4078 
4079 	if (NULL == string)
4080 		return;
4081 
4082 	/*
4083 	 * One additional byte for the '\n' in multiline mode,
4084 	 * and one for the terminating '\0'.
4085 	 */
4086 	newch = stringsz + (1 < append ? 2u : 1u);
4087 
4088 	if (NULL == n->val.p) {
4089 		n->val.p = mandoc_malloc(newch);
4090 		*n->val.p = '\0';
4091 		oldch = 0;
4092 	} else {
4093 		oldch = n->val.sz;
4094 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4095 	}
4096 
4097 	/* Skip existing content in the destination buffer. */
4098 	c = n->val.p + (int)oldch;
4099 
4100 	/* Append new content to the destination buffer. */
4101 	i = 0;
4102 	while (i < (int)stringsz) {
4103 		/*
4104 		 * Rudimentary roff copy mode:
4105 		 * Handle escaped backslashes.
4106 		 */
4107 		if ('\\' == string[i] && '\\' == string[i + 1])
4108 			i++;
4109 		*c++ = string[i++];
4110 	}
4111 
4112 	/* Append terminating bytes. */
4113 	if (1 < append)
4114 		*c++ = '\n';
4115 
4116 	*c = '\0';
4117 	n->val.sz = (int)(c - n->val.p);
4118 }
4119 
4120 static const char *
4121 roff_getstrn(struct roff *r, const char *name, size_t len,
4122     int *deftype)
4123 {
4124 	const struct roffkv	*n;
4125 	int			 found, i;
4126 	enum roff_tok		 tok;
4127 
4128 	found = 0;
4129 	for (n = r->strtab; n != NULL; n = n->next) {
4130 		if (strncmp(name, n->key.p, len) != 0 ||
4131 		    n->key.p[len] != '\0' || n->val.p == NULL)
4132 			continue;
4133 		if (*deftype & ROFFDEF_USER) {
4134 			*deftype = ROFFDEF_USER;
4135 			return n->val.p;
4136 		} else {
4137 			found = 1;
4138 			break;
4139 		}
4140 	}
4141 	for (n = r->rentab; n != NULL; n = n->next) {
4142 		if (strncmp(name, n->key.p, len) != 0 ||
4143 		    n->key.p[len] != '\0' || n->val.p == NULL)
4144 			continue;
4145 		if (*deftype & ROFFDEF_REN) {
4146 			*deftype = ROFFDEF_REN;
4147 			return n->val.p;
4148 		} else {
4149 			found = 1;
4150 			break;
4151 		}
4152 	}
4153 	for (i = 0; i < PREDEFS_MAX; i++) {
4154 		if (strncmp(name, predefs[i].name, len) != 0 ||
4155 		    predefs[i].name[len] != '\0')
4156 			continue;
4157 		if (*deftype & ROFFDEF_PRE) {
4158 			*deftype = ROFFDEF_PRE;
4159 			return predefs[i].str;
4160 		} else {
4161 			found = 1;
4162 			break;
4163 		}
4164 	}
4165 	if (r->man->meta.macroset != MACROSET_MAN) {
4166 		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4167 			if (strncmp(name, roff_name[tok], len) != 0 ||
4168 			    roff_name[tok][len] != '\0')
4169 				continue;
4170 			if (*deftype & ROFFDEF_STD) {
4171 				*deftype = ROFFDEF_STD;
4172 				return NULL;
4173 			} else {
4174 				found = 1;
4175 				break;
4176 			}
4177 		}
4178 	}
4179 	if (r->man->meta.macroset != MACROSET_MDOC) {
4180 		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4181 			if (strncmp(name, roff_name[tok], len) != 0 ||
4182 			    roff_name[tok][len] != '\0')
4183 				continue;
4184 			if (*deftype & ROFFDEF_STD) {
4185 				*deftype = ROFFDEF_STD;
4186 				return NULL;
4187 			} else {
4188 				found = 1;
4189 				break;
4190 			}
4191 		}
4192 	}
4193 
4194 	if (found == 0 && *deftype != ROFFDEF_ANY) {
4195 		if (*deftype & ROFFDEF_REN) {
4196 			/*
4197 			 * This might still be a request,
4198 			 * so do not treat it as undefined yet.
4199 			 */
4200 			*deftype = ROFFDEF_UNDEF;
4201 			return NULL;
4202 		}
4203 
4204 		/* Using an undefined string defines it to be empty. */
4205 
4206 		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4207 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4208 	}
4209 
4210 	*deftype = 0;
4211 	return NULL;
4212 }
4213 
4214 static void
4215 roff_freestr(struct roffkv *r)
4216 {
4217 	struct roffkv	 *n, *nn;
4218 
4219 	for (n = r; n; n = nn) {
4220 		free(n->key.p);
4221 		free(n->val.p);
4222 		nn = n->next;
4223 		free(n);
4224 	}
4225 }
4226 
4227 /* --- accessors and utility functions ------------------------------------ */
4228 
4229 /*
4230  * Duplicate an input string, making the appropriate character
4231  * conversations (as stipulated by `tr') along the way.
4232  * Returns a heap-allocated string with all the replacements made.
4233  */
4234 char *
4235 roff_strdup(const struct roff *r, const char *p)
4236 {
4237 	const struct roffkv *cp;
4238 	char		*res;
4239 	const char	*pp;
4240 	size_t		 ssz, sz;
4241 	enum mandoc_esc	 esc;
4242 
4243 	if (NULL == r->xmbtab && NULL == r->xtab)
4244 		return mandoc_strdup(p);
4245 	else if ('\0' == *p)
4246 		return mandoc_strdup("");
4247 
4248 	/*
4249 	 * Step through each character looking for term matches
4250 	 * (remember that a `tr' can be invoked with an escape, which is
4251 	 * a glyph but the escape is multi-character).
4252 	 * We only do this if the character hash has been initialised
4253 	 * and the string is >0 length.
4254 	 */
4255 
4256 	res = NULL;
4257 	ssz = 0;
4258 
4259 	while ('\0' != *p) {
4260 		assert((unsigned int)*p < 128);
4261 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4262 			sz = r->xtab[(int)*p].sz;
4263 			res = mandoc_realloc(res, ssz + sz + 1);
4264 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4265 			ssz += sz;
4266 			p++;
4267 			continue;
4268 		} else if ('\\' != *p) {
4269 			res = mandoc_realloc(res, ssz + 2);
4270 			res[ssz++] = *p++;
4271 			continue;
4272 		}
4273 
4274 		/* Search for term matches. */
4275 		for (cp = r->xmbtab; cp; cp = cp->next)
4276 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4277 				break;
4278 
4279 		if (NULL != cp) {
4280 			/*
4281 			 * A match has been found.
4282 			 * Append the match to the array and move
4283 			 * forward by its keysize.
4284 			 */
4285 			res = mandoc_realloc(res,
4286 			    ssz + cp->val.sz + 1);
4287 			memcpy(res + ssz, cp->val.p, cp->val.sz);
4288 			ssz += cp->val.sz;
4289 			p += (int)cp->key.sz;
4290 			continue;
4291 		}
4292 
4293 		/*
4294 		 * Handle escapes carefully: we need to copy
4295 		 * over just the escape itself, or else we might
4296 		 * do replacements within the escape itself.
4297 		 * Make sure to pass along the bogus string.
4298 		 */
4299 		pp = p++;
4300 		esc = mandoc_escape(&p, NULL, NULL);
4301 		if (ESCAPE_ERROR == esc) {
4302 			sz = strlen(pp);
4303 			res = mandoc_realloc(res, ssz + sz + 1);
4304 			memcpy(res + ssz, pp, sz);
4305 			break;
4306 		}
4307 		/*
4308 		 * We bail out on bad escapes.
4309 		 * No need to warn: we already did so when
4310 		 * roff_expand() was called.
4311 		 */
4312 		sz = (int)(p - pp);
4313 		res = mandoc_realloc(res, ssz + sz + 1);
4314 		memcpy(res + ssz, pp, sz);
4315 		ssz += sz;
4316 	}
4317 
4318 	res[(int)ssz] = '\0';
4319 	return res;
4320 }
4321 
4322 int
4323 roff_getformat(const struct roff *r)
4324 {
4325 
4326 	return r->format;
4327 }
4328 
4329 /*
4330  * Find out whether a line is a macro line or not.
4331  * If it is, adjust the current position and return one; if it isn't,
4332  * return zero and don't change the current position.
4333  * If the control character has been set with `.cc', then let that grain
4334  * precedence.
4335  * This is slighly contrary to groff, where using the non-breaking
4336  * control character when `cc' has been invoked will cause the
4337  * non-breaking macro contents to be printed verbatim.
4338  */
4339 int
4340 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4341 {
4342 	int		pos;
4343 
4344 	pos = *ppos;
4345 
4346 	if (r->control != '\0' && cp[pos] == r->control)
4347 		pos++;
4348 	else if (r->control != '\0')
4349 		return 0;
4350 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4351 		pos += 2;
4352 	else if ('.' == cp[pos] || '\'' == cp[pos])
4353 		pos++;
4354 	else
4355 		return 0;
4356 
4357 	while (' ' == cp[pos] || '\t' == cp[pos])
4358 		pos++;
4359 
4360 	*ppos = pos;
4361 	return 1;
4362 }
4363