xref: /openbsd-src/usr.bin/mandoc/roff.c (revision fc405d53b73a2d73393cb97f684863d17b583e38)
1 /* $OpenBSD: roff.c,v 1.268 2022/12/26 19:16:02 jmc Exp $ */
2 /*
3  * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the roff(7) parser for mandoc(1).
19  */
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40 
41 /* Maximum number of string expansions per line, to break infinite loops. */
42 #define	EXPAND_LIMIT	1000
43 
/*
 * Types of definitions of macros and strings.
 * These are single-bit flags that can be or'ed together.
 * ROFFDEF_ANY combines the four kinds of existing definitions
 * and deliberately does not include ROFFDEF_UNDEF.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
52 
53 /* --- data types --------------------------------------------------------- */
54 
/*
 * An incredibly-simple string buffer: a pointer to the characters
 * plus their cached length.  It serves as the key and value type of
 * the list structures below and as the element type of the
 * single-byte translation table in struct roff.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
62 
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff uses such lists for user-defined strings and macros,
 * for renamed strings and macros, and for multi-byte translations.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
71 
/*
 * A single number register as part of a singly-linked list.
 * The head of the list is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* current value */
	int		 step; /* NOTE(review): presumably the auto-increment
				  step; confirm in roff_setregn() */
	struct roffreg	*next; /* next in list */
};
81 
/*
 * Association of request and macro names with token IDs.
 * These are the entries of the hash tables built by roffhash_alloc().
 * The name is the hash key; it is a flexible array member, so each
 * entry is allocated with room for the name and its terminating NUL.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID the name maps to */
	char		 name[]; /* nil-terminated request or macro name */
};
89 
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested;
 * struct roff keeps them in its mstack array.
 */
struct	mctx {
	char		**argv; /* argument vector; released by roff_free() */
	int		 argc; /* presumably the number of arguments in use */
	int		 argsz; /* presumably the allocated size of argv */
};
99 
/*
 * The complete state of one roff parser instance.
 * Allocated by roff_alloc(), recycled by roff_reset(),
 * and destroyed by roff_free().
 * Both stack positions are -1 while the respective stack is empty.
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
127 
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through their parent pointers;
 * they are created by roffnode_push() and released by roffnode_pop().
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name; owned, may be NULL */
	char		*end; /* custom end macro of the block; owned */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
141 
/*
 * The parameter list shared by all request handlers,
 * and the function pointer type built from it.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	int (*roffproc)(ROFF_ARGS);
151 
/*
 * The handlers belonging to one roff request.
 * The roffs[] table holds one such entry per request token;
 * handlers that do not apply to a request are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
159 
/*
 * A predefined string, mapping an input name to its replacement.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one initializer for struct predef, trailing comma included.
 * Presumably used by the entries of the included file predefs.in.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
167 
168 /* --- function prototypes ------------------------------------------------ */
169 
170 static	int		 roffnode_cleanscope(struct roff *);
171 static	int		 roffnode_pop(struct roff *);
172 static	void		 roffnode_push(struct roff *, enum roff_tok,
173 				const char *, int, int);
174 static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
175 static	int		 roff_als(ROFF_ARGS);
176 static	int		 roff_block(ROFF_ARGS);
177 static	int		 roff_block_text(ROFF_ARGS);
178 static	int		 roff_block_sub(ROFF_ARGS);
179 static	int		 roff_break(ROFF_ARGS);
180 static	int		 roff_cblock(ROFF_ARGS);
181 static	int		 roff_cc(ROFF_ARGS);
182 static	int		 roff_ccond(struct roff *, int, int);
183 static	int		 roff_char(ROFF_ARGS);
184 static	int		 roff_cond(ROFF_ARGS);
185 static	int		 roff_cond_checkend(ROFF_ARGS);
186 static	int		 roff_cond_text(ROFF_ARGS);
187 static	int		 roff_cond_sub(ROFF_ARGS);
188 static	int		 roff_ds(ROFF_ARGS);
189 static	int		 roff_ec(ROFF_ARGS);
190 static	int		 roff_eo(ROFF_ARGS);
191 static	int		 roff_eqndelim(struct roff *, struct buf *, int);
192 static	int		 roff_evalcond(struct roff *, int, char *, int *);
193 static	int		 roff_evalnum(struct roff *, int,
194 				const char *, int *, int *, int);
195 static	int		 roff_evalpar(struct roff *, int,
196 				const char *, int *, int *, int);
197 static	int		 roff_evalstrcond(const char *, int *);
198 static	int		 roff_expand(struct roff *, struct buf *,
199 				int, int, char);
200 static	void		 roff_expand_patch(struct buf *, int,
201 				const char *, int);
202 static	void		 roff_free1(struct roff *);
203 static	void		 roff_freereg(struct roffreg *);
204 static	void		 roff_freestr(struct roffkv *);
205 static	size_t		 roff_getname(struct roff *, char **, int, int);
206 static	int		 roff_getnum(const char *, int *, int *, int);
207 static	int		 roff_getop(const char *, int *, char *);
208 static	int		 roff_getregn(struct roff *,
209 				const char *, size_t, char);
210 static	int		 roff_getregro(const struct roff *,
211 				const char *name);
212 static	const char	*roff_getstrn(struct roff *,
213 				const char *, size_t, int *);
214 static	int		 roff_hasregn(const struct roff *,
215 				const char *, size_t);
216 static	int		 roff_insec(ROFF_ARGS);
217 static	int		 roff_it(ROFF_ARGS);
218 static	int		 roff_line_ignore(ROFF_ARGS);
219 static	void		 roff_man_alloc1(struct roff_man *);
220 static	void		 roff_man_free1(struct roff_man *);
221 static	int		 roff_manyarg(ROFF_ARGS);
222 static	int		 roff_mc(ROFF_ARGS);
223 static	int		 roff_noarg(ROFF_ARGS);
224 static	int		 roff_nop(ROFF_ARGS);
225 static	int		 roff_nr(ROFF_ARGS);
226 static	int		 roff_onearg(ROFF_ARGS);
227 static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
228 				int, int);
229 static	int		 roff_parse_comment(struct roff *, struct buf *,
230 				int, int, char);
231 static	int		 roff_parsetext(struct roff *, struct buf *,
232 				int, int *);
233 static	int		 roff_renamed(ROFF_ARGS);
234 static	int		 roff_req_or_macro(ROFF_ARGS);
235 static	int		 roff_return(ROFF_ARGS);
236 static	int		 roff_rm(ROFF_ARGS);
237 static	int		 roff_rn(ROFF_ARGS);
238 static	int		 roff_rr(ROFF_ARGS);
239 static	void		 roff_setregn(struct roff *, const char *,
240 				size_t, int, char, int);
241 static	void		 roff_setstr(struct roff *,
242 				const char *, const char *, int);
243 static	void		 roff_setstrn(struct roffkv **, const char *,
244 				size_t, const char *, size_t, int);
245 static	int		 roff_shift(ROFF_ARGS);
246 static	int		 roff_so(ROFF_ARGS);
247 static	int		 roff_tr(ROFF_ARGS);
248 static	int		 roff_Dd(ROFF_ARGS);
249 static	int		 roff_TE(ROFF_ARGS);
250 static	int		 roff_TS(ROFF_ARGS);
251 static	int		 roff_EQ(ROFF_ARGS);
252 static	int		 roff_EN(ROFF_ARGS);
253 static	int		 roff_T_(ROFF_ARGS);
254 static	int		 roff_unsupp(ROFF_ARGS);
255 static	int		 roff_userdef(ROFF_ARGS);
256 
257 /* --- constant data ------------------------------------------------------ */
258 
259 #define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
260 #define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
261 
/*
 * Names of all requests and macros, indexed by enum roff_tok.
 * The roff requests come first, in the same order as the roffs[]
 * handler table; the two groups after "text" appear to be the
 * mdoc(7) and man(7) macro names.
 * NULL entries belong to tokens that have no name;
 * roffhash_alloc() skips them.
 * Other code gets read-only access through the roff_name pointer
 * defined right after the array.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
        "box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
const	char *const *roff_name = __roff_name;
371 
/*
 * Handlers for the roff requests, one entry for each token below
 * TOKEN_NONE, in the same order as the leading part of __roff_name[].
 * Only block and conditional requests have text and sub handlers.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_mc, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* renamed request; presumably ROFF_RENAMED */
	{ roff_userdef, NULL, NULL, 0 }  /* user-defined macro; presumably ROFF_USERDEF */
};
618 
619 /* Array of injected predefined strings. */
620 #define	PREDEFS_MAX	 38
621 static	const struct predef predefs[PREDEFS_MAX] = {
622 #include "predefs.in"
623 };
624 
625 static	int	 roffce_lines;	/* number of input lines to center */
626 static	struct roff_node *roffce_node;  /* active request */
627 static	int	 roffit_lines;  /* number of lines to delay */
628 static	char	*roffit_macro;  /* nil-terminated macro line */
629 
630 
631 /* --- request table ------------------------------------------------------ */
632 
633 struct ohash *
634 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
635 {
636 	struct ohash	*htab;
637 	struct roffreq	*req;
638 	enum roff_tok	 tok;
639 	size_t		 sz;
640 	unsigned int	 slot;
641 
642 	htab = mandoc_malloc(sizeof(*htab));
643 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
644 
645 	for (tok = mintok; tok < maxtok; tok++) {
646 		if (roff_name[tok] == NULL)
647 			continue;
648 		sz = strlen(roff_name[tok]);
649 		req = mandoc_malloc(sizeof(*req) + sz + 1);
650 		req->tok = tok;
651 		memcpy(req->name, roff_name[tok], sz + 1);
652 		slot = ohash_qlookup(htab, req->name);
653 		ohash_insert(htab, slot, req);
654 	}
655 	return htab;
656 }
657 
/*
 * Dispose of a table built by roffhash_alloc(): free every entry,
 * hand the table to ohash_delete(), then free the table object itself.
 * A NULL pointer is accepted and ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &iter);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &iter);
	}
	ohash_delete(htab);
	free(htab);
}
672 
673 enum roff_tok
674 roffhash_find(struct ohash *htab, const char *name, size_t sz)
675 {
676 	struct roffreq	*req;
677 	const char	*end;
678 
679 	if (sz) {
680 		end = name + sz;
681 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
682 	} else
683 		req = ohash_find(htab, ohash_qlookup(htab, name));
684 	return req == NULL ? TOKEN_NONE : req->tok;
685 }
686 
687 /* --- stack of request blocks -------------------------------------------- */
688 
689 /*
690  * Pop the current node off of the stack of roff instructions currently
691  * pending.  Return 1 if it is a loop or 0 otherwise.
692  */
693 static int
694 roffnode_pop(struct roff *r)
695 {
696 	struct roffnode	*p;
697 	int		 inloop;
698 
699 	p = r->last;
700 	inloop = p->tok == ROFF_while;
701 	r->last = p->parent;
702 	free(p->name);
703 	free(p->end);
704 	free(p);
705 	return inloop;
706 }
707 
708 /*
709  * Push a roff node onto the instruction stack.  This must later be
710  * removed with roffnode_pop().
711  */
712 static void
713 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
714 		int line, int col)
715 {
716 	struct roffnode	*p;
717 
718 	p = mandoc_calloc(1, sizeof(struct roffnode));
719 	p->tok = tok;
720 	if (name)
721 		p->name = mandoc_strdup(name);
722 	p->parent = r->last;
723 	p->line = line;
724 	p->col = col;
725 	p->rule = p->parent ? p->parent->rule : 0;
726 
727 	r->last = p;
728 }
729 
730 /* --- roff parser state data management ---------------------------------- */
731 
732 static void
733 roff_free1(struct roff *r)
734 {
735 	int		 i;
736 
737 	tbl_free(r->first_tbl);
738 	r->first_tbl = r->last_tbl = r->tbl = NULL;
739 
740 	eqn_free(r->last_eqn);
741 	r->last_eqn = r->eqn = NULL;
742 
743 	while (r->mstackpos >= 0)
744 		roff_userret(r);
745 
746 	while (r->last)
747 		roffnode_pop(r);
748 
749 	free (r->rstack);
750 	r->rstack = NULL;
751 	r->rstacksz = 0;
752 	r->rstackpos = -1;
753 
754 	roff_freereg(r->regtab);
755 	r->regtab = NULL;
756 
757 	roff_freestr(r->strtab);
758 	roff_freestr(r->rentab);
759 	roff_freestr(r->xmbtab);
760 	r->strtab = r->rentab = r->xmbtab = NULL;
761 
762 	if (r->xtab)
763 		for (i = 0; i < 128; i++)
764 			free(r->xtab[i].p);
765 	free(r->xtab);
766 	r->xtab = NULL;
767 }
768 
769 void
770 roff_reset(struct roff *r)
771 {
772 	roff_free1(r);
773 	r->options |= MPARSE_COMMENT;
774 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
775 	r->control = '\0';
776 	r->escape = '\\';
777 	roffce_lines = 0;
778 	roffce_node = NULL;
779 	roffit_lines = 0;
780 	roffit_macro = NULL;
781 }
782 
783 void
784 roff_free(struct roff *r)
785 {
786 	int		 i;
787 
788 	roff_free1(r);
789 	for (i = 0; i < r->mstacksz; i++)
790 		free(r->mstack[i].argv);
791 	free(r->mstack);
792 	roffhash_free(r->reqtab);
793 	free(r);
794 }
795 
796 struct roff *
797 roff_alloc(int options)
798 {
799 	struct roff	*r;
800 
801 	r = mandoc_calloc(1, sizeof(struct roff));
802 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
803 	r->options = options | MPARSE_COMMENT;
804 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
805 	r->mstackpos = -1;
806 	r->rstackpos = -1;
807 	r->escape = '\\';
808 	return r;
809 }
810 
811 /* --- syntax tree state data management ---------------------------------- */
812 
/*
 * Release what the meta data of a parse tree owns: meta.first is
 * handed to roff_node_delete() unless it is NULL, and the meta
 * strings are freed.  Neither the struct roff_man itself is freed
 * nor are the stale pointers cleared; the callers follow up with
 * roff_man_alloc1() or with free(man).
 */
static void
roff_man_free1(struct roff_man *man)
{
	if (man->meta.first != NULL)
		roff_node_delete(man, man->meta.first);
	free(man->meta.msec);
	free(man->meta.vol);
	free(man->meta.os);
	free(man->meta.arch);
	free(man->meta.title);
	free(man->meta.name);
	free(man->meta.date);
	free(man->meta.sodest);
}
827 
828 void
829 roff_state_reset(struct roff_man *man)
830 {
831 	man->last = man->meta.first;
832 	man->last_es = NULL;
833 	man->flags = 0;
834 	man->lastsec = man->lastnamed = SEC_NONE;
835 	man->next = ROFF_NEXT_CHILD;
836 	roff_setreg(man->roff, "nS", 0, '=');
837 }
838 
/*
 * Set up the meta data of a parse tree from scratch: zero it,
 * install a fresh root node, and reset the tree builder state.
 * Whatever the meta data held before is overwritten, not freed.
 */
static void
roff_man_alloc1(struct roff_man *man)
{
	memset(&man->meta, 0, sizeof(man->meta));
	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
	man->meta.first->type = ROFFT_ROOT;
	man->meta.macroset = MACROSET_NONE;
	/* Needs the root node in place: it makes meta.first the last node. */
	roff_state_reset(man);
}
848 
/*
 * Throw away the current syntax tree and meta data
 * and start over with an empty tree, keeping the state object.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* release the old content */
	roff_man_alloc1(man);	/* install a new root node */
}
855 
856 void
857 roff_man_free(struct roff_man *man)
858 {
859 	roff_man_free1(man);
860 	free(man->os_r);
861 	free(man);
862 }
863 
864 struct roff_man *
865 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
866 {
867 	struct roff_man *man;
868 
869 	man = mandoc_calloc(1, sizeof(*man));
870 	man->roff = roff;
871 	man->os_s = os_s;
872 	man->quick = quick;
873 	roff_man_alloc1(man);
874 	roff->man = man;
875 	return man;
876 }
877 
878 /* --- syntax tree handling ----------------------------------------------- */
879 
880 struct roff_node *
881 roff_node_alloc(struct roff_man *man, int line, int pos,
882 	enum roff_type type, int tok)
883 {
884 	struct roff_node	*n;
885 
886 	n = mandoc_calloc(1, sizeof(*n));
887 	n->line = line;
888 	n->pos = pos;
889 	n->tok = tok;
890 	n->type = type;
891 	n->sec = man->lastsec;
892 
893 	if (man->flags & MDOC_SYNOPSIS)
894 		n->flags |= NODE_SYNPRETTY;
895 	else
896 		n->flags &= ~NODE_SYNPRETTY;
897 	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
898 		n->flags |= NODE_NOFILL;
899 	else
900 		n->flags &= ~NODE_NOFILL;
901 	if (man->flags & MDOC_NEWLINE)
902 		n->flags |= NODE_LINE;
903 	man->flags &= ~MDOC_NEWLINE;
904 
905 	return n;
906 }
907 
908 void
909 roff_node_append(struct roff_man *man, struct roff_node *n)
910 {
911 
912 	switch (man->next) {
913 	case ROFF_NEXT_SIBLING:
914 		if (man->last->next != NULL) {
915 			n->next = man->last->next;
916 			man->last->next->prev = n;
917 		} else
918 			man->last->parent->last = n;
919 		man->last->next = n;
920 		n->prev = man->last;
921 		n->parent = man->last->parent;
922 		break;
923 	case ROFF_NEXT_CHILD:
924 		if (man->last->child != NULL) {
925 			n->next = man->last->child;
926 			man->last->child->prev = n;
927 		} else
928 			man->last->last = n;
929 		man->last->child = n;
930 		n->parent = man->last;
931 		break;
932 	default:
933 		abort();
934 	}
935 	man->last = n;
936 
937 	switch (n->type) {
938 	case ROFFT_HEAD:
939 		n->parent->head = n;
940 		break;
941 	case ROFFT_BODY:
942 		if (n->end != ENDBODY_NOT)
943 			return;
944 		n->parent->body = n;
945 		break;
946 	case ROFFT_TAIL:
947 		n->parent->tail = n;
948 		break;
949 	default:
950 		return;
951 	}
952 
953 	/*
954 	 * Copy over the normalised-data pointer of our parent.  Not
955 	 * everybody has one, but copying a null pointer is fine.
956 	 */
957 
958 	n->norm = n->parent->norm;
959 	assert(n->parent->type == ROFFT_BLOCK);
960 }
961 
962 void
963 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
964 {
965 	struct roff_node	*n;
966 
967 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
968 	n->string = roff_strdup(man->roff, word);
969 	roff_node_append(man, n);
970 	n->flags |= NODE_VALID | NODE_ENDED;
971 	man->next = ROFF_NEXT_SIBLING;
972 }
973 
974 void
975 roff_word_append(struct roff_man *man, const char *word)
976 {
977 	struct roff_node	*n;
978 	char			*addstr, *newstr;
979 
980 	n = man->last;
981 	addstr = roff_strdup(man->roff, word);
982 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
983 	free(addstr);
984 	free(n->string);
985 	n->string = newstr;
986 	man->next = ROFF_NEXT_SIBLING;
987 }
988 
989 void
990 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
991 {
992 	struct roff_node	*n;
993 
994 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
995 	roff_node_append(man, n);
996 	man->next = ROFF_NEXT_CHILD;
997 }
998 
999 struct roff_node *
1000 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1001 {
1002 	struct roff_node	*n;
1003 
1004 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1005 	roff_node_append(man, n);
1006 	man->next = ROFF_NEXT_CHILD;
1007 	return n;
1008 }
1009 
1010 struct roff_node *
1011 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1012 {
1013 	struct roff_node	*n;
1014 
1015 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1016 	roff_node_append(man, n);
1017 	man->next = ROFF_NEXT_CHILD;
1018 	return n;
1019 }
1020 
1021 struct roff_node *
1022 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1023 {
1024 	struct roff_node	*n;
1025 
1026 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1027 	roff_node_append(man, n);
1028 	man->next = ROFF_NEXT_CHILD;
1029 	return n;
1030 }
1031 
1032 static void
1033 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1034 {
1035 	struct roff_node	*n;
1036 	struct tbl_span		*span;
1037 
1038 	if (man->meta.macroset == MACROSET_MAN)
1039 		man_breakscope(man, ROFF_TS);
1040 	while ((span = tbl_span(tbl)) != NULL) {
1041 		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1042 		n->span = span;
1043 		roff_node_append(man, n);
1044 		n->flags |= NODE_VALID | NODE_ENDED;
1045 		man->next = ROFF_NEXT_SIBLING;
1046 	}
1047 }
1048 
1049 void
1050 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1051 {
1052 
1053 	/* Adjust siblings. */
1054 
1055 	if (n->prev)
1056 		n->prev->next = n->next;
1057 	if (n->next)
1058 		n->next->prev = n->prev;
1059 
1060 	/* Adjust parent. */
1061 
1062 	if (n->parent != NULL) {
1063 		if (n->parent->child == n)
1064 			n->parent->child = n->next;
1065 		if (n->parent->last == n)
1066 			n->parent->last = n->prev;
1067 	}
1068 
1069 	/* Adjust parse point. */
1070 
1071 	if (man == NULL)
1072 		return;
1073 	if (man->last == n) {
1074 		if (n->prev == NULL) {
1075 			man->last = n->parent;
1076 			man->next = ROFF_NEXT_CHILD;
1077 		} else {
1078 			man->last = n->prev;
1079 			man->next = ROFF_NEXT_SIBLING;
1080 		}
1081 	}
1082 	if (man->meta.first == n)
1083 		man->meta.first = NULL;
1084 }
1085 
1086 void
1087 roff_node_relink(struct roff_man *man, struct roff_node *n)
1088 {
1089 	roff_node_unlink(man, n);
1090 	n->prev = n->next = NULL;
1091 	roff_node_append(man, n);
1092 }
1093 
1094 void
1095 roff_node_free(struct roff_node *n)
1096 {
1097 
1098 	if (n->args != NULL)
1099 		mdoc_argv_free(n->args);
1100 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1101 		free(n->norm);
1102 	eqn_box_free(n->eqn);
1103 	free(n->string);
1104 	free(n->tag);
1105 	free(n);
1106 }
1107 
1108 void
1109 roff_node_delete(struct roff_man *man, struct roff_node *n)
1110 {
1111 
1112 	while (n->child != NULL)
1113 		roff_node_delete(man, n->child);
1114 	roff_node_unlink(man, n);
1115 	roff_node_free(n);
1116 }
1117 
1118 int
1119 roff_node_transparent(struct roff_node *n)
1120 {
1121 	if (n == NULL)
1122 		return 0;
1123 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1124 		return 1;
1125 	return roff_tok_transparent(n->tok);
1126 }
1127 
1128 int
1129 roff_tok_transparent(enum roff_tok tok)
1130 {
1131 	switch (tok) {
1132 	case ROFF_ft:
1133 	case ROFF_ll:
1134 	case ROFF_mc:
1135 	case ROFF_po:
1136 	case ROFF_ta:
1137 	case MDOC_Db:
1138 	case MDOC_Es:
1139 	case MDOC_Sm:
1140 	case MDOC_Tg:
1141 	case MAN_DT:
1142 	case MAN_UC:
1143 	case MAN_PD:
1144 	case MAN_AT:
1145 		return 1;
1146 	default:
1147 		return 0;
1148 	}
1149 }
1150 
1151 struct roff_node *
1152 roff_node_child(struct roff_node *n)
1153 {
1154 	for (n = n->child; roff_node_transparent(n); n = n->next)
1155 		continue;
1156 	return n;
1157 }
1158 
1159 struct roff_node *
1160 roff_node_prev(struct roff_node *n)
1161 {
1162 	do {
1163 		n = n->prev;
1164 	} while (roff_node_transparent(n));
1165 	return n;
1166 }
1167 
1168 struct roff_node *
1169 roff_node_next(struct roff_node *n)
1170 {
1171 	do {
1172 		n = n->next;
1173 	} while (roff_node_transparent(n));
1174 	return n;
1175 }
1176 
1177 void
1178 deroff(char **dest, const struct roff_node *n)
1179 {
1180 	char	*cp;
1181 	size_t	 sz;
1182 
1183 	if (n->string == NULL) {
1184 		for (n = n->child; n != NULL; n = n->next)
1185 			deroff(dest, n);
1186 		return;
1187 	}
1188 
1189 	/* Skip leading whitespace. */
1190 
1191 	for (cp = n->string; *cp != '\0'; cp++) {
1192 		if (cp[0] == '\\' && cp[1] != '\0' &&
1193 		    strchr(" %&0^|~", cp[1]) != NULL)
1194 			cp++;
1195 		else if ( ! isspace((unsigned char)*cp))
1196 			break;
1197 	}
1198 
1199 	/* Skip trailing backslash. */
1200 
1201 	sz = strlen(cp);
1202 	if (sz > 0 && cp[sz - 1] == '\\')
1203 		sz--;
1204 
1205 	/* Skip trailing whitespace. */
1206 
1207 	for (; sz; sz--)
1208 		if ( ! isspace((unsigned char)cp[sz-1]))
1209 			break;
1210 
1211 	/* Skip empty strings. */
1212 
1213 	if (sz == 0)
1214 		return;
1215 
1216 	if (*dest == NULL) {
1217 		*dest = mandoc_strndup(cp, sz);
1218 		return;
1219 	}
1220 
1221 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1222 	free(*dest);
1223 	*dest = cp;
1224 }
1225 
1226 /* --- main functions of the roff parser ---------------------------------- */
1227 
1228 /*
1229  * Save comments preceding the title macro, for example in order to
1230  * preserve Copyright and license headers in HTML output,
1231  * provide diagnostics about RCS ids and trailing whitespace in comments,
1232  * then discard comments including preceding whitespace.
1233  * This function also handles input line continuation.
1234  */
1235 static int
1236 roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
1237 {
1238 	struct roff_node *n;	/* used for header comments */
1239 	const char	*start;	/* start of the string to process */
1240 	const char	*cp;	/* for RCS id parsing */
1241 	char		*stesc;	/* start of an escape sequence ('\\') */
1242 	char		*ep;	/* end of comment string */
1243 	int		 rcsid;	/* kind of RCS id seen */
1244 
1245 	for (start = stesc = buf->buf + pos;; stesc++) {
1246 		/*
1247 		 * XXX Ugly hack: Remove the newline character that
1248 		 * mparse_buf_r() appended to mark the end of input
1249 		 * if it is not preceded by an escape character.
1250 		 */
1251 		if (stesc[0] == '\n') {
1252 			assert(stesc[1] == '\0');
1253 			stesc[0] = '\0';
1254 		}
1255 
1256 		/* The line ends without continuation or comment. */
1257 		if (stesc[0] == '\0')
1258 			return ROFF_CONT;
1259 
1260 		/* Unescaped byte: skip it. */
1261 		if (stesc[0] != ec)
1262 			continue;
1263 
1264 		/*
1265 		 * XXX Ugly hack: Do not attempt to append another line
1266 		 * if the function mparse_buf_r() appended a newline
1267 		 * character to indicate the end of input.
1268 		 */
1269 		if (stesc[1] == '\n') {
1270 			assert(stesc[2] == '\0');
1271 			stesc[0] = '\0';
1272 			return ROFF_CONT;
1273 		}
1274 
1275 		/*
1276 		 * An escape character at the end of an input line
1277 		 * requests line continuation.
1278 		 */
1279 		if (stesc[1] == '\0') {
1280 			stesc[0] = '\0';
1281 			return ROFF_IGN | ROFF_APPEND;
1282 		}
1283 
1284 		/* Found a comment: process it. */
1285 		if (stesc[1] == '"' || stesc[1] == '#')
1286 			break;
1287 
1288 		/* Escaped escape character: skip them both. */
1289 		if (stesc[1] == ec)
1290 			stesc++;
1291 	}
1292 
1293 	/* Look for an RCS id in the comment. */
1294 
1295 	rcsid = 0;
1296 	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
1297 		rcsid = 1 << MANDOC_OS_OPENBSD;
1298 		cp += 8;
1299 	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
1300 		rcsid = 1 << MANDOC_OS_NETBSD;
1301 		cp += 7;
1302 	}
1303 	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
1304 	    strchr(cp, '$') != NULL) {
1305 		if (r->man->meta.rcsids & rcsid)
1306 			mandoc_msg(MANDOCERR_RCS_REP, ln,
1307 			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
1308 		r->man->meta.rcsids |= rcsid;
1309 	}
1310 
1311 	/* Warn about trailing whitespace at the end of the comment. */
1312 
1313 	ep = strchr(stesc + 2, '\0') - 1;
1314 	if (*ep == '\n')
1315 		*ep-- = '\0';
1316 	if (*ep == ' ' || *ep == '\t')
1317 		mandoc_msg(MANDOCERR_SPACE_EOL,
1318 		    ln, (int)(ep - buf->buf), NULL);
1319 
1320 	/* Save comments preceding the title macro in the syntax tree. */
1321 
1322 	if (r->options & MPARSE_COMMENT) {
1323 		while (*ep == ' ' || *ep == '\t')
1324 			ep--;
1325 		ep[1] = '\0';
1326 		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
1327 		    ROFFT_COMMENT, TOKEN_NONE);
1328 		n->string = mandoc_strdup(stesc + 2);
1329 		roff_node_append(r->man, n);
1330 		n->flags |= NODE_VALID | NODE_ENDED;
1331 		r->man->next = ROFF_NEXT_SIBLING;
1332 	}
1333 
1334 	/* The comment requests line continuation. */
1335 
1336 	if (stesc[1] == '#') {
1337 		*stesc = '\0';
1338 		return ROFF_IGN | ROFF_APPEND;
1339 	}
1340 
1341 	/* Discard the comment including preceding whitespace. */
1342 
1343 	while (stesc > start && stesc[-1] == ' ' &&
1344 	    (stesc == start + 1 || stesc[-2] != '\\'))
1345 		stesc--;
1346 	*stesc = '\0';
1347 	return ROFF_CONT;
1348 }
1349 
1350 /*
1351  * In the current line, expand escape sequences that produce parsable
1352  * input text.  Also check the syntax of the remaining escape sequences,
1353  * which typically produce output glyphs or change formatter state.
1354  */
1355 static int
1356 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
1357 {
1358 	char		 ubuf[24];	/* buffer to print a number */
1359 	struct mctx	*ctx;		/* current macro call context */
1360 	const char	*res;		/* the string to be pasted */
1361 	const char	*src;		/* source for copying */
1362 	char		*dst;		/* destination for copying */
1363 	int		 iesc;		/* index of leading escape char */
1364 	int		 inam;		/* index of the escape name */
1365 	int		 iarg;		/* index beginning the argument */
1366 	int		 iendarg;	/* index right after the argument */
1367 	int		 iend;		/* index right after the sequence */
1368 	int		 isrc, idst;	/* to reduce \\ and \. in names */
1369 	int		 deftype;	/* type of definition to paste */
1370 	int		 argi;		/* macro argument index */
1371 	int		 quote_args;	/* true for \\$@, false for \\$* */
1372 	int		 asz;		/* length of the replacement */
1373 	int		 rsz;		/* length of the rest of the string */
1374 	int		 npos;		/* position in numeric expression */
1375 	int		 expand_count;	/* to avoid infinite loops */
1376 
1377 	expand_count = 0;
1378 	while (buf->buf[pos] != '\0') {
1379 
1380 		/*
1381 		 * Skip plain ASCII characters.
1382 		 * If we have a non-standard escape character,
1383 		 * escape literal backslashes because all processing in
1384 		 * subsequent functions uses the standard escaping rules.
1385 		 */
1386 
1387 		if (buf->buf[pos] != ec) {
1388 			if (ec != ASCII_ESC && buf->buf[pos] == '\\') {
1389 				roff_expand_patch(buf, pos, "\\e", pos + 1);
1390 				pos++;
1391 			}
1392 			pos++;
1393 			continue;
1394 		}
1395 
1396 		/*
1397 		 * Parse escape sequences,
1398 		 * issue diagnostic messages when appropriate,
1399 		 * and skip sequences that do not need expansion.
1400 		 * If we have a non-standard escape character, translate
1401 		 * it to backslashes and translate backslashes to \e.
1402 		 */
1403 
1404 		if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
1405 		    &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
1406 			while (pos < iend) {
1407 				if (buf->buf[pos] == ec) {
1408 					buf->buf[pos] = '\\';
1409 					if (pos + 1 < iend)
1410 						pos++;
1411 				} else if (buf->buf[pos] == '\\') {
1412 					roff_expand_patch(buf,
1413 					    pos, "\\e", pos + 1);
1414 					pos++;
1415 					iend++;
1416 				}
1417 				pos++;
1418 			}
1419 			continue;
1420 		}
1421 
1422 		/* Reduce \\ and \. in names. */
1423 
1424 		if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
1425 			isrc = idst = iarg;
1426 			while (isrc < iendarg) {
1427 				if (isrc + 1 < iendarg &&
1428 				    buf->buf[isrc] == '\\' &&
1429 				    (buf->buf[isrc + 1] == '\\' ||
1430 				     buf->buf[isrc + 1] == '.'))
1431 					isrc++;
1432 				buf->buf[idst++] = buf->buf[isrc++];
1433 			}
1434 			iendarg -= isrc - idst;
1435 		}
1436 
1437 		/* Handle expansion. */
1438 
1439 		res = NULL;
1440 		switch (buf->buf[inam]) {
1441 		case '*':
1442 			if (iendarg == iarg)
1443 				break;
1444 			deftype = ROFFDEF_USER | ROFFDEF_PRE;
1445 			if ((res = roff_getstrn(r, buf->buf + iarg,
1446 			    iendarg - iarg, &deftype)) != NULL)
1447 				break;
1448 
1449 			/*
1450 			 * If not overridden,
1451 			 * let \*(.T through to the formatters.
1452 			 */
1453 
1454 			if (iendarg - iarg == 2 &&
1455 			    buf->buf[iarg] == '.' &&
1456 			    buf->buf[iarg + 1] == 'T') {
1457 				roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
1458 				pos = iend;
1459 				continue;
1460 			}
1461 
1462 			mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
1463 			    "%.*s", iendarg - iarg, buf->buf + iarg);
1464 			break;
1465 
1466 		case '$':
1467 			if (r->mstackpos < 0) {
1468 				mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
1469 				    "%.*s", iend - iesc, buf->buf + iesc);
1470 				break;
1471 			}
1472 			ctx = r->mstack + r->mstackpos;
1473 			argi = buf->buf[iarg] - '1';
1474 			if (argi >= 0 && argi <= 8) {
1475 				if (argi < ctx->argc)
1476 					res = ctx->argv[argi];
1477 				break;
1478 			}
1479 			if (buf->buf[iarg] == '*')
1480 				quote_args = 0;
1481 			else if (buf->buf[iarg] == '@')
1482 				quote_args = 1;
1483 			else {
1484 				mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
1485 				    "%.*s", iend - iesc, buf->buf + iesc);
1486 				break;
1487 			}
1488 			asz = 0;
1489 			for (argi = 0; argi < ctx->argc; argi++) {
1490 				if (argi)
1491 					asz++;  /* blank */
1492 				if (quote_args)
1493 					asz += 2;  /* quotes */
1494 				asz += strlen(ctx->argv[argi]);
1495 			}
1496 			if (asz != iend - iesc) {
1497 				rsz = buf->sz - iend;
1498 				if (asz < iend - iesc)
1499 					memmove(buf->buf + iesc + asz,
1500 					    buf->buf + iend, rsz);
1501 				buf->sz = iesc + asz + rsz;
1502 				buf->buf = mandoc_realloc(buf->buf, buf->sz);
1503 				if (asz > iend - iesc)
1504 					memmove(buf->buf + iesc + asz,
1505 					    buf->buf + iend, rsz);
1506 			}
1507 			dst = buf->buf + iesc;
1508 			for (argi = 0; argi < ctx->argc; argi++) {
1509 				if (argi)
1510 					*dst++ = ' ';
1511 				if (quote_args)
1512 					*dst++ = '"';
1513 				src = ctx->argv[argi];
1514 				while (*src != '\0')
1515 					*dst++ = *src++;
1516 				if (quote_args)
1517 					*dst++ = '"';
1518 			}
1519 			continue;
1520 		case 'A':
1521 			ubuf[0] = iendarg > iarg ? '1' : '0';
1522 			ubuf[1] = '\0';
1523 			res = ubuf;
1524 			break;
1525 		case 'B':
1526 			npos = 0;
1527 			ubuf[0] = iendarg > iarg && iend > iendarg &&
1528 			    roff_evalnum(r, ln, buf->buf + iarg, &npos,
1529 					 NULL, ROFFNUM_SCALE) &&
1530 			    npos == iendarg - iarg ? '1' : '0';
1531 			ubuf[1] = '\0';
1532 			res = ubuf;
1533 			break;
1534 		case 'V':
1535 			mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
1536 			    "%.*s", iend - iesc, buf->buf + iesc);
1537 			roff_expand_patch(buf, iendarg, "}", iend);
1538 			roff_expand_patch(buf, iesc, "${", iarg);
1539 			continue;
1540 		case 'g':
1541 			break;
1542 		case 'n':
1543 			if (iendarg > iarg)
1544 				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1545 				    roff_getregn(r, buf->buf + iarg,
1546 				    iendarg - iarg, buf->buf[inam + 1]));
1547 			else
1548 				ubuf[0] = '\0';
1549 			res = ubuf;
1550 			break;
1551 		case 'w':
1552 			(void)snprintf(ubuf, sizeof(ubuf),
1553 			    "%d", (iendarg - iarg) * 24);
1554 			res = ubuf;
1555 			break;
1556 		default:
1557 			break;
1558 		}
1559 		if (res == NULL)
1560 			res = "";
1561 		if (++expand_count > EXPAND_LIMIT ||
1562 		    buf->sz + strlen(res) > SHRT_MAX) {
1563 			mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
1564 			return ROFF_IGN;
1565 		}
1566 		roff_expand_patch(buf, iesc, res, iend);
1567 	}
1568 	return ROFF_CONT;
1569 }
1570 
1571 /*
1572  * Replace the substring from the start position (inclusive)
1573  * to end position (exclusive) with the repl(acement) string.
1574  */
1575 static void
1576 roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
1577 {
1578 	char	*nbuf;
1579 
1580 	buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
1581 	    repl, buf->buf + end) + 1;
1582 	free(buf->buf);
1583 	buf->buf = nbuf;
1584 }
1585 
1586 /*
1587  * Parse a quoted or unquoted roff-style request or macro argument.
1588  * Return a pointer to the parsed argument, which is either the original
1589  * pointer or advanced by one byte in case the argument is quoted.
1590  * NUL-terminate the argument in place.
1591  * Collapse pairs of quotes inside quoted arguments.
1592  * Advance the argument pointer to the next argument,
1593  * or to the NUL byte terminating the argument line.
1594  */
1595 char *
1596 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1597 {
1598 	struct buf	 buf;
1599 	char		*cp, *start;
1600 	int		 newesc, pairs, quoted, white;
1601 
1602 	/* Quoting can only start with a new word. */
1603 	start = *cpp;
1604 	quoted = 0;
1605 	if ('"' == *start) {
1606 		quoted = 1;
1607 		start++;
1608 	}
1609 
1610 	newesc = pairs = white = 0;
1611 	for (cp = start; '\0' != *cp; cp++) {
1612 
1613 		/*
1614 		 * Move the following text left
1615 		 * after quoted quotes and after "\\" and "\t".
1616 		 */
1617 		if (pairs)
1618 			cp[-pairs] = cp[0];
1619 
1620 		if ('\\' == cp[0]) {
1621 			/*
1622 			 * In copy mode, translate double to single
1623 			 * backslashes and backslash-t to literal tabs.
1624 			 */
1625 			switch (cp[1]) {
1626 			case 'a':
1627 			case 't':
1628 				cp[-pairs] = '\t';
1629 				pairs++;
1630 				cp++;
1631 				break;
1632 			case '\\':
1633 				/*
1634 				 * Signal to roff_expand() that an escape
1635 				 * sequence resulted from copy-in processing
1636 				 * and needs to be checked or interpolated.
1637 				 */
1638 				cp[-pairs] = ASCII_ESC;
1639 				newesc = 1;
1640 				pairs++;
1641 				cp++;
1642 				break;
1643 			case ' ':
1644 				/* Skip escaped blanks. */
1645 				if (0 == quoted)
1646 					cp++;
1647 				break;
1648 			default:
1649 				break;
1650 			}
1651 		} else if (0 == quoted) {
1652 			if (' ' == cp[0]) {
1653 				/* Unescaped blanks end unquoted args. */
1654 				white = 1;
1655 				break;
1656 			}
1657 		} else if ('"' == cp[0]) {
1658 			if ('"' == cp[1]) {
1659 				/* Quoted quotes collapse. */
1660 				pairs++;
1661 				cp++;
1662 			} else {
1663 				/* Unquoted quotes end quoted args. */
1664 				quoted = 2;
1665 				break;
1666 			}
1667 		}
1668 	}
1669 
1670 	/* Quoted argument without a closing quote. */
1671 	if (1 == quoted)
1672 		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1673 
1674 	/* NUL-terminate this argument and move to the next one. */
1675 	if (pairs)
1676 		cp[-pairs] = '\0';
1677 	if ('\0' != *cp) {
1678 		*cp++ = '\0';
1679 		while (' ' == *cp)
1680 			cp++;
1681 	}
1682 	*pos += (int)(cp - start) + (quoted ? 1 : 0);
1683 	*cpp = cp;
1684 
1685 	if ('\0' == *cp && (white || ' ' == cp[-1]))
1686 		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1687 
1688 	start = mandoc_strdup(start);
1689 	if (newesc == 0)
1690 		return start;
1691 
1692 	buf.buf = start;
1693 	buf.sz = strlen(start) + 1;
1694 	buf.next = NULL;
1695 	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1696 		free(buf.buf);
1697 		buf.buf = mandoc_strdup("");
1698 	}
1699 	return buf.buf;
1700 }
1701 
1702 
1703 /*
1704  * Process text streams.
1705  */
1706 static int
1707 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1708 {
1709 	size_t		 sz;
1710 	const char	*start;
1711 	char		*p;
1712 	int		 isz;
1713 	enum mandoc_esc	 esc;
1714 
1715 	/* Spring the input line trap. */
1716 
1717 	if (roffit_lines == 1) {
1718 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1719 		free(buf->buf);
1720 		buf->buf = p;
1721 		buf->sz = isz + 1;
1722 		*offs = 0;
1723 		free(roffit_macro);
1724 		roffit_lines = 0;
1725 		return ROFF_REPARSE;
1726 	} else if (roffit_lines > 1)
1727 		--roffit_lines;
1728 
1729 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1730 		if (roffce_lines < 1) {
1731 			r->man->last = roffce_node;
1732 			r->man->next = ROFF_NEXT_SIBLING;
1733 			roffce_lines = 0;
1734 			roffce_node = NULL;
1735 		} else
1736 			roffce_lines--;
1737 	}
1738 
1739 	/* Convert all breakable hyphens into ASCII_HYPH. */
1740 
1741 	start = p = buf->buf + pos;
1742 
1743 	while (*p != '\0') {
1744 		sz = strcspn(p, "-\\");
1745 		p += sz;
1746 
1747 		if (*p == '\0')
1748 			break;
1749 
1750 		if (*p == '\\') {
1751 			/* Skip over escapes. */
1752 			p++;
1753 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1754 			if (esc == ESCAPE_ERROR)
1755 				break;
1756 			while (*p == '-')
1757 				p++;
1758 			continue;
1759 		} else if (p == start) {
1760 			p++;
1761 			continue;
1762 		}
1763 
1764 		if (isalpha((unsigned char)p[-1]) &&
1765 		    isalpha((unsigned char)p[1]))
1766 			*p = ASCII_HYPH;
1767 		p++;
1768 	}
1769 	return ROFF_CONT;
1770 }
1771 
1772 int
1773 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1774 {
1775 	enum roff_tok	 t;
1776 	int		 e;
1777 	int		 pos;	/* parse point */
1778 	int		 spos;	/* saved parse point for messages */
1779 	int		 ppos;	/* original offset in buf->buf */
1780 	int		 ctl;	/* macro line (boolean) */
1781 
1782 	ppos = pos = *offs;
1783 
1784 	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1785 	    (r->man->flags & ROFF_NOFILL) == 0 &&
1786 	    strchr(" .\\", buf->buf[pos]) == NULL &&
1787 	    buf->buf[pos] != r->control &&
1788 	    strcspn(buf->buf, " ") < 80)
1789 		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1790 		    "%.20s...", buf->buf + pos);
1791 
1792 	/* Handle in-line equation delimiters. */
1793 
1794 	if (r->tbl == NULL &&
1795 	    r->last_eqn != NULL && r->last_eqn->delim &&
1796 	    (r->eqn == NULL || r->eqn_inline)) {
1797 		e = roff_eqndelim(r, buf, pos);
1798 		if (e == ROFF_REPARSE)
1799 			return e;
1800 		assert(e == ROFF_CONT);
1801 	}
1802 
1803 	/* Handle comments and escape sequences. */
1804 
1805 	e = roff_parse_comment(r, buf, ln, pos, r->escape);
1806 	if ((e & ROFF_MASK) == ROFF_IGN)
1807 		return e;
1808 	assert(e == ROFF_CONT);
1809 
1810 	e = roff_expand(r, buf, ln, pos, r->escape);
1811 	if ((e & ROFF_MASK) == ROFF_IGN)
1812 		return e;
1813 	assert(e == ROFF_CONT);
1814 
1815 	ctl = roff_getcontrol(r, buf->buf, &pos);
1816 
1817 	/*
1818 	 * First, if a scope is open and we're not a macro, pass the
1819 	 * text through the macro's filter.
1820 	 * Equations process all content themselves.
1821 	 * Tables process almost all content themselves, but we want
1822 	 * to warn about macros before passing it there.
1823 	 */
1824 
1825 	if (r->last != NULL && ! ctl) {
1826 		t = r->last->tok;
1827 		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1828 		if ((e & ROFF_MASK) == ROFF_IGN)
1829 			return e;
1830 		e &= ~ROFF_MASK;
1831 	} else
1832 		e = ROFF_IGN;
1833 	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1834 		eqn_read(r->eqn, buf->buf + ppos);
1835 		return e;
1836 	}
1837 	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1838 		tbl_read(r->tbl, ln, buf->buf, ppos);
1839 		roff_addtbl(r->man, ln, r->tbl);
1840 		return e;
1841 	}
1842 	if ( ! ctl) {
1843 		r->options &= ~MPARSE_COMMENT;
1844 		return roff_parsetext(r, buf, pos, offs) | e;
1845 	}
1846 
1847 	/* Skip empty request lines. */
1848 
1849 	if (buf->buf[pos] == '"') {
1850 		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1851 		return ROFF_IGN;
1852 	} else if (buf->buf[pos] == '\0')
1853 		return ROFF_IGN;
1854 
1855 	/*
1856 	 * If a scope is open, go to the child handler for that macro,
1857 	 * as it may want to preprocess before doing anything with it.
1858 	 */
1859 
1860 	if (r->last) {
1861 		t = r->last->tok;
1862 		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1863 	}
1864 
1865 	r->options &= ~MPARSE_COMMENT;
1866 	spos = pos;
1867 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1868 	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
1869 }
1870 
1871 /*
1872  * Handle a new request or macro.
1873  * May be called outside any scope or from inside a conditional scope.
1874  */
1875 static int
1876 roff_req_or_macro(ROFF_ARGS) {
1877 
1878 	/* For now, tables ignore most macros and some request. */
1879 
1880 	if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1881 	    tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1882 	    tok == ROFF_sp)) {
1883 		mandoc_msg(MANDOCERR_TBLMACRO,
1884 		    ln, ppos, "%s", buf->buf + ppos);
1885 		if (tok != TOKEN_NONE)
1886 			return ROFF_IGN;
1887 		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1888 			pos++;
1889 		while (buf->buf[pos] == ' ')
1890 			pos++;
1891 		tbl_read(r->tbl, ln, buf->buf, pos);
1892 		roff_addtbl(r->man, ln, r->tbl);
1893 		return ROFF_IGN;
1894 	}
1895 
1896 	/* For now, let high level macros abort .ce mode. */
1897 
1898 	if (roffce_node != NULL &&
1899 	    (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1900 	     tok == ROFF_TH || tok == ROFF_TS)) {
1901 		r->man->last = roffce_node;
1902 		r->man->next = ROFF_NEXT_SIBLING;
1903 		roffce_lines = 0;
1904 		roffce_node = NULL;
1905 	}
1906 
1907 	/*
1908 	 * This is neither a roff request nor a user-defined macro.
1909 	 * Let the standard macro set parsers handle it.
1910 	 */
1911 
1912 	if (tok == TOKEN_NONE)
1913 		return ROFF_CONT;
1914 
1915 	/* Execute a roff request or a user-defined macro. */
1916 
1917 	return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1918 }
1919 
1920 /*
1921  * Internal interface function to tell the roff parser that execution
1922  * of the current macro ended.  This is required because macro
1923  * definitions usually do not end with a .return request.
1924  */
1925 void
1926 roff_userret(struct roff *r)
1927 {
1928 	struct mctx	*ctx;
1929 	int		 i;
1930 
1931 	assert(r->mstackpos >= 0);
1932 	ctx = r->mstack + r->mstackpos;
1933 	for (i = 0; i < ctx->argc; i++)
1934 		free(ctx->argv[i]);
1935 	ctx->argc = 0;
1936 	r->mstackpos--;
1937 }
1938 
1939 void
1940 roff_endparse(struct roff *r)
1941 {
1942 	if (r->last != NULL)
1943 		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1944 		    r->last->col, "%s", roff_name[r->last->tok]);
1945 
1946 	if (r->eqn != NULL) {
1947 		mandoc_msg(MANDOCERR_BLK_NOEND,
1948 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1949 		eqn_parse(r->eqn);
1950 		r->eqn = NULL;
1951 	}
1952 
1953 	if (r->tbl != NULL) {
1954 		tbl_end(r->tbl, 1);
1955 		r->tbl = NULL;
1956 	}
1957 }
1958 
1959 /*
1960  * Parse the request or macro name at buf[*pos].
1961  * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
1962  * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
1963  * As a side effect, set r->current_string to the definition or to NULL.
1964  */
1965 static enum roff_tok
1966 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1967 {
1968 	char		*cp;
1969 	const char	*mac;
1970 	size_t		 maclen;
1971 	int		 deftype;
1972 	enum roff_tok	 t;
1973 
1974 	cp = buf + *pos;
1975 
1976 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1977 		return TOKEN_NONE;
1978 
1979 	mac = cp;
1980 	maclen = roff_getname(r, &cp, ln, ppos);
1981 
1982 	deftype = ROFFDEF_USER | ROFFDEF_REN;
1983 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1984 	switch (deftype) {
1985 	case ROFFDEF_USER:
1986 		t = ROFF_USERDEF;
1987 		break;
1988 	case ROFFDEF_REN:
1989 		t = ROFF_RENAMED;
1990 		break;
1991 	default:
1992 		t = roffhash_find(r->reqtab, mac, maclen);
1993 		break;
1994 	}
1995 	if (t != TOKEN_NONE)
1996 		*pos = cp - buf;
1997 	else if (deftype == ROFFDEF_UNDEF) {
1998 		/* Using an undefined macro defines it to be empty. */
1999 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2000 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2001 	}
2002 	return t;
2003 }
2004 
2005 /* --- handling of request blocks ----------------------------------------- */
2006 
2007 /*
2008  * Close a macro definition block or an "ignore" block.
2009  */
2010 static int
2011 roff_cblock(ROFF_ARGS)
2012 {
2013 	int	 rr;
2014 
2015 	if (r->last == NULL) {
2016 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2017 		return ROFF_IGN;
2018 	}
2019 
2020 	switch (r->last->tok) {
2021 	case ROFF_am:
2022 	case ROFF_ami:
2023 	case ROFF_de:
2024 	case ROFF_dei:
2025 	case ROFF_ig:
2026 		break;
2027 	case ROFF_am1:
2028 	case ROFF_de1:
2029 		/* Remapped in roff_block(). */
2030 		abort();
2031 	default:
2032 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2033 		return ROFF_IGN;
2034 	}
2035 
2036 	roffnode_pop(r);
2037 	roffnode_cleanscope(r);
2038 
2039 	/*
2040 	 * If a conditional block with braces is still open,
2041 	 * check for "\}" block end markers.
2042 	 */
2043 
2044 	if (r->last != NULL && r->last->endspan < 0) {
2045 		rr = 1;  /* If arguments follow "\}", warn about them. */
2046 		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2047 	}
2048 
2049 	if (buf->buf[pos] != '\0')
2050 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2051 		    ".. %s", buf->buf + pos);
2052 
2053 	return ROFF_IGN;
2054 }
2055 
2056 /*
2057  * Pop all nodes ending at the end of the current input line.
2058  * Return the number of loops ended.
2059  */
2060 static int
2061 roffnode_cleanscope(struct roff *r)
2062 {
2063 	int inloop;
2064 
2065 	inloop = 0;
2066 	while (r->last != NULL && r->last->endspan > 0) {
2067 		if (--r->last->endspan != 0)
2068 			break;
2069 		inloop += roffnode_pop(r);
2070 	}
2071 	return inloop;
2072 }
2073 
2074 /*
2075  * Handle the closing "\}" of a conditional block.
2076  * Apart from generating warnings, this only pops nodes.
2077  * Return the number of loops ended.
2078  */
2079 static int
2080 roff_ccond(struct roff *r, int ln, int ppos)
2081 {
2082 	if (NULL == r->last) {
2083 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2084 		return 0;
2085 	}
2086 
2087 	switch (r->last->tok) {
2088 	case ROFF_el:
2089 	case ROFF_ie:
2090 	case ROFF_if:
2091 	case ROFF_while:
2092 		break;
2093 	default:
2094 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2095 		return 0;
2096 	}
2097 
2098 	if (r->last->endspan > -1) {
2099 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2100 		return 0;
2101 	}
2102 
2103 	return roffnode_pop(r) + roffnode_cleanscope(r);
2104 }
2105 
2106 static int
2107 roff_block(ROFF_ARGS)
2108 {
2109 	const char	*name, *value;
2110 	char		*call, *cp, *iname, *rname;
2111 	size_t		 csz, namesz, rsz;
2112 	int		 deftype;
2113 
2114 	/* Ignore groff compatibility mode for now. */
2115 
2116 	if (tok == ROFF_de1)
2117 		tok = ROFF_de;
2118 	else if (tok == ROFF_dei1)
2119 		tok = ROFF_dei;
2120 	else if (tok == ROFF_am1)
2121 		tok = ROFF_am;
2122 	else if (tok == ROFF_ami1)
2123 		tok = ROFF_ami;
2124 
2125 	/* Parse the macro name argument. */
2126 
2127 	cp = buf->buf + pos;
2128 	if (tok == ROFF_ig) {
2129 		iname = NULL;
2130 		namesz = 0;
2131 	} else {
2132 		iname = cp;
2133 		namesz = roff_getname(r, &cp, ln, ppos);
2134 		iname[namesz] = '\0';
2135 	}
2136 
2137 	/* Resolve the macro name argument if it is indirect. */
2138 
2139 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2140 		deftype = ROFFDEF_USER;
2141 		name = roff_getstrn(r, iname, namesz, &deftype);
2142 		if (name == NULL) {
2143 			mandoc_msg(MANDOCERR_STR_UNDEF,
2144 			    ln, (int)(iname - buf->buf),
2145 			    "%.*s", (int)namesz, iname);
2146 			namesz = 0;
2147 		} else
2148 			namesz = strlen(name);
2149 	} else
2150 		name = iname;
2151 
2152 	if (namesz == 0 && tok != ROFF_ig) {
2153 		mandoc_msg(MANDOCERR_REQ_EMPTY,
2154 		    ln, ppos, "%s", roff_name[tok]);
2155 		return ROFF_IGN;
2156 	}
2157 
2158 	roffnode_push(r, tok, name, ln, ppos);
2159 
2160 	/*
2161 	 * At the beginning of a `de' macro, clear the existing string
2162 	 * with the same name, if there is one.  New content will be
2163 	 * appended from roff_block_text() in multiline mode.
2164 	 */
2165 
2166 	if (tok == ROFF_de || tok == ROFF_dei) {
2167 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2168 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2169 	} else if (tok == ROFF_am || tok == ROFF_ami) {
2170 		deftype = ROFFDEF_ANY;
2171 		value = roff_getstrn(r, iname, namesz, &deftype);
2172 		switch (deftype) {  /* Before appending, ... */
2173 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2174 			roff_setstrn(&r->strtab, name, namesz,
2175 			    value, strlen(value), 0);
2176 			break;
2177 		case ROFFDEF_REN: /* call original standard macro. */
2178 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2179 			    (int)strlen(value), value);
2180 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2181 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2182 			free(call);
2183 			break;
2184 		case ROFFDEF_STD:  /* rename and call standard macro. */
2185 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2186 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2187 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2188 			    (int)rsz, rname);
2189 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2190 			free(call);
2191 			free(rname);
2192 			break;
2193 		default:
2194 			break;
2195 		}
2196 	}
2197 
2198 	if (*cp == '\0')
2199 		return ROFF_IGN;
2200 
2201 	/* Get the custom end marker. */
2202 
2203 	iname = cp;
2204 	namesz = roff_getname(r, &cp, ln, ppos);
2205 
2206 	/* Resolve the end marker if it is indirect. */
2207 
2208 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2209 		deftype = ROFFDEF_USER;
2210 		name = roff_getstrn(r, iname, namesz, &deftype);
2211 		if (name == NULL) {
2212 			mandoc_msg(MANDOCERR_STR_UNDEF,
2213 			    ln, (int)(iname - buf->buf),
2214 			    "%.*s", (int)namesz, iname);
2215 			namesz = 0;
2216 		} else
2217 			namesz = strlen(name);
2218 	} else
2219 		name = iname;
2220 
2221 	if (namesz)
2222 		r->last->end = mandoc_strndup(name, namesz);
2223 
2224 	if (*cp != '\0')
2225 		mandoc_msg(MANDOCERR_ARG_EXCESS,
2226 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2227 
2228 	return ROFF_IGN;
2229 }
2230 
/*
 * Process a request or macro line found inside a macro definition
 * block or an "ignore" block.
 * If the line starts with the custom end marker of the block or
 * with the standard end marker, close the block; otherwise, append
 * the line to the macro being defined, unless this is an "ignore"
 * block.
 * Returns ROFF_RERUN if a recognized request or macro follows a
 * custom end marker, the result of roff_cblock() for the standard
 * end marker, or else ROFF_IGN.
 */
static int
roff_block_sub(ROFF_ARGS)
{
	enum roff_tok	t;
	int		i, j;

	/*
	 * If a custom end marker is a user-defined or predefined macro
	 * or a request, interpret it.
	 */

	if (r->last->end) {
		/* Compare the text at pos with the end marker. */
		for (i = pos, j = 0; r->last->end[j]; j++, i++)
			if (buf->buf[i] != r->last->end[j])
				break;

		/*
		 * It only counts if the whole marker matched and is
		 * followed by the end of the line, a blank, or a tab.
		 */
		if (r->last->end[j] == '\0' &&
		    (buf->buf[i] == '\0' ||
		     buf->buf[i] == ' ' ||
		     buf->buf[i] == '\t')) {
			roffnode_pop(r);
			roffnode_cleanscope(r);

			/* Skip the white space after the marker. */
			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
				i++;

			/* See whether a request or macro follows. */
			pos = i;
			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
			    TOKEN_NONE)
				return ROFF_RERUN;
			return ROFF_IGN;
		}
	}

	/* Handle the standard end marker. */

	t = roff_parse(r, buf->buf, &pos, ln, ppos);
	if (t == ROFF_cblock)
		return roff_cblock(r, t, buf, ln, ppos, pos, offs);

	/* Not an end marker, so append the line to the block. */

	if (tok != ROFF_ig)
		roff_setstr(r, r->last->name, buf->buf + ppos, 2);
	return ROFF_IGN;
}
2277 
/*
 * Process a text line found inside a macro definition block or an
 * "ignore" block:  unless the block is an "ignore" block, append
 * the text starting at pos to the string named by the innermost
 * node.  Always returns ROFF_IGN.
 */
static int
roff_block_text(ROFF_ARGS)
{

	if (tok != ROFF_ig)
		roff_setstr(r, r->last->name, buf->buf + pos, 2);

	return ROFF_IGN;
}
2287 
/*
 * Check for a closing "\}" and handle it.
 * In this function, the final "int *offs" argument is used for
 * different purposes than elsewhere:
 * Input: *offs == 0: caller wants to discard arguments following \}
 *        *offs == 1: caller wants to preserve text following \}
 * Output: *offs = 0: tell caller to discard input line
 *         *offs = 1: tell caller to use input line
 * The return value is ROFF_IGN, combined with ROFF_LOOPCONT or
 * ROFF_LOOPEXIT if tok is ROFF_while and at least one loop ended.
 */
static int
roff_cond_checkend(ROFF_ARGS)
{
	char		*ep;
	int		 endloop, irc, rr;

	irc = ROFF_IGN;
	/* Remember the rule before any node gets popped. */
	rr = r->last->rule;
	/*
	 * What to report if a loop ends on this line:
	 * nothing special unless handling a .while request;
	 * continue the loop if its rule is true, else leave it.
	 */
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If "\}" occurs on a macro line without a preceding macro or
	 * a text line contains nothing else, drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
		rr = 0;

	/*
	 * The closing delimiter "\}" rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
	 */

	while ((ep = strchr(ep, '\\')) != NULL) {
		switch (ep[1]) {
		case '}':
			/*
			 * At the end of the line, cut the "\}" off.
			 * Elsewhere, turn it into "\&" if the line is
			 * going to be used, or else delete it.
			 */
			if (ep[2] == '\0')
				ep[0] = '\0';
			else if (rr)
				ep[1] = '&';
			else
				memmove(ep, ep + 2, strlen(ep + 2) + 1);
			if (roff_ccond(r, ln, ep - buf->buf))
				irc |= endloop;
			break;
		case '\0':
			/* A lone backslash at the end of the line. */
			++ep;
			break;
		default:
			/* Step over any other two-character sequence. */
			ep += 2;
			break;
		}
	}
	*offs = rr;
	return irc;
}
2347 
/*
 * Parse and process a request or macro line in conditional scope.
 * Closing "\}" markers are handled first, then the request or
 * macro on the line is identified and, where appropriate, run.
 * The return value combines the loop control bits reported by
 * roff_cond_checkend() with the result of running the request.
 */
static int
roff_cond_sub(ROFF_ARGS)
{
	struct roffnode	*bl;
	int		 irc, rr, spos;
	enum roff_tok	 t;

	rr = 0;  /* If arguments follow "\}", skip them. */
	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
	/* Remember where the name starts; roff_parse() advances pos. */
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/*
	 * Handle requests and macros if the conditional evaluated
	 * to true or if they are structurally required.
	 * The .break request is always handled specially.
	 */

	if (t == ROFF_break) {
		if (irc & ROFF_LOOPMASK)
			/* A loop ended on this very line: leave it. */
			irc = ROFF_IGN | ROFF_LOOPEXIT;
		else if (rr) {
			/*
			 * Switch off all enclosing blocks, up to and
			 * including the innermost .while loop.
			 */
			for (bl = r->last; bl != NULL; bl = bl->parent) {
				bl->rule = 0;
				if (bl->tok == ROFF_while)
					break;
			}
		}
	} else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
		irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
		/*
		 * If the request reported ROFF_WHILE, discard
		 * the loop continuation and exit bits.
		 */
		if (irc & ROFF_WHILE)
			irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
	}
	return irc;
}
2386 
/*
 * Parse and process a text line in conditional scope.
 * Closing "\}" markers are handled by roff_cond_checkend();
 * if that reports that the line is to be used, ROFF_CONT is
 * added to its return value.
 */
static int
roff_cond_text(ROFF_ARGS)
{
	int	 irc, rr;

	rr = 1;  /* If arguments follow "\}", preserve them. */
	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
	if (rr)
		irc |= ROFF_CONT;
	return irc;
}
2401 
2402 /* --- handling of numeric and conditional expressions -------------------- */
2403 
2404 /*
2405  * Parse a single signed integer number.  Stop at the first non-digit.
2406  * If there is at least one digit, return success and advance the
2407  * parse point, else return failure and let the parse point unchanged.
2408  * Ignore overflows, treat them just like the C language.
2409  */
2410 static int
2411 roff_getnum(const char *v, int *pos, int *res, int flags)
2412 {
2413 	int	 myres, scaled, n, p;
2414 
2415 	if (NULL == res)
2416 		res = &myres;
2417 
2418 	p = *pos;
2419 	n = v[p] == '-';
2420 	if (n || v[p] == '+')
2421 		p++;
2422 
2423 	if (flags & ROFFNUM_WHITE)
2424 		while (isspace((unsigned char)v[p]))
2425 			p++;
2426 
2427 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2428 		*res = 10 * *res + v[p] - '0';
2429 	if (p == *pos + n)
2430 		return 0;
2431 
2432 	if (n)
2433 		*res = -*res;
2434 
2435 	/* Each number may be followed by one optional scaling unit. */
2436 
2437 	switch (v[p]) {
2438 	case 'f':
2439 		scaled = *res * 65536;
2440 		break;
2441 	case 'i':
2442 		scaled = *res * 240;
2443 		break;
2444 	case 'c':
2445 		scaled = *res * 240 / 2.54;
2446 		break;
2447 	case 'v':
2448 	case 'P':
2449 		scaled = *res * 40;
2450 		break;
2451 	case 'm':
2452 	case 'n':
2453 		scaled = *res * 24;
2454 		break;
2455 	case 'p':
2456 		scaled = *res * 10 / 3;
2457 		break;
2458 	case 'u':
2459 		scaled = *res;
2460 		break;
2461 	case 'M':
2462 		scaled = *res * 6 / 25;
2463 		break;
2464 	default:
2465 		scaled = *res;
2466 		p--;
2467 		break;
2468 	}
2469 	if (flags & ROFFNUM_SCALE)
2470 		*res = scaled;
2471 
2472 	*pos = p + 1;
2473 	return 1;
2474 }
2475 
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] serves as the delimiter.
 * Return 1 if the text between its first and second occurrence
 * equals the text between its second and third occurrence,
 * or 0 otherwise, including when a delimiter is missing.
 * Move *pos behind the third occurrence of the delimiter
 * or lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*left, *right;
	char		 delim;
	int		 equal;

	delim = v[*pos];
	left = v + *pos + 1;	/* cursor in the first string */
	equal = 0;

	/* The second string starts after the middle delimiter. */

	right = strchr(left, delim);
	if (right != NULL) {
		for (right++; *right != '\0'; right++, left++) {
			if (*left != *right) {
				/* Mismatch: find the final delimiter. */
				right = strchr(right, delim);
				break;
			}
			if (*right == delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	/* Position the cursor. */

	if (right == NULL)
		right = left + strlen(left);
	else if (*right != '\0')
		right++;
	*pos = right - v;
	return equal;
}
2518 
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition.
 * The condition starts at v[*pos]; *pos is advanced past the part
 * that was consumed.  Returns 1 if the condition holds, taking
 * a leading "!" into account, or 0 otherwise.
 */
static int
roff_evalcond(struct roff *r, int ln, char *v, int *pos)
{
	const char	*start, *end;
	char		*cp, *name;
	size_t		 sz;
	int		 deftype, len, number, savepos, istrue, wanttrue;

	/* A leading exclamation mark negates the condition. */
	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case '\0':
		/* An empty condition is false, negated or not. */
		return 0;
	case 'n':
	case 'o':
		/* These single-letter conditions are always true. */
		(*pos)++;
		return wanttrue;
	case 'e':
	case 't':
	case 'v':
		/* These single-letter conditions are always false. */
		(*pos)++;
		return !wanttrue;
	case 'c':
		/* Character availability test; skip blanks after "c". */
		do {
			(*pos)++;
		} while (v[*pos] == ' ');

		/*
		 * Quirk for groff compatibility:
		 * The horizontal tab is neither available nor unavailable.
		 */

		if (v[*pos] == '\t') {
			(*pos)++;
			return 0;
		}

		/* Printable ASCII characters are available. */

		if (v[*pos] != '\\') {
			(*pos)++;
			return wanttrue;
		}

		/* Let the escape sequence parser classify the rest. */
		end = v + ++*pos;
		switch (mandoc_escape(&end, &start, &len)) {
		case ESCAPE_SPECIAL:
			istrue = mchars_spec2cp(start, len) != -1;
			break;
		case ESCAPE_UNICODE:
			istrue = 1;
			break;
		case ESCAPE_NUMBERED:
			istrue = mchars_num2char(start, len) != -1;
			break;
		default:
			/* Makes the function return 0 in either case. */
			istrue = !wanttrue;
			break;
		}
		*pos = end - v;
		return istrue == wanttrue;
	case 'd':
	case 'r':
		/* Test whether a name ("d") or a register ("r") is defined. */
		cp = v + *pos + 1;
		while (*cp == ' ')
			cp++;
		name = cp;
		sz = roff_getname(r, &cp, ln, cp - v);
		if (sz == 0)
			istrue = 0;
		else if (v[*pos] == 'r')
			istrue = roff_hasregn(r, name, sz);
		else {
			/* Only the resulting deftype is of interest. */
			deftype = ROFFDEF_ANY;
		        roff_getstrn(r, name, sz, &deftype);
			istrue = !!deftype;
		}
		*pos = (name + sz) - v;
		return istrue == wanttrue;
	default:
		break;
	}

	/*
	 * Try a numerical expression first.  Fall back to a string
	 * comparison only if the number parser consumed nothing.
	 */
	savepos = *pos;
	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
		return (number > 0) == wanttrue;
	else if (*pos == savepos)
		return roff_evalstrcond(v, pos) == wanttrue;
	else
		return 0;
}
2618 
/*
 * Request handler that does nothing at all
 * and tells the caller to ignore the input line.
 */
static int
roff_line_ignore(ROFF_ARGS)
{

	return ROFF_IGN;
}
2625 
/*
 * Request handler that issues MANDOCERR_REQ_INSEC, naming
 * the request, and tells the caller to ignore the input line.
 */
static int
roff_insec(ROFF_ARGS)
{

	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
	return ROFF_IGN;
}
2633 
/*
 * Request handler that issues MANDOCERR_REQ_UNSUPP, naming
 * the request, and tells the caller to ignore the input line.
 */
static int
roff_unsupp(ROFF_ARGS)
{

	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
	return ROFF_IGN;
}
2641 
/*
 * Handle the conditional requests (el, ie, if, and while are the
 * ones distinguished here):  push a new node, decide whether its
 * body is to be interpreted, and determine how far its scope
 * extends.  The offset of the body is stored into *offs.
 * Returns ROFF_RERUN, combined with ROFF_WHILE for a while request.
 */
static int
roff_cond(ROFF_ARGS)
{
	int	 irc;

	roffnode_push(r, tok, NULL, ln, ppos);

	/*
	 * An `.el' has no conditional body: it will consume the value
	 * of the current rstack entry set in prior `ie' calls or
	 * defaults to DENY.
	 *
	 * If we're not an `el', however, then evaluate the conditional.
	 */

	r->last->rule = tok == ROFF_el ?
	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
	    roff_evalcond(r, ln, buf->buf, &pos);

	/*
	 * An if-else will put the NEGATION of the current evaluated
	 * conditional into the stack of rules.
	 */

	if (tok == ROFF_ie) {
		/* Grow the stack in steps of 16 entries when it is full. */
		if (r->rstackpos + 1 == r->rstacksz) {
			r->rstacksz += 16;
			r->rstack = mandoc_reallocarray(r->rstack,
			    r->rstacksz, sizeof(int));
		}
		r->rstack[++r->rstackpos] = !r->last->rule;
	}

	/* If the parent has false as its rule, then so do we. */

	if (r->last->parent && !r->last->parent->rule)
		r->last->rule = 0;

	/*
	 * Determine scope.
	 * If there is nothing on the line after the conditional,
	 * not even whitespace, use next-line scope.
	 * Except that .while does not support next-line scope.
	 */

	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
		/*
		 * roffnode_cleanscope() pops a node when its endspan
		 * counts down to zero, so 2 lets the node survive
		 * one more countdown than the single-line value 1.
		 */
		r->last->endspan = 2;
		goto out;
	}

	while (buf->buf[pos] == ' ')
		pos++;

	/* An opening brace requests multiline scope. */

	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
		/* A negative endspan marks a block closed by "\}". */
		r->last->endspan = -1;
		pos += 2;
		while (buf->buf[pos] == ' ')
			pos++;
		goto out;
	}

	/*
	 * Anything else following the conditional causes
	 * single-line scope.  Warn if the scope contains
	 * nothing but trailing whitespace.
	 */

	if (buf->buf[pos] == '\0')
		mandoc_msg(MANDOCERR_COND_EMPTY,
		    ln, ppos, "%s", roff_name[tok]);

	r->last->endspan = 1;

out:
	/* Tell the caller where the body of the conditional starts. */
	*offs = pos;
	irc = ROFF_RERUN;
	if (tok == ROFF_while)
		irc |= ROFF_WHILE;
	return irc;
}
2724 
2725 static int
2726 roff_ds(ROFF_ARGS)
2727 {
2728 	char		*string;
2729 	const char	*name;
2730 	size_t		 namesz;
2731 
2732 	/* Ignore groff compatibility mode for now. */
2733 
2734 	if (tok == ROFF_ds1)
2735 		tok = ROFF_ds;
2736 	else if (tok == ROFF_as1)
2737 		tok = ROFF_as;
2738 
2739 	/*
2740 	 * The first word is the name of the string.
2741 	 * If it is empty or terminated by an escape sequence,
2742 	 * abort the `ds' request without defining anything.
2743 	 */
2744 
2745 	name = string = buf->buf + pos;
2746 	if (*name == '\0')
2747 		return ROFF_IGN;
2748 
2749 	namesz = roff_getname(r, &string, ln, pos);
2750 	switch (name[namesz]) {
2751 	case '\\':
2752 		return ROFF_IGN;
2753 	case '\t':
2754 		string = buf->buf + pos + namesz;
2755 		break;
2756 	default:
2757 		break;
2758 	}
2759 
2760 	/* Read past the initial double-quote, if any. */
2761 	if (*string == '"')
2762 		string++;
2763 
2764 	/* The rest is the value. */
2765 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2766 	    ROFF_as == tok);
2767 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2768 	return ROFF_IGN;
2769 }
2770 
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * If the operator is recognized, store its one-character code into
 * *res, advance the parse point, and return that code.  Operators
 * spelled with two characters are coded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g', ">?" as 'a',
 * and "==" as '='.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 first, second;
	int	 width;

	first = v[*pos];
	*res = first;
	width = 1;

	if (first == '<' || first == '>' || first == '=') {
		/* These may or may not be followed by a second character. */
		second = v[*pos + 1];
		width = 2;
		if (first == '<' && second == '=')
			*res = 'l';
		else if (first == '<' && second == '>')
			*res = '!';
		else if (first == '<' && second == '?')
			*res = 'i';
		else if (first == '>' && second == '=')
			*res = 'g';
		else if (first == '>' && second == '?')
			*res = 'a';
		else if (first != '=' || second != '=')
			width = 1;
	} else if (first == '\0' || strchr("+-*/%&:", first) == NULL)
		return 0;

	*pos += width;
	return *res;
}
2834 
2835 /*
2836  * Evaluate either a parenthesized numeric expression
2837  * or a single signed integer number.
2838  */
2839 static int
2840 roff_evalpar(struct roff *r, int ln,
2841 	const char *v, int *pos, int *res, int flags)
2842 {
2843 
2844 	if ('(' != v[*pos])
2845 		return roff_getnum(v, pos, res, flags);
2846 
2847 	(*pos)++;
2848 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2849 		return 0;
2850 
2851 	/*
2852 	 * Omission of the closing parenthesis
2853 	 * is an error in validation mode,
2854 	 * but ignored in evaluation mode.
2855 	 */
2856 
2857 	if (')' == v[*pos])
2858 		(*pos)++;
2859 	else if (NULL == res)
2860 		return 0;
2861 
2862 	return 1;
2863 }
2864 
2865 /*
2866  * Evaluate a complete numeric expression.
2867  * Proceed left to right, there is no concept of precedence.
2868  */
2869 static int
2870 roff_evalnum(struct roff *r, int ln, const char *v,
2871 	int *pos, int *res, int flags)
2872 {
2873 	int		 mypos, operand2;
2874 	char		 operator;
2875 
2876 	if (NULL == pos) {
2877 		mypos = 0;
2878 		pos = &mypos;
2879 	}
2880 
2881 	if (flags & ROFFNUM_WHITE)
2882 		while (isspace((unsigned char)v[*pos]))
2883 			(*pos)++;
2884 
2885 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2886 		return 0;
2887 
2888 	while (1) {
2889 		if (flags & ROFFNUM_WHITE)
2890 			while (isspace((unsigned char)v[*pos]))
2891 				(*pos)++;
2892 
2893 		if ( ! roff_getop(v, pos, &operator))
2894 			break;
2895 
2896 		if (flags & ROFFNUM_WHITE)
2897 			while (isspace((unsigned char)v[*pos]))
2898 				(*pos)++;
2899 
2900 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2901 			return 0;
2902 
2903 		if (flags & ROFFNUM_WHITE)
2904 			while (isspace((unsigned char)v[*pos]))
2905 				(*pos)++;
2906 
2907 		if (NULL == res)
2908 			continue;
2909 
2910 		switch (operator) {
2911 		case '+':
2912 			*res += operand2;
2913 			break;
2914 		case '-':
2915 			*res -= operand2;
2916 			break;
2917 		case '*':
2918 			*res *= operand2;
2919 			break;
2920 		case '/':
2921 			if (operand2 == 0) {
2922 				mandoc_msg(MANDOCERR_DIVZERO,
2923 					ln, *pos, "%s", v);
2924 				*res = 0;
2925 				break;
2926 			}
2927 			*res /= operand2;
2928 			break;
2929 		case '%':
2930 			if (operand2 == 0) {
2931 				mandoc_msg(MANDOCERR_DIVZERO,
2932 					ln, *pos, "%s", v);
2933 				*res = 0;
2934 				break;
2935 			}
2936 			*res %= operand2;
2937 			break;
2938 		case '<':
2939 			*res = *res < operand2;
2940 			break;
2941 		case '>':
2942 			*res = *res > operand2;
2943 			break;
2944 		case 'l':
2945 			*res = *res <= operand2;
2946 			break;
2947 		case 'g':
2948 			*res = *res >= operand2;
2949 			break;
2950 		case '=':
2951 			*res = *res == operand2;
2952 			break;
2953 		case '!':
2954 			*res = *res != operand2;
2955 			break;
2956 		case '&':
2957 			*res = *res && operand2;
2958 			break;
2959 		case ':':
2960 			*res = *res || operand2;
2961 			break;
2962 		case 'i':
2963 			if (operand2 < *res)
2964 				*res = operand2;
2965 			break;
2966 		case 'a':
2967 			if (operand2 > *res)
2968 				*res = operand2;
2969 			break;
2970 		default:
2971 			abort();
2972 		}
2973 	}
2974 	return 1;
2975 }
2976 
2977 /* --- register management ------------------------------------------------ */
2978 
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name, depending on sign, see roff_setregn().
 * Passing INT_MIN as the step leaves the step of the register alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	roff_setregn(r, name, strlen(name), val, sign, INT_MIN);
}
2984 
2985 static void
2986 roff_setregn(struct roff *r, const char *name, size_t len,
2987     int val, char sign, int step)
2988 {
2989 	struct roffreg	*reg;
2990 
2991 	/* Search for an existing register with the same name. */
2992 	reg = r->regtab;
2993 
2994 	while (reg != NULL && (reg->key.sz != len ||
2995 	    strncmp(reg->key.p, name, len) != 0))
2996 		reg = reg->next;
2997 
2998 	if (NULL == reg) {
2999 		/* Create a new register. */
3000 		reg = mandoc_malloc(sizeof(struct roffreg));
3001 		reg->key.p = mandoc_strndup(name, len);
3002 		reg->key.sz = len;
3003 		reg->val = 0;
3004 		reg->step = 0;
3005 		reg->next = r->regtab;
3006 		r->regtab = reg;
3007 	}
3008 
3009 	if ('+' == sign)
3010 		reg->val += val;
3011 	else if ('-' == sign)
3012 		reg->val -= val;
3013 	else
3014 		reg->val = val;
3015 	if (step != INT_MIN)
3016 		reg->step = step;
3017 }
3018 
3019 /*
3020  * Handle some predefined read-only number registers.
3021  * For now, return -1 if the requested register is not predefined;
3022  * in case a predefined read-only register having the value -1
3023  * were to turn up, another special value would have to be chosen.
3024  */
3025 static int
3026 roff_getregro(const struct roff *r, const char *name)
3027 {
3028 
3029 	switch (*name) {
3030 	case '$':  /* Number of arguments of the last macro evaluated. */
3031 		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3032 	case 'A':  /* ASCII approximation mode is always off. */
3033 		return 0;
3034 	case 'g':  /* Groff compatibility mode is always on. */
3035 		return 1;
3036 	case 'H':  /* Fixed horizontal resolution. */
3037 		return 24;
3038 	case 'j':  /* Always adjust left margin only. */
3039 		return 0;
3040 	case 'T':  /* Some output device is always defined. */
3041 		return 1;
3042 	case 'V':  /* Fixed vertical resolution. */
3043 		return 40;
3044 	default:
3045 		return -1;
3046 	}
3047 }
3048 
/*
 * Return the value of the number register with the NUL-terminated
 * name, see roff_getregn().  The NUL sign passed along requests
 * neither auto-increment nor auto-decrement.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	return roff_getregn(r, name, strlen(name), '\0');
}
3054 
3055 static int
3056 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3057 {
3058 	struct roffreg	*reg;
3059 	int		 val;
3060 
3061 	if ('.' == name[0] && 2 == len) {
3062 		val = roff_getregro(r, name + 1);
3063 		if (-1 != val)
3064 			return val;
3065 	}
3066 
3067 	for (reg = r->regtab; reg; reg = reg->next) {
3068 		if (len == reg->key.sz &&
3069 		    0 == strncmp(name, reg->key.p, len)) {
3070 			switch (sign) {
3071 			case '+':
3072 				reg->val += reg->step;
3073 				break;
3074 			case '-':
3075 				reg->val -= reg->step;
3076 				break;
3077 			default:
3078 				break;
3079 			}
3080 			return reg->val;
3081 		}
3082 	}
3083 
3084 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3085 	return 0;
3086 }
3087 
3088 static int
3089 roff_hasregn(const struct roff *r, const char *name, size_t len)
3090 {
3091 	struct roffreg	*reg;
3092 	int		 val;
3093 
3094 	if ('.' == name[0] && 2 == len) {
3095 		val = roff_getregro(r, name + 1);
3096 		if (-1 != val)
3097 			return 1;
3098 	}
3099 
3100 	for (reg = r->regtab; reg; reg = reg->next)
3101 		if (len == reg->key.sz &&
3102 		    0 == strncmp(name, reg->key.p, len))
3103 			return 1;
3104 
3105 	return 0;
3106 }
3107 
3108 static void
3109 roff_freereg(struct roffreg *reg)
3110 {
3111 	struct roffreg	*old_reg;
3112 
3113 	while (NULL != reg) {
3114 		free(reg->key.p);
3115 		old_reg = reg;
3116 		reg = reg->next;
3117 		free(old_reg);
3118 	}
3119 }
3120 
3121 static int
3122 roff_nr(ROFF_ARGS)
3123 {
3124 	char		*key, *val, *step;
3125 	size_t		 keysz;
3126 	int		 iv, is, len;
3127 	char		 sign;
3128 
3129 	key = val = buf->buf + pos;
3130 	if (*key == '\0')
3131 		return ROFF_IGN;
3132 
3133 	keysz = roff_getname(r, &val, ln, pos);
3134 	if (key[keysz] == '\\' || key[keysz] == '\t')
3135 		return ROFF_IGN;
3136 
3137 	sign = *val;
3138 	if (sign == '+' || sign == '-')
3139 		val++;
3140 
3141 	len = 0;
3142 	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3143 		return ROFF_IGN;
3144 
3145 	step = val + len;
3146 	while (isspace((unsigned char)*step))
3147 		step++;
3148 	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3149 		is = INT_MIN;
3150 
3151 	roff_setregn(r, key, keysz, iv, sign, is);
3152 	return ROFF_IGN;
3153 }
3154 
3155 static int
3156 roff_rr(ROFF_ARGS)
3157 {
3158 	struct roffreg	*reg, **prev;
3159 	char		*name, *cp;
3160 	size_t		 namesz;
3161 
3162 	name = cp = buf->buf + pos;
3163 	if (*name == '\0')
3164 		return ROFF_IGN;
3165 	namesz = roff_getname(r, &cp, ln, pos);
3166 	name[namesz] = '\0';
3167 
3168 	prev = &r->regtab;
3169 	while (1) {
3170 		reg = *prev;
3171 		if (reg == NULL || !strcmp(name, reg->key.p))
3172 			break;
3173 		prev = &reg->next;
3174 	}
3175 	if (reg != NULL) {
3176 		*prev = reg->next;
3177 		free(reg->key.p);
3178 		free(reg);
3179 	}
3180 	return ROFF_IGN;
3181 }
3182 
3183 /* --- handler functions for roff requests -------------------------------- */
3184 
3185 static int
3186 roff_rm(ROFF_ARGS)
3187 {
3188 	const char	 *name;
3189 	char		 *cp;
3190 	size_t		  namesz;
3191 
3192 	cp = buf->buf + pos;
3193 	while (*cp != '\0') {
3194 		name = cp;
3195 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3196 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3197 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3198 		if (name[namesz] == '\\' || name[namesz] == '\t')
3199 			break;
3200 	}
3201 	return ROFF_IGN;
3202 }
3203 
3204 static int
3205 roff_it(ROFF_ARGS)
3206 {
3207 	int		 iv;
3208 
3209 	/* Parse the number of lines. */
3210 
3211 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3212 		mandoc_msg(MANDOCERR_IT_NONUM,
3213 		    ln, ppos, "%s", buf->buf + 1);
3214 		return ROFF_IGN;
3215 	}
3216 
3217 	while (isspace((unsigned char)buf->buf[pos]))
3218 		pos++;
3219 
3220 	/*
3221 	 * Arm the input line trap.
3222 	 * Special-casing "an-trap" is an ugly workaround to cope
3223 	 * with DocBook stupidly fiddling with man(7) internals.
3224 	 */
3225 
3226 	roffit_lines = iv;
3227 	roffit_macro = mandoc_strdup(iv != 1 ||
3228 	    strcmp(buf->buf + pos, "an-trap") ?
3229 	    buf->buf + pos : "br");
3230 	return ROFF_IGN;
3231 }
3232 
3233 static int
3234 roff_Dd(ROFF_ARGS)
3235 {
3236 	int		 mask;
3237 	enum roff_tok	 t, te;
3238 
3239 	switch (tok) {
3240 	case ROFF_Dd:
3241 		tok = MDOC_Dd;
3242 		te = MDOC_MAX;
3243 		if (r->format == 0)
3244 			r->format = MPARSE_MDOC;
3245 		mask = MPARSE_MDOC | MPARSE_QUICK;
3246 		break;
3247 	case ROFF_TH:
3248 		tok = MAN_TH;
3249 		te = MAN_MAX;
3250 		if (r->format == 0)
3251 			r->format = MPARSE_MAN;
3252 		mask = MPARSE_QUICK;
3253 		break;
3254 	default:
3255 		abort();
3256 	}
3257 	if ((r->options & mask) == 0)
3258 		for (t = tok; t < te; t++)
3259 			roff_setstr(r, roff_name[t], NULL, 0);
3260 	return ROFF_CONT;
3261 }
3262 
3263 static int
3264 roff_TE(ROFF_ARGS)
3265 {
3266 	r->man->flags &= ~ROFF_NONOFILL;
3267 	if (r->tbl == NULL) {
3268 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3269 		return ROFF_IGN;
3270 	}
3271 	if (tbl_end(r->tbl, 0) == 0) {
3272 		r->tbl = NULL;
3273 		free(buf->buf);
3274 		buf->buf = mandoc_strdup(".sp");
3275 		buf->sz = 4;
3276 		*offs = 0;
3277 		return ROFF_REPARSE;
3278 	}
3279 	r->tbl = NULL;
3280 	return ROFF_IGN;
3281 }
3282 
3283 static int
3284 roff_T_(ROFF_ARGS)
3285 {
3286 
3287 	if (NULL == r->tbl)
3288 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3289 	else
3290 		tbl_restart(ln, ppos, r->tbl);
3291 
3292 	return ROFF_IGN;
3293 }
3294 
/*
 * Handle in-line equation delimiters.
 * Search the input line, starting at pos, for the next delimiter.
 * If there is none, return ROFF_CONT without changing anything.
 * Otherwise, replace the buffer with a new one where the delimiter
 * is replaced by an .EQ or .EN macro on an input line of its own,
 * and return ROFF_REPARSE.
 */
static int
roff_eqndelim(struct roff *r, struct buf *buf, int pos)
{
	char		*cp1, *cp2;
	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;

	/*
	 * Outside equations, look for an opening delimiter.
	 * If we are inside an equation, we already know it is
	 * in-line, or this function wouldn't have been called;
	 * so look for a closing delimiter.
	 */

	cp1 = buf->buf + pos;
	cp2 = strchr(cp1, r->eqn == NULL ?
	    r->last_eqn->odelim : r->last_eqn->cdelim);
	if (cp2 == NULL)
		return ROFF_CONT;

	/*
	 * Cut the line in two at the delimiter; from now on,
	 * buf->buf is the part before it and cp2 the part after it.
	 */
	*cp2++ = '\0';
	bef_pr = bef_nl = aft_nl = aft_pr = "";

	/* Handle preceding text, protecting whitespace. */

	if (*buf->buf != '\0') {
		if (r->eqn == NULL)
			bef_pr = "\\&";
		bef_nl = "\n";
	}

	/*
	 * Prepare replacing the delimiter with an equation macro
	 * and drop leading white space from the equation.
	 */

	if (r->eqn == NULL) {
		while (*cp2 == ' ')
			cp2++;
		mac = ".EQ";
	} else
		mac = ".EN";

	/* Handle following text, protecting whitespace. */

	if (*cp2 != '\0') {
		aft_nl = "\n";
		if (r->eqn != NULL)
			aft_pr = "\\&";
	}

	/* Do the actual replacement. */

	/* The size of the new buffer includes the terminating NUL. */
	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
	free(buf->buf);
	buf->buf = cp1;

	/* Toggle the in-line state of the eqn subsystem. */

	r->eqn_inline = r->eqn == NULL;
	return ROFF_REPARSE;
}
3360 
3361 static int
3362 roff_EQ(ROFF_ARGS)
3363 {
3364 	struct roff_node	*n;
3365 
3366 	if (r->man->meta.macroset == MACROSET_MAN)
3367 		man_breakscope(r->man, ROFF_EQ);
3368 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3369 	if (ln > r->man->last->line)
3370 		n->flags |= NODE_LINE;
3371 	n->eqn = eqn_box_new();
3372 	roff_node_append(r->man, n);
3373 	r->man->next = ROFF_NEXT_SIBLING;
3374 
3375 	assert(r->eqn == NULL);
3376 	if (r->last_eqn == NULL)
3377 		r->last_eqn = eqn_alloc();
3378 	else
3379 		eqn_reset(r->last_eqn);
3380 	r->eqn = r->last_eqn;
3381 	r->eqn->node = n;
3382 
3383 	if (buf->buf[pos] != '\0')
3384 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3385 		    ".EQ %s", buf->buf + pos);
3386 
3387 	return ROFF_IGN;
3388 }
3389 
3390 static int
3391 roff_EN(ROFF_ARGS)
3392 {
3393 	if (r->eqn != NULL) {
3394 		eqn_parse(r->eqn);
3395 		r->eqn = NULL;
3396 	} else
3397 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3398 	if (buf->buf[pos] != '\0')
3399 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3400 		    "EN %s", buf->buf + pos);
3401 	return ROFF_IGN;
3402 }
3403 
3404 static int
3405 roff_TS(ROFF_ARGS)
3406 {
3407 	if (r->tbl != NULL) {
3408 		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3409 		tbl_end(r->tbl, 0);
3410 	}
3411 	r->man->flags |= ROFF_NONOFILL;
3412 	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3413 	if (r->last_tbl == NULL)
3414 		r->first_tbl = r->tbl;
3415 	r->last_tbl = r->tbl;
3416 	return ROFF_IGN;
3417 }
3418 
3419 static int
3420 roff_noarg(ROFF_ARGS)
3421 {
3422 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3423 		man_breakscope(r->man, tok);
3424 	if (tok == ROFF_brp)
3425 		tok = ROFF_br;
3426 	roff_elem_alloc(r->man, ln, ppos, tok);
3427 	if (buf->buf[pos] != '\0')
3428 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3429 		   "%s %s", roff_name[tok], buf->buf + pos);
3430 	if (tok == ROFF_nf)
3431 		r->man->flags |= ROFF_NOFILL;
3432 	else if (tok == ROFF_fi)
3433 		r->man->flags &= ~ROFF_NOFILL;
3434 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3435 	r->man->next = ROFF_NEXT_SIBLING;
3436 	return ROFF_IGN;
3437 }
3438 
3439 static int
3440 roff_onearg(ROFF_ARGS)
3441 {
3442 	struct roff_node	*n;
3443 	char			*cp;
3444 	int			 npos;
3445 
3446 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3447 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3448 	     tok == ROFF_ti))
3449 		man_breakscope(r->man, tok);
3450 
3451 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3452 		r->man->last = roffce_node;
3453 		r->man->next = ROFF_NEXT_SIBLING;
3454 	}
3455 
3456 	roff_elem_alloc(r->man, ln, ppos, tok);
3457 	n = r->man->last;
3458 
3459 	cp = buf->buf + pos;
3460 	if (*cp != '\0') {
3461 		while (*cp != '\0' && *cp != ' ')
3462 			cp++;
3463 		while (*cp == ' ')
3464 			*cp++ = '\0';
3465 		if (*cp != '\0')
3466 			mandoc_msg(MANDOCERR_ARG_EXCESS,
3467 			    ln, (int)(cp - buf->buf),
3468 			    "%s ... %s", roff_name[tok], cp);
3469 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3470 	}
3471 
3472 	if (tok == ROFF_ce || tok == ROFF_rj) {
3473 		if (r->man->last->type == ROFFT_ELEM) {
3474 			roff_word_alloc(r->man, ln, pos, "1");
3475 			r->man->last->flags |= NODE_NOSRC;
3476 		}
3477 		npos = 0;
3478 		if (roff_evalnum(r, ln, r->man->last->string, &npos,
3479 		    &roffce_lines, 0) == 0) {
3480 			mandoc_msg(MANDOCERR_CE_NONUM,
3481 			    ln, pos, "ce %s", buf->buf + pos);
3482 			roffce_lines = 1;
3483 		}
3484 		if (roffce_lines < 1) {
3485 			r->man->last = r->man->last->parent;
3486 			roffce_node = NULL;
3487 			roffce_lines = 0;
3488 		} else
3489 			roffce_node = r->man->last->parent;
3490 	} else {
3491 		n->flags |= NODE_VALID | NODE_ENDED;
3492 		r->man->last = n;
3493 	}
3494 	n->flags |= NODE_LINE;
3495 	r->man->next = ROFF_NEXT_SIBLING;
3496 	return ROFF_IGN;
3497 }
3498 
3499 static int
3500 roff_manyarg(ROFF_ARGS)
3501 {
3502 	struct roff_node	*n;
3503 	char			*sp, *ep;
3504 
3505 	roff_elem_alloc(r->man, ln, ppos, tok);
3506 	n = r->man->last;
3507 
3508 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3509 		while (*ep != '\0' && *ep != ' ')
3510 			ep++;
3511 		while (*ep == ' ')
3512 			*ep++ = '\0';
3513 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3514 	}
3515 
3516 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3517 	r->man->last = n;
3518 	r->man->next = ROFF_NEXT_SIBLING;
3519 	return ROFF_IGN;
3520 }
3521 
3522 static int
3523 roff_als(ROFF_ARGS)
3524 {
3525 	char		*oldn, *newn, *end, *value;
3526 	size_t		 oldsz, newsz, valsz;
3527 
3528 	newn = oldn = buf->buf + pos;
3529 	if (*newn == '\0')
3530 		return ROFF_IGN;
3531 
3532 	newsz = roff_getname(r, &oldn, ln, pos);
3533 	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3534 		return ROFF_IGN;
3535 
3536 	end = oldn;
3537 	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3538 	if (oldsz == 0)
3539 		return ROFF_IGN;
3540 
3541 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3542 	    (int)oldsz, oldn);
3543 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3544 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3545 	free(value);
3546 	return ROFF_IGN;
3547 }
3548 
3549 /*
3550  * The .break request only makes sense inside conditionals,
3551  * and that case is already handled in roff_cond_sub().
3552  */
3553 static int
3554 roff_break(ROFF_ARGS)
3555 {
3556 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3557 	return ROFF_IGN;
3558 }
3559 
3560 static int
3561 roff_cc(ROFF_ARGS)
3562 {
3563 	const char	*p;
3564 
3565 	p = buf->buf + pos;
3566 
3567 	if (*p == '\0' || (r->control = *p++) == '.')
3568 		r->control = '\0';
3569 
3570 	if (*p != '\0')
3571 		mandoc_msg(MANDOCERR_ARG_EXCESS,
3572 		    ln, p - buf->buf, "cc ... %s", p);
3573 
3574 	return ROFF_IGN;
3575 }
3576 
3577 static int
3578 roff_char(ROFF_ARGS)
3579 {
3580 	const char	*p, *kp, *vp;
3581 	size_t		 ksz, vsz;
3582 	int		 font;
3583 
3584 	/* Parse the character to be replaced. */
3585 
3586 	kp = buf->buf + pos;
3587 	p = kp + 1;
3588 	if (*kp == '\0' || (*kp == '\\' &&
3589 	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3590 	    (*p != ' ' && *p != '\0')) {
3591 		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3592 		return ROFF_IGN;
3593 	}
3594 	ksz = p - kp;
3595 	while (*p == ' ')
3596 		p++;
3597 
3598 	/*
3599 	 * If the replacement string contains a font escape sequence,
3600 	 * we have to restore the font at the end.
3601 	 */
3602 
3603 	vp = p;
3604 	vsz = strlen(p);
3605 	font = 0;
3606 	while (*p != '\0') {
3607 		if (*p++ != '\\')
3608 			continue;
3609 		switch (mandoc_escape(&p, NULL, NULL)) {
3610 		case ESCAPE_FONT:
3611 		case ESCAPE_FONTROMAN:
3612 		case ESCAPE_FONTITALIC:
3613 		case ESCAPE_FONTBOLD:
3614 		case ESCAPE_FONTBI:
3615 		case ESCAPE_FONTCR:
3616 		case ESCAPE_FONTCB:
3617 		case ESCAPE_FONTCI:
3618 		case ESCAPE_FONTPREV:
3619 			font++;
3620 			break;
3621 		default:
3622 			break;
3623 		}
3624 	}
3625 	if (font > 1)
3626 		mandoc_msg(MANDOCERR_CHAR_FONT,
3627 		    ln, (int)(vp - buf->buf), "%s", vp);
3628 
3629 	/*
3630 	 * Approximate the effect of .char using the .tr tables.
3631 	 * XXX In groff, .char and .tr interact differently.
3632 	 */
3633 
3634 	if (ksz == 1) {
3635 		if (r->xtab == NULL)
3636 			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3637 		assert((unsigned int)*kp < 128);
3638 		free(r->xtab[(int)*kp].p);
3639 		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3640 		    "%s%s", vp, font ? "\fP" : "");
3641 	} else {
3642 		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3643 		if (font)
3644 			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3645 	}
3646 	return ROFF_IGN;
3647 }
3648 
3649 static int
3650 roff_ec(ROFF_ARGS)
3651 {
3652 	const char	*p;
3653 
3654 	p = buf->buf + pos;
3655 	if (*p == '\0')
3656 		r->escape = '\\';
3657 	else {
3658 		r->escape = *p;
3659 		if (*++p != '\0')
3660 			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3661 			    (int)(p - buf->buf), "ec ... %s", p);
3662 	}
3663 	return ROFF_IGN;
3664 }
3665 
3666 static int
3667 roff_eo(ROFF_ARGS)
3668 {
3669 	r->escape = '\0';
3670 	if (buf->buf[pos] != '\0')
3671 		mandoc_msg(MANDOCERR_ARG_SKIP,
3672 		    ln, pos, "eo %s", buf->buf + pos);
3673 	return ROFF_IGN;
3674 }
3675 
3676 static int
3677 roff_mc(ROFF_ARGS)
3678 {
3679 	struct roff_node	*n;
3680 	char			*cp;
3681 
3682 	/* Parse the first argument. */
3683 
3684 	cp = buf->buf + pos;
3685 	if (*cp != '\0')
3686 		cp++;
3687 	if (buf->buf[pos] == '\\') {
3688 		switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3689 		case ESCAPE_SPECIAL:
3690 		case ESCAPE_UNICODE:
3691 		case ESCAPE_NUMBERED:
3692 			break;
3693 		default:
3694 			*cp = '\0';
3695 			mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3696 			    "mc %s", buf->buf + pos);
3697 			buf->buf[pos] = '\0';
3698 			break;
3699 		}
3700 	}
3701 
3702 	/* Ignore additional arguments. */
3703 
3704 	while (*cp == ' ')
3705 		*cp++ = '\0';
3706 	if (*cp != '\0') {
3707 		mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3708 		    "mc ... %s", cp);
3709 		*cp = '\0';
3710 	}
3711 
3712 	/* Create the .mc node. */
3713 
3714 	roff_elem_alloc(r->man, ln, ppos, tok);
3715 	n = r->man->last;
3716 	if (buf->buf[pos] != '\0')
3717 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3718 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3719 	r->man->last = n;
3720 	r->man->next = ROFF_NEXT_SIBLING;
3721 	return ROFF_IGN;
3722 }
3723 
3724 static int
3725 roff_nop(ROFF_ARGS)
3726 {
3727 	while (buf->buf[pos] == ' ')
3728 		pos++;
3729 	*offs = pos;
3730 	return ROFF_RERUN;
3731 }
3732 
3733 static int
3734 roff_tr(ROFF_ARGS)
3735 {
3736 	const char	*p, *first, *second;
3737 	size_t		 fsz, ssz;
3738 
3739 	p = buf->buf + pos;
3740 
3741 	if (*p == '\0') {
3742 		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3743 		return ROFF_IGN;
3744 	}
3745 
3746 	while (*p != '\0') {
3747 		fsz = ssz = 1;
3748 
3749 		first = p++;
3750 		if (*first == '\\') {
3751 			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3752 				return ROFF_IGN;
3753 			fsz = (size_t)(p - first);
3754 		}
3755 
3756 		second = p++;
3757 		if (*second == '\\') {
3758 			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3759 				return ROFF_IGN;
3760 			ssz = (size_t)(p - second);
3761 		} else if (*second == '\0') {
3762 			mandoc_msg(MANDOCERR_TR_ODD, ln,
3763 			    (int)(first - buf->buf), "tr %s", first);
3764 			second = " ";
3765 			p--;
3766 		}
3767 
3768 		if (fsz > 1) {
3769 			roff_setstrn(&r->xmbtab, first, fsz,
3770 			    second, ssz, 0);
3771 			continue;
3772 		}
3773 
3774 		if (r->xtab == NULL)
3775 			r->xtab = mandoc_calloc(128,
3776 			    sizeof(struct roffstr));
3777 
3778 		free(r->xtab[(int)*first].p);
3779 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3780 		r->xtab[(int)*first].sz = ssz;
3781 	}
3782 
3783 	return ROFF_IGN;
3784 }
3785 
3786 /*
3787  * Implementation of the .return request.
3788  * There is no need to call roff_userret() from here.
3789  * The read module will call that after rewinding the reader stack
3790  * to the place from where the current macro was called.
3791  */
3792 static int
3793 roff_return(ROFF_ARGS)
3794 {
3795 	if (r->mstackpos >= 0)
3796 		return ROFF_IGN | ROFF_USERRET;
3797 
3798 	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3799 	return ROFF_IGN;
3800 }
3801 
3802 static int
3803 roff_rn(ROFF_ARGS)
3804 {
3805 	const char	*value;
3806 	char		*oldn, *newn, *end;
3807 	size_t		 oldsz, newsz;
3808 	int		 deftype;
3809 
3810 	oldn = newn = buf->buf + pos;
3811 	if (*oldn == '\0')
3812 		return ROFF_IGN;
3813 
3814 	oldsz = roff_getname(r, &newn, ln, pos);
3815 	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3816 		return ROFF_IGN;
3817 
3818 	end = newn;
3819 	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3820 	if (newsz == 0)
3821 		return ROFF_IGN;
3822 
3823 	deftype = ROFFDEF_ANY;
3824 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3825 	switch (deftype) {
3826 	case ROFFDEF_USER:
3827 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3828 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3829 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3830 		break;
3831 	case ROFFDEF_PRE:
3832 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3833 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3834 		break;
3835 	case ROFFDEF_REN:
3836 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3837 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3838 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3839 		break;
3840 	case ROFFDEF_STD:
3841 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3842 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3843 		break;
3844 	default:
3845 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3846 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3847 		break;
3848 	}
3849 	return ROFF_IGN;
3850 }
3851 
3852 static int
3853 roff_shift(ROFF_ARGS)
3854 {
3855 	struct mctx	*ctx;
3856 	int		 argpos, levels, i;
3857 
3858 	argpos = pos;
3859 	levels = 1;
3860 	if (buf->buf[pos] != '\0' &&
3861 	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3862 		mandoc_msg(MANDOCERR_CE_NONUM,
3863 		    ln, pos, "shift %s", buf->buf + pos);
3864 		levels = 1;
3865 	}
3866 	if (r->mstackpos < 0) {
3867 		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3868 		return ROFF_IGN;
3869 	}
3870 	ctx = r->mstack + r->mstackpos;
3871 	if (levels > ctx->argc) {
3872 		mandoc_msg(MANDOCERR_SHIFT,
3873 		    ln, argpos, "%d, but max is %d", levels, ctx->argc);
3874 		levels = ctx->argc;
3875 	}
3876 	if (levels < 0) {
3877 		mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3878 		levels = 0;
3879 	}
3880 	if (levels == 0)
3881 		return ROFF_IGN;
3882 	for (i = 0; i < levels; i++)
3883 		free(ctx->argv[i]);
3884 	ctx->argc -= levels;
3885 	for (i = 0; i < ctx->argc; i++)
3886 		ctx->argv[i] = ctx->argv[i + levels];
3887 	return ROFF_IGN;
3888 }
3889 
3890 static int
3891 roff_so(ROFF_ARGS)
3892 {
3893 	char *name, *cp;
3894 
3895 	name = buf->buf + pos;
3896 	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3897 
3898 	/*
3899 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3900 	 * opening anything that's not in our cwd or anything beneath
3901 	 * it.  Thus, explicitly disallow traversing up the file-system
3902 	 * or using absolute paths.
3903 	 */
3904 
3905 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3906 		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3907 		buf->sz = mandoc_asprintf(&cp,
3908 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3909 		free(buf->buf);
3910 		buf->buf = cp;
3911 		*offs = 0;
3912 		return ROFF_REPARSE;
3913 	}
3914 
3915 	*offs = pos;
3916 	return ROFF_SO;
3917 }
3918 
3919 /* --- user defined strings and macros ------------------------------------ */
3920 
3921 static int
3922 roff_userdef(ROFF_ARGS)
3923 {
3924 	struct mctx	 *ctx;
3925 	char		 *arg, *ap, *dst, *src;
3926 	size_t		  sz;
3927 
3928 	/* If the macro is empty, ignore it altogether. */
3929 
3930 	if (*r->current_string == '\0')
3931 		return ROFF_IGN;
3932 
3933 	/* Initialize a new macro stack context. */
3934 
3935 	if (++r->mstackpos == r->mstacksz) {
3936 		r->mstack = mandoc_recallocarray(r->mstack,
3937 		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3938 		r->mstacksz += 8;
3939 	}
3940 	ctx = r->mstack + r->mstackpos;
3941 	ctx->argc = 0;
3942 
3943 	/*
3944 	 * Collect pointers to macro argument strings,
3945 	 * NUL-terminating them and escaping quotes.
3946 	 */
3947 
3948 	src = buf->buf + pos;
3949 	while (*src != '\0') {
3950 		if (ctx->argc == ctx->argsz) {
3951 			ctx->argsz += 8;
3952 			ctx->argv = mandoc_reallocarray(ctx->argv,
3953 			    ctx->argsz, sizeof(*ctx->argv));
3954 		}
3955 		arg = roff_getarg(r, &src, ln, &pos);
3956 		sz = 1;  /* For the terminating NUL. */
3957 		for (ap = arg; *ap != '\0'; ap++)
3958 			sz += *ap == '"' ? 4 : 1;
3959 		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3960 		for (ap = arg; *ap != '\0'; ap++) {
3961 			if (*ap == '"') {
3962 				memcpy(dst, "\\(dq", 4);
3963 				dst += 4;
3964 			} else
3965 				*dst++ = *ap;
3966 		}
3967 		*dst = '\0';
3968 		free(arg);
3969 	}
3970 
3971 	/* Replace the macro invocation by the macro definition. */
3972 
3973 	free(buf->buf);
3974 	buf->buf = mandoc_strdup(r->current_string);
3975 	buf->sz = strlen(buf->buf) + 1;
3976 	*offs = 0;
3977 
3978 	return buf->buf[buf->sz - 2] == '\n' ?
3979 	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3980 }
3981 
3982 /*
3983  * Calling a high-level macro that was renamed with .rn.
3984  * r->current_string has already been set up by roff_parse().
3985  */
3986 static int
3987 roff_renamed(ROFF_ARGS)
3988 {
3989 	char	*nbuf;
3990 
3991 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3992 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3993 	free(buf->buf);
3994 	buf->buf = nbuf;
3995 	*offs = 0;
3996 	return ROFF_CONT;
3997 }
3998 
3999 /*
4000  * Measure the length in bytes of the roff identifier at *cpp
4001  * and advance the pointer to the next word.
4002  */
4003 static size_t
4004 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4005 {
4006 	char	 *name, *cp;
4007 	int	  namesz, inam, iend;
4008 
4009 	name = *cpp;
4010 	if (*name == '\0')
4011 		return 0;
4012 
4013 	/* Advance cp to the byte after the end of the name. */
4014 
4015 	cp = name;
4016 	namesz = 0;
4017 	for (;;) {
4018 		if (*cp == '\0')
4019 			break;
4020 		if (*cp == ' ' || *cp == '\t') {
4021 			cp++;
4022 			break;
4023 		}
4024 		if (*cp != '\\') {
4025 			if (name + namesz < cp) {
4026 				name[namesz] = *cp;
4027 				*cp = ' ';
4028 			}
4029 			namesz++;
4030 			cp++;
4031 			continue;
4032 		}
4033 		if (cp[1] == '{' || cp[1] == '}')
4034 			break;
4035 		if (roff_escape(cp, 0, 0, NULL, &inam,
4036 		    NULL, NULL, &iend) != ESCAPE_UNDEF) {
4037 			mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4038 			    "%.*s%.*s", namesz, name, iend, cp);
4039 			cp += iend;
4040 			break;
4041 		}
4042 
4043 		/*
4044 		 * In an identifier, \\, \., \G and so on
4045 		 * are reduced to \, ., G and so on,
4046 		 * vaguely similar to copy mode.
4047 		 */
4048 
4049 		name[namesz++] = cp[inam];
4050 		while (iend--) {
4051 			if (cp >= name + namesz)
4052 				*cp = ' ';
4053 			cp++;
4054 		}
4055 	}
4056 
4057 	/* Read past spaces. */
4058 
4059 	while (*cp == ' ')
4060 		cp++;
4061 
4062 	*cpp = cp;
4063 	return namesz;
4064 }
4065 
4066 /*
4067  * Store *string into the user-defined string called *name.
4068  * To clear an existing entry, call with (*r, *name, NULL, 0).
4069  * append == 0: replace mode
4070  * append == 1: single-line append mode
4071  * append == 2: multiline append mode, append '\n' after each call
4072  */
4073 static void
4074 roff_setstr(struct roff *r, const char *name, const char *string,
4075 	int append)
4076 {
4077 	size_t	 namesz;
4078 
4079 	namesz = strlen(name);
4080 	roff_setstrn(&r->strtab, name, namesz, string,
4081 	    string ? strlen(string) : 0, append);
4082 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4083 }
4084 
4085 static void
4086 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4087 		const char *string, size_t stringsz, int append)
4088 {
4089 	struct roffkv	*n;
4090 	char		*c;
4091 	int		 i;
4092 	size_t		 oldch, newch;
4093 
4094 	/* Search for an existing string with the same name. */
4095 	n = *r;
4096 
4097 	while (n && (namesz != n->key.sz ||
4098 			strncmp(n->key.p, name, namesz)))
4099 		n = n->next;
4100 
4101 	if (NULL == n) {
4102 		/* Create a new string table entry. */
4103 		n = mandoc_malloc(sizeof(struct roffkv));
4104 		n->key.p = mandoc_strndup(name, namesz);
4105 		n->key.sz = namesz;
4106 		n->val.p = NULL;
4107 		n->val.sz = 0;
4108 		n->next = *r;
4109 		*r = n;
4110 	} else if (0 == append) {
4111 		free(n->val.p);
4112 		n->val.p = NULL;
4113 		n->val.sz = 0;
4114 	}
4115 
4116 	if (NULL == string)
4117 		return;
4118 
4119 	/*
4120 	 * One additional byte for the '\n' in multiline mode,
4121 	 * and one for the terminating '\0'.
4122 	 */
4123 	newch = stringsz + (1 < append ? 2u : 1u);
4124 
4125 	if (NULL == n->val.p) {
4126 		n->val.p = mandoc_malloc(newch);
4127 		*n->val.p = '\0';
4128 		oldch = 0;
4129 	} else {
4130 		oldch = n->val.sz;
4131 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4132 	}
4133 
4134 	/* Skip existing content in the destination buffer. */
4135 	c = n->val.p + (int)oldch;
4136 
4137 	/* Append new content to the destination buffer. */
4138 	i = 0;
4139 	while (i < (int)stringsz) {
4140 		/*
4141 		 * Rudimentary roff copy mode:
4142 		 * Handle escaped backslashes.
4143 		 */
4144 		if ('\\' == string[i] && '\\' == string[i + 1])
4145 			i++;
4146 		*c++ = string[i++];
4147 	}
4148 
4149 	/* Append terminating bytes. */
4150 	if (1 < append)
4151 		*c++ = '\n';
4152 
4153 	*c = '\0';
4154 	n->val.sz = (int)(c - n->val.p);
4155 }
4156 
4157 static const char *
4158 roff_getstrn(struct roff *r, const char *name, size_t len,
4159     int *deftype)
4160 {
4161 	const struct roffkv	*n;
4162 	int			 found, i;
4163 	enum roff_tok		 tok;
4164 
4165 	found = 0;
4166 	for (n = r->strtab; n != NULL; n = n->next) {
4167 		if (strncmp(name, n->key.p, len) != 0 ||
4168 		    n->key.p[len] != '\0' || n->val.p == NULL)
4169 			continue;
4170 		if (*deftype & ROFFDEF_USER) {
4171 			*deftype = ROFFDEF_USER;
4172 			return n->val.p;
4173 		} else {
4174 			found = 1;
4175 			break;
4176 		}
4177 	}
4178 	for (n = r->rentab; n != NULL; n = n->next) {
4179 		if (strncmp(name, n->key.p, len) != 0 ||
4180 		    n->key.p[len] != '\0' || n->val.p == NULL)
4181 			continue;
4182 		if (*deftype & ROFFDEF_REN) {
4183 			*deftype = ROFFDEF_REN;
4184 			return n->val.p;
4185 		} else {
4186 			found = 1;
4187 			break;
4188 		}
4189 	}
4190 	for (i = 0; i < PREDEFS_MAX; i++) {
4191 		if (strncmp(name, predefs[i].name, len) != 0 ||
4192 		    predefs[i].name[len] != '\0')
4193 			continue;
4194 		if (*deftype & ROFFDEF_PRE) {
4195 			*deftype = ROFFDEF_PRE;
4196 			return predefs[i].str;
4197 		} else {
4198 			found = 1;
4199 			break;
4200 		}
4201 	}
4202 	if (r->man->meta.macroset != MACROSET_MAN) {
4203 		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4204 			if (strncmp(name, roff_name[tok], len) != 0 ||
4205 			    roff_name[tok][len] != '\0')
4206 				continue;
4207 			if (*deftype & ROFFDEF_STD) {
4208 				*deftype = ROFFDEF_STD;
4209 				return NULL;
4210 			} else {
4211 				found = 1;
4212 				break;
4213 			}
4214 		}
4215 	}
4216 	if (r->man->meta.macroset != MACROSET_MDOC) {
4217 		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4218 			if (strncmp(name, roff_name[tok], len) != 0 ||
4219 			    roff_name[tok][len] != '\0')
4220 				continue;
4221 			if (*deftype & ROFFDEF_STD) {
4222 				*deftype = ROFFDEF_STD;
4223 				return NULL;
4224 			} else {
4225 				found = 1;
4226 				break;
4227 			}
4228 		}
4229 	}
4230 
4231 	if (found == 0 && *deftype != ROFFDEF_ANY) {
4232 		if (*deftype & ROFFDEF_REN) {
4233 			/*
4234 			 * This might still be a request,
4235 			 * so do not treat it as undefined yet.
4236 			 */
4237 			*deftype = ROFFDEF_UNDEF;
4238 			return NULL;
4239 		}
4240 
4241 		/* Using an undefined string defines it to be empty. */
4242 
4243 		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4244 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4245 	}
4246 
4247 	*deftype = 0;
4248 	return NULL;
4249 }
4250 
4251 static void
4252 roff_freestr(struct roffkv *r)
4253 {
4254 	struct roffkv	 *n, *nn;
4255 
4256 	for (n = r; n; n = nn) {
4257 		free(n->key.p);
4258 		free(n->val.p);
4259 		nn = n->next;
4260 		free(n);
4261 	}
4262 }
4263 
4264 /* --- accessors and utility functions ------------------------------------ */
4265 
4266 /*
4267  * Duplicate an input string, making the appropriate character
4268  * conversations (as stipulated by `tr') along the way.
4269  * Returns a heap-allocated string with all the replacements made.
4270  */
4271 char *
4272 roff_strdup(const struct roff *r, const char *p)
4273 {
4274 	const struct roffkv *cp;
4275 	char		*res;
4276 	const char	*pp;
4277 	size_t		 ssz, sz;
4278 	enum mandoc_esc	 esc;
4279 
4280 	if (NULL == r->xmbtab && NULL == r->xtab)
4281 		return mandoc_strdup(p);
4282 	else if ('\0' == *p)
4283 		return mandoc_strdup("");
4284 
4285 	/*
4286 	 * Step through each character looking for term matches
4287 	 * (remember that a `tr' can be invoked with an escape, which is
4288 	 * a glyph but the escape is multi-character).
4289 	 * We only do this if the character hash has been initialised
4290 	 * and the string is >0 length.
4291 	 */
4292 
4293 	res = NULL;
4294 	ssz = 0;
4295 
4296 	while ('\0' != *p) {
4297 		assert((unsigned int)*p < 128);
4298 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4299 			sz = r->xtab[(int)*p].sz;
4300 			res = mandoc_realloc(res, ssz + sz + 1);
4301 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4302 			ssz += sz;
4303 			p++;
4304 			continue;
4305 		} else if ('\\' != *p) {
4306 			res = mandoc_realloc(res, ssz + 2);
4307 			res[ssz++] = *p++;
4308 			continue;
4309 		}
4310 
4311 		/* Search for term matches. */
4312 		for (cp = r->xmbtab; cp; cp = cp->next)
4313 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4314 				break;
4315 
4316 		if (NULL != cp) {
4317 			/*
4318 			 * A match has been found.
4319 			 * Append the match to the array and move
4320 			 * forward by its keysize.
4321 			 */
4322 			res = mandoc_realloc(res,
4323 			    ssz + cp->val.sz + 1);
4324 			memcpy(res + ssz, cp->val.p, cp->val.sz);
4325 			ssz += cp->val.sz;
4326 			p += (int)cp->key.sz;
4327 			continue;
4328 		}
4329 
4330 		/*
4331 		 * Handle escapes carefully: we need to copy
4332 		 * over just the escape itself, or else we might
4333 		 * do replacements within the escape itself.
4334 		 * Make sure to pass along the bogus string.
4335 		 */
4336 		pp = p++;
4337 		esc = mandoc_escape(&p, NULL, NULL);
4338 		if (ESCAPE_ERROR == esc) {
4339 			sz = strlen(pp);
4340 			res = mandoc_realloc(res, ssz + sz + 1);
4341 			memcpy(res + ssz, pp, sz);
4342 			break;
4343 		}
4344 		/*
4345 		 * We bail out on bad escapes.
4346 		 * No need to warn: we already did so when
4347 		 * roff_expand() was called.
4348 		 */
4349 		sz = (int)(p - pp);
4350 		res = mandoc_realloc(res, ssz + sz + 1);
4351 		memcpy(res + ssz, pp, sz);
4352 		ssz += sz;
4353 	}
4354 
4355 	res[(int)ssz] = '\0';
4356 	return res;
4357 }
4358 
4359 int
4360 roff_getformat(const struct roff *r)
4361 {
4362 
4363 	return r->format;
4364 }
4365 
4366 /*
4367  * Find out whether a line is a macro line or not.
4368  * If it is, adjust the current position and return one; if it isn't,
4369  * return zero and don't change the current position.
4370  * If the control character has been set with `.cc', then let that grain
4371  * precedence.
4372  * This is slightly contrary to groff, where using the non-breaking
4373  * control character when `cc' has been invoked will cause the
4374  * non-breaking macro contents to be printed verbatim.
4375  */
4376 int
4377 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4378 {
4379 	int		pos;
4380 
4381 	pos = *ppos;
4382 
4383 	if (r->control != '\0' && cp[pos] == r->control)
4384 		pos++;
4385 	else if (r->control != '\0')
4386 		return 0;
4387 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4388 		pos += 2;
4389 	else if ('.' == cp[pos] || '\'' == cp[pos])
4390 		pos++;
4391 	else
4392 		return 0;
4393 
4394 	while (' ' == cp[pos] || '\t' == cp[pos])
4395 		pos++;
4396 
4397 	*ppos = pos;
4398 	return 1;
4399 }
4400