xref: /openbsd-src/usr.bin/mandoc/roff.c (revision d59bb9942320b767f2a19aaa7690c8c6e30b724c)
1 /*	$OpenBSD: roff.c,v 1.163 2017/03/03 13:55:06 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include "mandoc.h"
28 #include "mandoc_aux.h"
29 #include "roff.h"
30 #include "libmandoc.h"
31 #include "roff_int.h"
32 #include "libroff.h"
33 
/*
 * Maximum number of string expansions per input line.
 * The cap exists to break infinite expansion loops.
 * NOTE(review): the code enforcing this limit is not part of this
 * section of the file - confirm where it is checked.
 */
#define	EXPAND_LIMIT	1000
36 
37 /* --- data types --------------------------------------------------------- */
38 
/*
 * Tokens for the roff requests known to this parser.
 *
 * The order of the enumerators must match the order of the entries in
 * the roffs[] table: roffhash_find() converts a table entry back into
 * a token with pointer arithmetic (entry - roffs).
 *
 * The last three enumerators are special:
 * ROFF_cblock is the table slot of the "." request,
 * ROFF_USERDEF is the slot without a name that dispatches to
 * roff_userdef() (only the slots below it are hashed by name), and
 * ROFF_MAX is the table size, also returned by roffhash_find()
 * when no request matches.
 */
enum	rofft {
	ROFF_ab,
	ROFF_ad,
	ROFF_af,
	ROFF_aln,
	ROFF_als,
	ROFF_am,
	ROFF_am1,
	ROFF_ami,
	ROFF_ami1,
	ROFF_as,
	ROFF_as1,
	ROFF_asciify,
	ROFF_backtrace,
	ROFF_bd,
	ROFF_bleedat,
	ROFF_blm,
	ROFF_box,
	ROFF_boxa,
	ROFF_bp,
	ROFF_BP,
	/* MAN_br, MDOC_br */
	ROFF_break,
	ROFF_breakchar,
	ROFF_brnl,
	ROFF_brp,
	ROFF_brpnl,
	ROFF_c2,
	ROFF_cc,
	ROFF_ce,
	ROFF_cf,
	ROFF_cflags,
	ROFF_ch,
	ROFF_char,
	ROFF_chop,
	ROFF_class,
	ROFF_close,
	ROFF_CL,
	ROFF_color,
	ROFF_composite,
	ROFF_continue,
	ROFF_cp,
	ROFF_cropat,
	ROFF_cs,
	ROFF_cu,
	ROFF_da,
	ROFF_dch,
	ROFF_Dd,
	ROFF_de,
	ROFF_de1,
	ROFF_defcolor,
	ROFF_dei,
	ROFF_dei1,
	ROFF_device,
	ROFF_devicem,
	ROFF_di,
	ROFF_do,
	ROFF_ds,
	ROFF_ds1,
	ROFF_dwh,
	ROFF_dt,
	ROFF_ec,
	ROFF_ecr,
	ROFF_ecs,
	ROFF_el,
	ROFF_em,
	ROFF_EN,
	ROFF_eo,
	ROFF_EP,
	ROFF_EQ,
	ROFF_errprint,
	ROFF_ev,
	ROFF_evc,
	ROFF_ex,
	ROFF_fallback,
	ROFF_fam,
	ROFF_fc,
	ROFF_fchar,
	ROFF_fcolor,
	ROFF_fdeferlig,
	ROFF_feature,
	/* MAN_fi; ignored in mdoc(7) */
	ROFF_fkern,
	ROFF_fl,
	ROFF_flig,
	ROFF_fp,
	ROFF_fps,
	ROFF_fschar,
	ROFF_fspacewidth,
	ROFF_fspecial,
	/* MAN_ft; ignored in mdoc(7) */
	ROFF_ftr,
	ROFF_fzoom,
	ROFF_gcolor,
	ROFF_hc,
	ROFF_hcode,
	ROFF_hidechar,
	ROFF_hla,
	ROFF_hlm,
	ROFF_hpf,
	ROFF_hpfa,
	ROFF_hpfcode,
	ROFF_hw,
	ROFF_hy,
	ROFF_hylang,
	ROFF_hylen,
	ROFF_hym,
	ROFF_hypp,
	ROFF_hys,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	/* MAN_in; ignored in mdoc(7) */
	ROFF_index,
	ROFF_it,
	ROFF_itc,
	ROFF_IX,
	ROFF_kern,
	ROFF_kernafter,
	ROFF_kernbefore,
	ROFF_kernpair,
	ROFF_lc,
	ROFF_lc_ctype,
	ROFF_lds,
	ROFF_length,
	ROFF_letadj,
	ROFF_lf,
	ROFF_lg,
	ROFF_lhang,
	ROFF_linetabs,
	/* MAN_ll, MDOC_ll */
	ROFF_lnr,
	ROFF_lnrf,
	ROFF_lpfx,
	ROFF_ls,
	ROFF_lsm,
	ROFF_lt,
	ROFF_mc,
	ROFF_mediasize,
	ROFF_minss,
	ROFF_mk,
	ROFF_mso,
	ROFF_na,
	ROFF_ne,
	/* MAN_nf; ignored in mdoc(7) */
	ROFF_nh,
	ROFF_nhychar,
	ROFF_nm,
	ROFF_nn,
	ROFF_nop,
	ROFF_nr,
	ROFF_nrf,
	ROFF_nroff,
	ROFF_ns,
	ROFF_nx,
	ROFF_open,
	ROFF_opena,
	ROFF_os,
	ROFF_output,
	ROFF_padj,
	ROFF_papersize,
	ROFF_pc,
	ROFF_pev,
	ROFF_pi,
	ROFF_PI,
	ROFF_pl,
	ROFF_pm,
	ROFF_pn,
	ROFF_pnr,
	ROFF_po,
	ROFF_ps,
	ROFF_psbb,
	ROFF_pshape,
	ROFF_pso,
	ROFF_ptr,
	ROFF_pvs,
	ROFF_rchar,
	ROFF_rd,
	ROFF_recursionlimit,
	ROFF_return,
	ROFF_rfschar,
	ROFF_rhang,
	ROFF_rj,
	ROFF_rm,
	ROFF_rn,
	ROFF_rnn,
	ROFF_rr,
	ROFF_rs,
	ROFF_rt,
	ROFF_schar,
	ROFF_sentchar,
	ROFF_shc,
	ROFF_shift,
	ROFF_sizes,
	ROFF_so,
	/* MAN_sp, MDOC_sp */
	ROFF_spacewidth,
	ROFF_special,
	ROFF_spreadwarn,
	ROFF_ss,
	ROFF_sty,
	ROFF_substring,
	ROFF_sv,
	ROFF_sy,
	ROFF_T_,
	ROFF_ta,
	ROFF_tc,
	ROFF_TE,
	ROFF_TH,
	ROFF_ti,
	ROFF_tkf,
	ROFF_tl,
	ROFF_tm,
	ROFF_tm1,
	ROFF_tmc,
	ROFF_tr,
	ROFF_track,
	ROFF_transchar,
	ROFF_trf,
	ROFF_trimat,
	ROFF_trin,
	ROFF_trnt,
	ROFF_troff,
	ROFF_TS,
	ROFF_uf,
	ROFF_ul,
	ROFF_unformat,
	ROFF_unwatch,
	ROFF_unwatchn,
	ROFF_vpt,
	ROFF_vs,
	ROFF_warn,
	ROFF_warnscale,
	ROFF_watch,
	ROFF_watchlength,
	ROFF_watchn,
	ROFF_wh,
	ROFF_while,
	ROFF_write,
	ROFF_writec,
	ROFF_writem,
	ROFF_xflag,
	ROFF_cblock,
	ROFF_USERDEF,
	ROFF_MAX
};
285 
/*
 * An incredibly-simple string buffer:
 * a pointer to the characters plus their cached length.
 */
struct	roffstr {
	char		*p; /* NUL-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
293 
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff uses such lists for its strtab and xmbtab members.
 */
struct	roffkv {
	struct roffstr	 key; /* name used for lookup */
	struct roffstr	 val; /* associated value */
	struct roffkv	*next; /* next in list */
};
302 
/*
 * A single number register as part of a singly-linked list.
 * The head of the list is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current register value */
	struct roffreg	*next; /* next in list */
};
311 
/*
 * State of one roff parser.
 * Allocated by roff_alloc(), returned to its initial state by
 * roff_reset(), and released by roff_free().  The tables, lists and
 * stacks referenced from here are owned by the parser and are
 * released in roff_free1().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack; -1 after alloc/reset */
	int		 format; /* current file in mdoc or man format */
	int		 argc; /* number of args of the last macro */
	char		 control; /* control character; 0 after alloc/reset */
};
335 
/*
 * One entry on the stack of pending roff instructions.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(), which also frees the name and end strings.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule; copied from parent on push */
};
346 
/*
 * Common parameter list of all request handlers.
 * Using one macro keeps the handler prototypes, the handler
 * definitions and the roffproc type below in sync.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler, as stored in struct roffmac. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
356 
/*
 * Description of one request: its name and its handlers.
 * The instances live in the roffs[] table; roffhash_init() links
 * them into per-bucket chains through the next member.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
366 
/*
 * One predefined string: an input name and its replacement.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expand to one initializer of a struct predef array element,
 * including the trailing comma.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
374 
375 /* --- function prototypes ------------------------------------------------ */
376 
/*
 * Forward declarations of the file-local functions.
 * Those declared with ROFF_ARGS have the roffproc signature and are
 * installed as request handlers in the roffs[] table below.
 */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_brp(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofft	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct buf *, int, int *);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
442 
443 /* --- constant data ------------------------------------------------------ */
444 
/*
 * Parameters of the request lookup table; see roffhash_find().
 * Request names are bucketed by their first character, which must
 * lie in the range ASCII_LO ('!') to ASCII_HI ('~'), that is, the
 * printable ASCII characters excluding the space.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/*
 * Heads of the per-bucket chains, indexed by ROFF_HASH().
 * Filled by roffhash_init(); shared by all parsers in the process.
 */
static	struct roffmac	*hash[HASHWIDTH];
455 
/*
 * Table of all requests, indexed by enum rofft.
 * The entries must stay in the same order as the enumerators because
 * roffhash_find() derives the token from the position in this table.
 * Each entry lists the name, the three handlers (proc, text, sub),
 * the flags, and the hash chain link, which is filled in at run time
 * by roffhash_init().  The final entry has no name: it is the
 * ROFF_USERDEF slot and is not entered into the hash table.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ab", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "af", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "aln", roff_unsupp, NULL, NULL, 0, NULL },
	{ "als", roff_unsupp, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "as1", roff_ds, NULL, NULL, 0, NULL },
	{ "asciify", roff_unsupp, NULL, NULL, 0, NULL },
	{ "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "bd", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "blm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "box", roff_unsupp, NULL, NULL, 0, NULL },
	{ "boxa", roff_unsupp, NULL, NULL, 0, NULL },
	{ "bp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "BP", roff_unsupp, NULL, NULL, 0, NULL },
	{ "break", roff_unsupp, NULL, NULL, 0, NULL },
	{ "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "brp", roff_brp, NULL, NULL, 0, NULL },
	{ "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "c2", roff_unsupp, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cf", roff_insec, NULL, NULL, 0, NULL },
	{ "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "char", roff_unsupp, NULL, NULL, 0, NULL },
	{ "chop", roff_unsupp, NULL, NULL, 0, NULL },
	{ "class", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "close", roff_insec, NULL, NULL, 0, NULL },
	{ "CL", roff_unsupp, NULL, NULL, 0, NULL },
	{ "color", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "composite", roff_unsupp, NULL, NULL, 0, NULL },
	{ "continue", roff_unsupp, NULL, NULL, 0, NULL },
	{ "cp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cu", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "da", roff_unsupp, NULL, NULL, 0, NULL },
	{ "dch", roff_unsupp, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "device", roff_unsupp, NULL, NULL, 0, NULL },
	{ "devicem", roff_unsupp, NULL, NULL, 0, NULL },
	{ "di", roff_unsupp, NULL, NULL, 0, NULL },
	{ "do", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "ds1", roff_ds, NULL, NULL, 0, NULL },
	{ "dwh", roff_unsupp, NULL, NULL, 0, NULL },
	{ "dt", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ec", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ecr", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ecs", roff_unsupp, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "em", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ "eo", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EP", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ev", roff_unsupp, NULL, NULL, 0, NULL },
	{ "evc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ex", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fchar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "feature", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "flig", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fschar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hla", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hym", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hys", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "index", roff_unsupp, NULL, NULL, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "itc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "IX", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kern", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lds", roff_unsupp, NULL, NULL, 0, NULL },
	{ "length", roff_unsupp, NULL, NULL, 0, NULL },
	{ "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lf", roff_insec, NULL, NULL, 0, NULL },
	{ "lg", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lnr", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ls", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lsm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "minss", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mk", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mso", roff_insec, NULL, NULL, 0, NULL },
	{ "na", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nop", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "nrf", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nx", roff_insec, NULL, NULL, 0, NULL },
	{ "open", roff_insec, NULL, NULL, 0, NULL },
	{ "opena", roff_insec, NULL, NULL, 0, NULL },
	{ "os", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "output", roff_unsupp, NULL, NULL, 0, NULL },
	{ "padj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pev", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pi", roff_insec, NULL, NULL, 0, NULL },
	{ "PI", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "po", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "psbb", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pshape", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pso", roff_insec, NULL, NULL, 0, NULL },
	{ "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rchar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rd", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "return", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rnn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "rs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "schar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "shc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "shift", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "special", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ss", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "sty", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "substring", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sv", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "sy", roff_insec, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "ta", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "ti", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tl", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "track", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "trf", roff_insec, NULL, NULL, 0, NULL },
	{ "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "trin", roff_unsupp, NULL, NULL, 0, NULL },
	{ "trnt", roff_unsupp, NULL, NULL, 0, NULL },
	{ "troff", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "uf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ul", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "unformat", roff_unsupp, NULL, NULL, 0, NULL },
	{ "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "vs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "warn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "wh", roff_unsupp, NULL, NULL, 0, NULL },
	{ "while", roff_unsupp, NULL, NULL, 0, NULL },
	{ "write", roff_insec, NULL, NULL, 0, NULL },
	{ "writec", roff_insec, NULL, NULL, 0, NULL },
	{ "writem", roff_insec, NULL, NULL, 0, NULL },
	{ "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
694 
/*
 * NULL-terminated list of mdoc(7) macro names.
 * The array has external linkage.
 * NOTE(review): its consumers are not part of this section of the
 * file - confirm how the list is used before changing it.
 *
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
717 
/*
 * NULL-terminated list of man(7) macro names.
 * The array has external linkage.
 * NOTE(review): its consumers are not part of this section of the
 * file - confirm how the list is used before changing it.
 *
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
727 
/*
 * Array of injected predefined strings.
 * The initializers come from the included file "predefs.in";
 * presumably it consists of PREDEF() invocations, each expanding to
 * one array element - verify against that file.  PREDEFS_MAX is the
 * declared array size and has to cover the number of entries there.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
733 
/*
 * Map a request name to its bucket in the lookup table: the offset
 * of the first character from ASCII_LO.  See roffhash_find().
 * The caller has to make sure that the first character lies in the
 * range ASCII_LO to ASCII_HI.  The argument is parenthesized so that
 * pointer expressions such as (name + 1) can be passed safely.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
736 
/*
 * File-scope state that is not kept in struct roff.
 * NOTE(review): presumably maintained by roff_it(), the handler of
 * the `it' request; the code using these is not part of this section
 * of the file - confirm.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* NUL-terminated macro line */
739 
740 
741 /* --- request table ------------------------------------------------------ */
742 
743 static void
744 roffhash_init(void)
745 {
746 	struct roffmac	 *n;
747 	int		  buc, i;
748 
749 	for (i = 0; i < (int)ROFF_USERDEF; i++) {
750 		assert(roffs[i].name[0] >= ASCII_LO);
751 		assert(roffs[i].name[0] <= ASCII_HI);
752 
753 		buc = ROFF_HASH(roffs[i].name);
754 
755 		if (NULL != (n = hash[buc])) {
756 			for ( ; n->next; n = n->next)
757 				/* Do nothing. */ ;
758 			n->next = &roffs[i];
759 		} else
760 			hash[buc] = &roffs[i];
761 	}
762 }
763 
764 /*
765  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
766  * the nil-terminated string name could be found.
767  */
768 static enum rofft
769 roffhash_find(const char *p, size_t s)
770 {
771 	int		 buc;
772 	struct roffmac	*n;
773 
774 	/*
775 	 * libroff has an extremely simple hashtable, for the time
776 	 * being, which simply keys on the first character, which must
777 	 * be printable, then walks a chain.  It works well enough until
778 	 * optimised.
779 	 */
780 
781 	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
782 		return ROFF_MAX;
783 
784 	buc = ROFF_HASH(p);
785 
786 	if (NULL == (n = hash[buc]))
787 		return ROFF_MAX;
788 	for ( ; n; n = n->next)
789 		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
790 			return (enum rofft)(n - roffs);
791 
792 	return ROFF_MAX;
793 }
794 
795 /* --- stack of request blocks -------------------------------------------- */
796 
797 /*
798  * Pop the current node off of the stack of roff instructions currently
799  * pending.
800  */
801 static void
802 roffnode_pop(struct roff *r)
803 {
804 	struct roffnode	*p;
805 
806 	assert(r->last);
807 	p = r->last;
808 
809 	r->last = r->last->parent;
810 	free(p->name);
811 	free(p->end);
812 	free(p);
813 }
814 
815 /*
816  * Push a roff node onto the instruction stack.  This must later be
817  * removed with roffnode_pop().
818  */
819 static void
820 roffnode_push(struct roff *r, enum rofft tok, const char *name,
821 		int line, int col)
822 {
823 	struct roffnode	*p;
824 
825 	p = mandoc_calloc(1, sizeof(struct roffnode));
826 	p->tok = tok;
827 	if (name)
828 		p->name = mandoc_strdup(name);
829 	p->parent = r->last;
830 	p->line = line;
831 	p->col = col;
832 	p->rule = p->parent ? p->parent->rule : 0;
833 
834 	r->last = p;
835 }
836 
837 /* --- roff parser state data management ---------------------------------- */
838 
839 static void
840 roff_free1(struct roff *r)
841 {
842 	struct tbl_node	*tbl;
843 	struct eqn_node	*e;
844 	int		 i;
845 
846 	while (NULL != (tbl = r->first_tbl)) {
847 		r->first_tbl = tbl->next;
848 		tbl_free(tbl);
849 	}
850 	r->first_tbl = r->last_tbl = r->tbl = NULL;
851 
852 	while (NULL != (e = r->first_eqn)) {
853 		r->first_eqn = e->next;
854 		eqn_free(e);
855 	}
856 	r->first_eqn = r->last_eqn = r->eqn = NULL;
857 
858 	while (r->last)
859 		roffnode_pop(r);
860 
861 	free (r->rstack);
862 	r->rstack = NULL;
863 	r->rstacksz = 0;
864 	r->rstackpos = -1;
865 
866 	roff_freereg(r->regtab);
867 	r->regtab = NULL;
868 
869 	roff_freestr(r->strtab);
870 	roff_freestr(r->xmbtab);
871 	r->strtab = r->xmbtab = NULL;
872 
873 	if (r->xtab)
874 		for (i = 0; i < 128; i++)
875 			free(r->xtab[i].p);
876 	free(r->xtab);
877 	r->xtab = NULL;
878 }
879 
880 void
881 roff_reset(struct roff *r)
882 {
883 
884 	roff_free1(r);
885 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
886 	r->control = 0;
887 }
888 
/*
 * Destroy a parser: release all data it owns,
 * then the parser structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
896 
897 struct roff *
898 roff_alloc(struct mparse *parse, int options)
899 {
900 	struct roff	*r;
901 
902 	r = mandoc_calloc(1, sizeof(struct roff));
903 	r->parse = parse;
904 	r->options = options;
905 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
906 	r->rstackpos = -1;
907 
908 	roffhash_init();
909 
910 	return r;
911 }
912 
913 /* --- syntax tree state data management ---------------------------------- */
914 
915 static void
916 roff_man_free1(struct roff_man *man)
917 {
918 
919 	if (man->first != NULL)
920 		roff_node_delete(man, man->first);
921 	free(man->meta.msec);
922 	free(man->meta.vol);
923 	free(man->meta.os);
924 	free(man->meta.arch);
925 	free(man->meta.title);
926 	free(man->meta.name);
927 	free(man->meta.date);
928 }
929 
930 static void
931 roff_man_alloc1(struct roff_man *man)
932 {
933 
934 	memset(&man->meta, 0, sizeof(man->meta));
935 	man->first = mandoc_calloc(1, sizeof(*man->first));
936 	man->first->type = ROFFT_ROOT;
937 	man->last = man->first;
938 	man->last_es = NULL;
939 	man->flags = 0;
940 	man->macroset = MACROSET_NONE;
941 	man->lastsec = man->lastnamed = SEC_NONE;
942 	man->next = ROFF_NEXT_CHILD;
943 }
944 
/*
 * Discard the current syntax tree and meta data
 * and start over with an empty document.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
952 
/*
 * Destroy a document: release the syntax tree and the meta data,
 * then the roff_man structure itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
960 
961 struct roff_man *
962 roff_man_alloc(struct roff *roff, struct mparse *parse,
963 	const char *defos, int quick)
964 {
965 	struct roff_man *man;
966 
967 	man = mandoc_calloc(1, sizeof(*man));
968 	man->parse = parse;
969 	man->roff = roff;
970 	man->defos = defos;
971 	man->quick = quick;
972 	roff_man_alloc1(man);
973 	return man;
974 }
975 
976 /* --- syntax tree handling ----------------------------------------------- */
977 
978 struct roff_node *
979 roff_node_alloc(struct roff_man *man, int line, int pos,
980 	enum roff_type type, int tok)
981 {
982 	struct roff_node	*n;
983 
984 	n = mandoc_calloc(1, sizeof(*n));
985 	n->line = line;
986 	n->pos = pos;
987 	n->tok = tok;
988 	n->type = type;
989 	n->sec = man->lastsec;
990 
991 	if (man->flags & MDOC_SYNOPSIS)
992 		n->flags |= NODE_SYNPRETTY;
993 	else
994 		n->flags &= ~NODE_SYNPRETTY;
995 	if (man->flags & MDOC_NEWLINE)
996 		n->flags |= NODE_LINE;
997 	man->flags &= ~MDOC_NEWLINE;
998 
999 	return n;
1000 }
1001 
1002 void
1003 roff_node_append(struct roff_man *man, struct roff_node *n)
1004 {
1005 
1006 	switch (man->next) {
1007 	case ROFF_NEXT_SIBLING:
1008 		if (man->last->next != NULL) {
1009 			n->next = man->last->next;
1010 			man->last->next->prev = n;
1011 		} else
1012 			man->last->parent->last = n;
1013 		man->last->next = n;
1014 		n->prev = man->last;
1015 		n->parent = man->last->parent;
1016 		break;
1017 	case ROFF_NEXT_CHILD:
1018 		if (man->last->child != NULL) {
1019 			n->next = man->last->child;
1020 			man->last->child->prev = n;
1021 		} else
1022 			man->last->last = n;
1023 		man->last->child = n;
1024 		n->parent = man->last;
1025 		break;
1026 	default:
1027 		abort();
1028 	}
1029 	man->last = n;
1030 
1031 	switch (n->type) {
1032 	case ROFFT_HEAD:
1033 		n->parent->head = n;
1034 		break;
1035 	case ROFFT_BODY:
1036 		if (n->end != ENDBODY_NOT)
1037 			return;
1038 		n->parent->body = n;
1039 		break;
1040 	case ROFFT_TAIL:
1041 		n->parent->tail = n;
1042 		break;
1043 	default:
1044 		return;
1045 	}
1046 
1047 	/*
1048 	 * Copy over the normalised-data pointer of our parent.  Not
1049 	 * everybody has one, but copying a null pointer is fine.
1050 	 */
1051 
1052 	n->norm = n->parent->norm;
1053 	assert(n->parent->type == ROFFT_BLOCK);
1054 }
1055 
1056 void
1057 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
1058 {
1059 	struct roff_node	*n;
1060 
1061 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
1062 	n->string = roff_strdup(man->roff, word);
1063 	roff_node_append(man, n);
1064 	n->flags |= NODE_VALID | NODE_ENDED;
1065 	man->next = ROFF_NEXT_SIBLING;
1066 }
1067 
1068 void
1069 roff_word_append(struct roff_man *man, const char *word)
1070 {
1071 	struct roff_node	*n;
1072 	char			*addstr, *newstr;
1073 
1074 	n = man->last;
1075 	addstr = roff_strdup(man->roff, word);
1076 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
1077 	free(addstr);
1078 	free(n->string);
1079 	n->string = newstr;
1080 	man->next = ROFF_NEXT_SIBLING;
1081 }
1082 
1083 void
1084 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
1085 {
1086 	struct roff_node	*n;
1087 
1088 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1089 	roff_node_append(man, n);
1090 	man->next = ROFF_NEXT_CHILD;
1091 }
1092 
1093 struct roff_node *
1094 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1095 {
1096 	struct roff_node	*n;
1097 
1098 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1099 	roff_node_append(man, n);
1100 	man->next = ROFF_NEXT_CHILD;
1101 	return n;
1102 }
1103 
1104 struct roff_node *
1105 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1106 {
1107 	struct roff_node	*n;
1108 
1109 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1110 	roff_node_append(man, n);
1111 	man->next = ROFF_NEXT_CHILD;
1112 	return n;
1113 }
1114 
1115 struct roff_node *
1116 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1117 {
1118 	struct roff_node	*n;
1119 
1120 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1121 	roff_node_append(man, n);
1122 	man->next = ROFF_NEXT_CHILD;
1123 	return n;
1124 }
1125 
1126 void
1127 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1128 {
1129 	struct roff_node	*n;
1130 
1131 	n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1132 	n->eqn = eqn;
1133 	if (eqn->ln > man->last->line)
1134 		n->flags |= NODE_LINE;
1135 	roff_node_append(man, n);
1136 	man->next = ROFF_NEXT_SIBLING;
1137 }
1138 
1139 void
1140 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1141 {
1142 	struct roff_node	*n;
1143 
1144 	if (man->macroset == MACROSET_MAN)
1145 		man_breakscope(man, TOKEN_NONE);
1146 	n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1147 	n->span = tbl;
1148 	roff_node_append(man, n);
1149 	n->flags |= NODE_VALID | NODE_ENDED;
1150 	man->next = ROFF_NEXT_SIBLING;
1151 }
1152 
1153 void
1154 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1155 {
1156 
1157 	/* Adjust siblings. */
1158 
1159 	if (n->prev)
1160 		n->prev->next = n->next;
1161 	if (n->next)
1162 		n->next->prev = n->prev;
1163 
1164 	/* Adjust parent. */
1165 
1166 	if (n->parent != NULL) {
1167 		if (n->parent->child == n)
1168 			n->parent->child = n->next;
1169 		if (n->parent->last == n)
1170 			n->parent->last = n->prev;
1171 	}
1172 
1173 	/* Adjust parse point. */
1174 
1175 	if (man == NULL)
1176 		return;
1177 	if (man->last == n) {
1178 		if (n->prev == NULL) {
1179 			man->last = n->parent;
1180 			man->next = ROFF_NEXT_CHILD;
1181 		} else {
1182 			man->last = n->prev;
1183 			man->next = ROFF_NEXT_SIBLING;
1184 		}
1185 	}
1186 	if (man->first == n)
1187 		man->first = NULL;
1188 }
1189 
1190 void
1191 roff_node_free(struct roff_node *n)
1192 {
1193 
1194 	if (n->args != NULL)
1195 		mdoc_argv_free(n->args);
1196 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1197 		free(n->norm);
1198 	free(n->string);
1199 	free(n);
1200 }
1201 
1202 void
1203 roff_node_delete(struct roff_man *man, struct roff_node *n)
1204 {
1205 
1206 	while (n->child != NULL)
1207 		roff_node_delete(man, n->child);
1208 	roff_node_unlink(man, n);
1209 	roff_node_free(n);
1210 }
1211 
1212 void
1213 deroff(char **dest, const struct roff_node *n)
1214 {
1215 	char	*cp;
1216 	size_t	 sz;
1217 
1218 	if (n->type != ROFFT_TEXT) {
1219 		for (n = n->child; n != NULL; n = n->next)
1220 			deroff(dest, n);
1221 		return;
1222 	}
1223 
1224 	/* Skip leading whitespace. */
1225 
1226 	for (cp = n->string; *cp != '\0'; cp++) {
1227 		if (cp[0] == '\\' && cp[1] != '\0' &&
1228 		    strchr(" %&0^|~", cp[1]) != NULL)
1229 			cp++;
1230 		else if ( ! isspace((unsigned char)*cp))
1231 			break;
1232 	}
1233 
1234 	/* Skip trailing backslash. */
1235 
1236 	sz = strlen(cp);
1237 	if (sz > 0 && cp[sz - 1] == '\\')
1238 		sz--;
1239 
1240 	/* Skip trailing whitespace. */
1241 
1242 	for (; sz; sz--)
1243 		if ( ! isspace((unsigned char)cp[sz-1]))
1244 			break;
1245 
1246 	/* Skip empty strings. */
1247 
1248 	if (sz == 0)
1249 		return;
1250 
1251 	if (*dest == NULL) {
1252 		*dest = mandoc_strndup(cp, sz);
1253 		return;
1254 	}
1255 
1256 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1257 	free(*dest);
1258 	*dest = cp;
1259 }
1260 
1261 /* --- main functions of the roff parser ---------------------------------- */
1262 
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The escapes \* (string), \n (register), \B (numeric expression
 * test) and \w (width) are replaced in buf by their value; all other
 * escapes are only passed to mandoc_escape() for validation.
 * The line is scanned backwards starting from its end.  Each
 * replacement allocates a new buffer, and the scan resumes at the
 * end of the inserted text, so the replacement is scanned again.
 *
 * Returns ROFF_IGN if more than EXPAND_LIMIT expansions were needed
 * or if the buffer would grow beyond SHRT_MAX, ROFF_CONT otherwise.
 */
static enum rofferr
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	enum mandoc_esc	 esc;	/* type of the escape sequence */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	char		 term;	/* character terminating the escape */

	expand_count = 0;
	start = buf->buf + pos;
	stesc = strchr(start, '\0') - 1;
	while (stesc-- > start) {

		/* Search backwards for the next backslash. */

		if (*stesc != '\\')
			continue;

		/*
		 * If it is escaped, skip it.
		 * Find the first byte before the run of backslashes
		 * ending here; an even distance means that this
		 * backslash is itself escaped by the preceding one.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != '\\')
				break;

		if ((stesc - cp) % 2 == 0) {
			stesc = (char *)cp;
			continue;
		}

		/*
		 * Decide whether to expand or to check only.
		 * A non-NULL res marks the escapes that print their
		 * result into ubuf; for \B and \w, the character
		 * following the escape name is the delimiter.
		 */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			res = ubuf;
			break;
		default:
			esc = mandoc_escape(&cp, &stnam, &inaml);
			if (esc == ESCAPE_ERROR ||
			    (esc == ESCAPE_SPECIAL &&
			     mchars_spec2cp(stnam, inaml) < 0))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of 0 means that the name has no fixed
		 * length but extends up to the terminator.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the escape name and the delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/*
		 * Advance to the end of the name.
		 * Inside \w only, nested escape sequences are skipped
		 * with mandoc_escape() and count as one character
		 * or as none, depending on their type.
		 */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - buf->buf), stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || stesc[1] != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete)
				res = roff_getstrn(r, stnam, naml);
			break;
		case 'B':
			/*
			 * Yield '1' only if the whole argument up to
			 * the closing delimiter is a valid expression.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			/* The width is 24 units per counted character. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - buf->buf),
			    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * Replace the escape sequence by the string.
		 * Cutting the old buffer at the backslash lets "%s"
		 * copy the part before the escape; cp still points
		 * to the part after it.
		 */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Resume right after the inserted text, such that
		 * the backward scan also covers the replacement.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1459 
1460 /*
1461  * Process text streams.
1462  */
1463 static enum rofferr
1464 roff_parsetext(struct buf *buf, int pos, int *offs)
1465 {
1466 	size_t		 sz;
1467 	const char	*start;
1468 	char		*p;
1469 	int		 isz;
1470 	enum mandoc_esc	 esc;
1471 
1472 	/* Spring the input line trap. */
1473 
1474 	if (roffit_lines == 1) {
1475 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1476 		free(buf->buf);
1477 		buf->buf = p;
1478 		buf->sz = isz + 1;
1479 		*offs = 0;
1480 		free(roffit_macro);
1481 		roffit_lines = 0;
1482 		return ROFF_REPARSE;
1483 	} else if (roffit_lines > 1)
1484 		--roffit_lines;
1485 
1486 	/* Convert all breakable hyphens into ASCII_HYPH. */
1487 
1488 	start = p = buf->buf + pos;
1489 
1490 	while (*p != '\0') {
1491 		sz = strcspn(p, "-\\");
1492 		p += sz;
1493 
1494 		if (*p == '\0')
1495 			break;
1496 
1497 		if (*p == '\\') {
1498 			/* Skip over escapes. */
1499 			p++;
1500 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1501 			if (esc == ESCAPE_ERROR)
1502 				break;
1503 			while (*p == '-')
1504 				p++;
1505 			continue;
1506 		} else if (p == start) {
1507 			p++;
1508 			continue;
1509 		}
1510 
1511 		if (isalpha((unsigned char)p[-1]) &&
1512 		    isalpha((unsigned char)p[1]))
1513 			*p = ASCII_HYPH;
1514 		p++;
1515 	}
1516 	return ROFF_CONT;
1517 }
1518 
/*
 * Parse one input line, starting at *offs in buf.
 *
 * In this order: handle in-line equation delimiters, expand escape
 * sequences with roff_res(), then dispatch the line to the text or
 * sub handler of the innermost open roff block, to the equation or
 * table parser, to the plain text handler, or to the handler of the
 * request or user-defined macro found on the line.
 * Returns the code produced by the handler that dealt with the line;
 * ROFF_CONT is returned for macro lines that are neither roff
 * requests nor user-defined macros.
 */
enum rofferr
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum rofft	 t;
	enum rofferr	 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/*
	 * Handle in-line equation delimiters.
	 * This only applies outside tables, after an equation
	 * defining delimiters, and while no non-inline equation
	 * is open.
	 */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_res(r, buf, ln, pos);
	if (e == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* This may advance pos; ppos keeps the original offset. */
	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		assert(roffs[t].text);
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		assert(e == ROFF_IGN || e == ROFF_CONT);
		if (e != ROFF_CONT)
			return e;
	}
	if (r->eqn != NULL)
		return eqn_read(&r->eqn, ln, buf->buf, ppos, offs);
	if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
		return tbl_read(r->tbl, ln, buf->buf, ppos);
	if ( ! ctl)
		return roff_parsetext(buf, pos, offs);

	/*
	 * Skip empty request lines.
	 * A double quote right after the control character
	 * is reported as a bad comment.
	 */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
		    ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		assert(roffs[t].sub);
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/*
	 * Tables ignore most macros.
	 * Except for a nested TS, the remainder of the line after
	 * the macro name is handed to the table parser.
	 */

	if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
		    ln, pos, buf->buf + spos);
		if (t == ROFF_TS)
			return ROFF_IGN;
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		return tbl_read(r->tbl, ln, buf->buf, pos);
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == ROFF_MAX)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	assert(roffs[t].proc);
	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
}
1627 
1628 void
1629 roff_endparse(struct roff *r)
1630 {
1631 
1632 	if (r->last)
1633 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1634 		    r->last->line, r->last->col,
1635 		    roffs[r->last->tok].name);
1636 
1637 	if (r->eqn) {
1638 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1639 		    r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1640 		eqn_end(&r->eqn);
1641 	}
1642 
1643 	if (r->tbl) {
1644 		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1645 		    r->tbl->line, r->tbl->pos, "TS");
1646 		tbl_end(&r->tbl);
1647 	}
1648 }
1649 
1650 /*
1651  * Parse a roff node's type from the input buffer.  This must be in the
1652  * form of ".foo xxx" in the usual way.
1653  */
1654 static enum rofft
1655 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1656 {
1657 	char		*cp;
1658 	const char	*mac;
1659 	size_t		 maclen;
1660 	enum rofft	 t;
1661 
1662 	cp = buf + *pos;
1663 
1664 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1665 		return ROFF_MAX;
1666 
1667 	mac = cp;
1668 	maclen = roff_getname(r, &cp, ln, ppos);
1669 
1670 	t = (r->current_string = roff_getstrn(r, mac, maclen))
1671 	    ? ROFF_USERDEF : roffhash_find(mac, maclen);
1672 
1673 	if (ROFF_MAX != t)
1674 		*pos = cp - buf;
1675 
1676 	return t;
1677 }
1678 
1679 /* --- handling of request blocks ----------------------------------------- */
1680 
1681 static enum rofferr
1682 roff_cblock(ROFF_ARGS)
1683 {
1684 
1685 	/*
1686 	 * A block-close `..' should only be invoked as a child of an
1687 	 * ignore macro, otherwise raise a warning and just ignore it.
1688 	 */
1689 
1690 	if (r->last == NULL) {
1691 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1692 		    ln, ppos, "..");
1693 		return ROFF_IGN;
1694 	}
1695 
1696 	switch (r->last->tok) {
1697 	case ROFF_am:
1698 		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1699 	case ROFF_ami:
1700 	case ROFF_de:
1701 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1702 	case ROFF_dei:
1703 	case ROFF_ig:
1704 		break;
1705 	default:
1706 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1707 		    ln, ppos, "..");
1708 		return ROFF_IGN;
1709 	}
1710 
1711 	if (buf->buf[pos] != '\0')
1712 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1713 		    ".. %s", buf->buf + pos);
1714 
1715 	roffnode_pop(r);
1716 	roffnode_cleanscope(r);
1717 	return ROFF_IGN;
1718 
1719 }
1720 
1721 static void
1722 roffnode_cleanscope(struct roff *r)
1723 {
1724 
1725 	while (r->last) {
1726 		if (--r->last->endspan != 0)
1727 			break;
1728 		roffnode_pop(r);
1729 	}
1730 }
1731 
1732 static void
1733 roff_ccond(struct roff *r, int ln, int ppos)
1734 {
1735 
1736 	if (NULL == r->last) {
1737 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1738 		    ln, ppos, "\\}");
1739 		return;
1740 	}
1741 
1742 	switch (r->last->tok) {
1743 	case ROFF_el:
1744 	case ROFF_ie:
1745 	case ROFF_if:
1746 		break;
1747 	default:
1748 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1749 		    ln, ppos, "\\}");
1750 		return;
1751 	}
1752 
1753 	if (r->last->endspan > -1) {
1754 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1755 		    ln, ppos, "\\}");
1756 		return;
1757 	}
1758 
1759 	roffnode_pop(r);
1760 	roffnode_cleanscope(r);
1761 	return;
1762 }
1763 
/*
 * Handle the requests opening a block that is closed by `..' or by
 * a custom end marker: the de, dei, am, and ami families, and ig.
 * Parse the macro name and the optional end marker, resolving both
 * through the string table for dei and ami, and push a new roff
 * node for the block.  Always returns ROFF_IGN.
 */
static enum rofferr
roff_block(ROFF_ARGS)
{
	const char	*name;
	char		*iname, *cp;
	size_t		 namesz;

	/* Ignore groff compatibility mode for now. */

	if (tok == ROFF_de1)
		tok = ROFF_de;
	else if (tok == ROFF_dei1)
		tok = ROFF_dei;
	else if (tok == ROFF_am1)
		tok = ROFF_am;
	else if (tok == ROFF_ami1)
		tok = ROFF_ami;

	/*
	 * Parse the macro name argument.
	 * The name is NUL-terminated in place inside the buffer;
	 * cp has already been advanced by roff_getname().
	 */

	cp = buf->buf + pos;
	if (tok == ROFF_ig) {
		iname = NULL;
		namesz = 0;
	} else {
		iname = cp;
		namesz = roff_getname(r, &cp, ln, ppos);
		iname[namesz] = '\0';
	}

	/* Resolve the macro name argument if it is indirect. */

	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(iname - buf->buf),
			    "%.*s", (int)namesz, iname);
			namesz = 0;
		} else
			namesz = strlen(name);
	} else
		name = iname;

	/* Only ig can do without a name. */

	if (namesz == 0 && tok != ROFF_ig) {
		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
		    ln, ppos, roffs[tok].name);
		return ROFF_IGN;
	}

	roffnode_push(r, tok, name, ln, ppos);

	/*
	 * At the beginning of a `de' macro, clear the existing string
	 * with the same name, if there is one.  New content will be
	 * appended from roff_block_text() in multiline mode.
	 */

	if (tok == ROFF_de || tok == ROFF_dei)
		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);

	/* Without a second argument, the block is closed by `..'. */

	if (*cp == '\0')
		return ROFF_IGN;

	/* Get the custom end marker. */

	iname = cp;
	namesz = roff_getname(r, &cp, ln, ppos);

	/* Resolve the end marker if it is indirect. */

	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(iname - buf->buf),
			    "%.*s", (int)namesz, iname);
			namesz = 0;
		} else
			namesz = strlen(name);
	} else
		name = iname;

	/* Store a copy of the end marker in the node just pushed. */

	if (namesz)
		r->last->end = mandoc_strndup(name, namesz);

	if (*cp != '\0')
		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
		    ln, pos, ".%s ... %s", roffs[tok].name, cp);

	return ROFF_IGN;
}
1854 
1855 static enum rofferr
1856 roff_block_sub(ROFF_ARGS)
1857 {
1858 	enum rofft	t;
1859 	int		i, j;
1860 
1861 	/*
1862 	 * First check whether a custom macro exists at this level.  If
1863 	 * it does, then check against it.  This is some of groff's
1864 	 * stranger behaviours.  If we encountered a custom end-scope
1865 	 * tag and that tag also happens to be a "real" macro, then we
1866 	 * need to try interpreting it again as a real macro.  If it's
1867 	 * not, then return ignore.  Else continue.
1868 	 */
1869 
1870 	if (r->last->end) {
1871 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1872 			if (buf->buf[i] != r->last->end[j])
1873 				break;
1874 
1875 		if (r->last->end[j] == '\0' &&
1876 		    (buf->buf[i] == '\0' ||
1877 		     buf->buf[i] == ' ' ||
1878 		     buf->buf[i] == '\t')) {
1879 			roffnode_pop(r);
1880 			roffnode_cleanscope(r);
1881 
1882 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1883 				i++;
1884 
1885 			pos = i;
1886 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1887 			    ROFF_MAX)
1888 				return ROFF_RERUN;
1889 			return ROFF_IGN;
1890 		}
1891 	}
1892 
1893 	/*
1894 	 * If we have no custom end-query or lookup failed, then try
1895 	 * pulling it out of the hashtable.
1896 	 */
1897 
1898 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1899 
1900 	if (t != ROFF_cblock) {
1901 		if (tok != ROFF_ig)
1902 			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1903 		return ROFF_IGN;
1904 	}
1905 
1906 	assert(roffs[t].proc);
1907 	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1908 }
1909 
1910 static enum rofferr
1911 roff_block_text(ROFF_ARGS)
1912 {
1913 
1914 	if (tok != ROFF_ig)
1915 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
1916 
1917 	return ROFF_IGN;
1918 }
1919 
1920 static enum rofferr
1921 roff_cond_sub(ROFF_ARGS)
1922 {
1923 	enum rofft	 t;
1924 	char		*ep;
1925 	int		 rr;
1926 
1927 	rr = r->last->rule;
1928 	roffnode_cleanscope(r);
1929 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1930 
1931 	/*
1932 	 * Fully handle known macros when they are structurally
1933 	 * required or when the conditional evaluated to true.
1934 	 */
1935 
1936 	if ((t != ROFF_MAX) &&
1937 	    (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1938 		assert(roffs[t].proc);
1939 		return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1940 	}
1941 
1942 	/*
1943 	 * If `\}' occurs on a macro line without a preceding macro,
1944 	 * drop the line completely.
1945 	 */
1946 
1947 	ep = buf->buf + pos;
1948 	if (ep[0] == '\\' && ep[1] == '}')
1949 		rr = 0;
1950 
1951 	/* Always check for the closing delimiter `\}'. */
1952 
1953 	while ((ep = strchr(ep, '\\')) != NULL) {
1954 		if (*(++ep) == '}') {
1955 			*ep = '&';
1956 			roff_ccond(r, ln, ep - buf->buf - 1);
1957 		}
1958 		if (*ep != '\0')
1959 			++ep;
1960 	}
1961 	return rr ? ROFF_CONT : ROFF_IGN;
1962 }
1963 
1964 static enum rofferr
1965 roff_cond_text(ROFF_ARGS)
1966 {
1967 	char		*ep;
1968 	int		 rr;
1969 
1970 	rr = r->last->rule;
1971 	roffnode_cleanscope(r);
1972 
1973 	ep = buf->buf + pos;
1974 	while ((ep = strchr(ep, '\\')) != NULL) {
1975 		if (*(++ep) == '}') {
1976 			*ep = '&';
1977 			roff_ccond(r, ln, ep - buf->buf - 1);
1978 		}
1979 		if (*ep != '\0')
1980 			++ep;
1981 	}
1982 	return rr ? ROFF_CONT : ROFF_IGN;
1983 }
1984 
1985 /* --- handling of numeric and conditional expressions -------------------- */
1986 
/*
 * Parse a single signed integer number.  Stop at the first non-digit.
 * If there is at least one digit, return success (1) and advance the
 * parse point, else return failure (0) and leave the parse point
 * unchanged.
 *
 * v is the string to parse and *pos the offset to start at.
 * An optional sign may precede the digits; with ROFFNUM_WHITE in
 * flags, whitespace between the sign and the digits is skipped.
 * One scaling unit directly following the digits is consumed; the
 * value is only scaled accordingly if flags contains ROFFNUM_SCALE.
 * The result is stored in *res unless res is NULL.
 * Overflows are not detected.
 */
static int
roff_getnum(const char *v, int *pos, int *res, int flags)
{
	int	 myres, scaled, n, p, dstart;

	if (NULL == res)
		res = &myres;

	p = *pos;
	n = v[p] == '-';
	if (n || v[p] == '+')
		p++;

	if (flags & ROFFNUM_WHITE)
		while (isspace((unsigned char)v[p]))
			p++;

	/*
	 * Remember where the digits are supposed to start.
	 * Comparing the end against the original parse point does
	 * not work because an arbitrary number of sign and
	 * whitespace characters may already have been skipped.
	 */

	dstart = p;
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';
	if (p == dstart)
		return 0;

	if (n)
		*res = -*res;

	/* Each number may be followed by one optional scaling unit. */

	switch (v[p]) {
	case 'f':
		scaled = *res * 65536;
		break;
	case 'i':
		scaled = *res * 240;
		break;
	case 'c':
		scaled = *res * 240 / 2.54;
		break;
	case 'v':
	case 'P':
		scaled = *res * 40;
		break;
	case 'm':
	case 'n':
		scaled = *res * 24;
		break;
	case 'p':
		scaled = *res * 10 / 3;
		break;
	case 'u':
		scaled = *res;
		break;
	case 'M':
		scaled = *res * 6 / 25;
		break;
	default:
		/* Not a unit: make sure it is not consumed below. */
		scaled = *res;
		p--;
		break;
	}
	if (flags & ROFFNUM_SCALE)
		*res = scaled;

	*pos = p + 1;
	return 1;
}
2058 
/*
 * Evaluate a string comparison condition.
 * The character at the parse point serves as the delimiter.
 * Succeed if the string between its first and second occurrence
 * is identical to the string between its second and third
 * occurrence.  Move the parse point behind the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim;	/* the initial delimiter */
	const char	*lhs;	/* cursor in the first string */
	const char	*rhs;	/* cursor in the second string */
	int		 equal;

	equal = 0;
	delim = v + *pos;
	lhs = delim + 1;
	rhs = strchr(lhs, *delim);

	/* Without a middle delimiter, there is nothing to compare. */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings end at the same time. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2101 
2102 /*
2103  * Evaluate an optionally negated single character, numerical,
2104  * or string condition.
2105  */
2106 static int
2107 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2108 {
2109 	char	*cp, *name;
2110 	size_t	 sz;
2111 	int	 number, savepos, wanttrue;
2112 
2113 	if ('!' == v[*pos]) {
2114 		wanttrue = 0;
2115 		(*pos)++;
2116 	} else
2117 		wanttrue = 1;
2118 
2119 	switch (v[*pos]) {
2120 	case '\0':
2121 		return 0;
2122 	case 'n':
2123 	case 'o':
2124 		(*pos)++;
2125 		return wanttrue;
2126 	case 'c':
2127 	case 'd':
2128 	case 'e':
2129 	case 't':
2130 	case 'v':
2131 		(*pos)++;
2132 		return !wanttrue;
2133 	case 'r':
2134 		cp = name = v + ++*pos;
2135 		sz = roff_getname(r, &cp, ln, *pos);
2136 		*pos = cp - v;
2137 		return (sz && roff_hasregn(r, name, sz)) == wanttrue;
2138 	default:
2139 		break;
2140 	}
2141 
2142 	savepos = *pos;
2143 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2144 		return (number > 0) == wanttrue;
2145 	else if (*pos == savepos)
2146 		return roff_evalstrcond(v, pos) == wanttrue;
2147 	else
2148 		return 0;
2149 }
2150 
2151 static enum rofferr
2152 roff_line_ignore(ROFF_ARGS)
2153 {
2154 
2155 	return ROFF_IGN;
2156 }
2157 
2158 static enum rofferr
2159 roff_insec(ROFF_ARGS)
2160 {
2161 
2162 	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2163 	    ln, ppos, roffs[tok].name);
2164 	return ROFF_IGN;
2165 }
2166 
2167 static enum rofferr
2168 roff_unsupp(ROFF_ARGS)
2169 {
2170 
2171 	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2172 	    ln, ppos, roffs[tok].name);
2173 	return ROFF_IGN;
2174 }
2175 
2176 static enum rofferr
2177 roff_cond(ROFF_ARGS)
2178 {
2179 
2180 	roffnode_push(r, tok, NULL, ln, ppos);
2181 
2182 	/*
2183 	 * An `.el' has no conditional body: it will consume the value
2184 	 * of the current rstack entry set in prior `ie' calls or
2185 	 * defaults to DENY.
2186 	 *
2187 	 * If we're not an `el', however, then evaluate the conditional.
2188 	 */
2189 
2190 	r->last->rule = tok == ROFF_el ?
2191 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2192 	    roff_evalcond(r, ln, buf->buf, &pos);
2193 
2194 	/*
2195 	 * An if-else will put the NEGATION of the current evaluated
2196 	 * conditional into the stack of rules.
2197 	 */
2198 
2199 	if (tok == ROFF_ie) {
2200 		if (r->rstackpos + 1 == r->rstacksz) {
2201 			r->rstacksz += 16;
2202 			r->rstack = mandoc_reallocarray(r->rstack,
2203 			    r->rstacksz, sizeof(int));
2204 		}
2205 		r->rstack[++r->rstackpos] = !r->last->rule;
2206 	}
2207 
2208 	/* If the parent has false as its rule, then so do we. */
2209 
2210 	if (r->last->parent && !r->last->parent->rule)
2211 		r->last->rule = 0;
2212 
2213 	/*
2214 	 * Determine scope.
2215 	 * If there is nothing on the line after the conditional,
2216 	 * not even whitespace, use next-line scope.
2217 	 */
2218 
2219 	if (buf->buf[pos] == '\0') {
2220 		r->last->endspan = 2;
2221 		goto out;
2222 	}
2223 
2224 	while (buf->buf[pos] == ' ')
2225 		pos++;
2226 
2227 	/* An opening brace requests multiline scope. */
2228 
2229 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2230 		r->last->endspan = -1;
2231 		pos += 2;
2232 		while (buf->buf[pos] == ' ')
2233 			pos++;
2234 		goto out;
2235 	}
2236 
2237 	/*
2238 	 * Anything else following the conditional causes
2239 	 * single-line scope.  Warn if the scope contains
2240 	 * nothing but trailing whitespace.
2241 	 */
2242 
2243 	if (buf->buf[pos] == '\0')
2244 		mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2245 		    ln, ppos, roffs[tok].name);
2246 
2247 	r->last->endspan = 1;
2248 
2249 out:
2250 	*offs = pos;
2251 	return ROFF_RERUN;
2252 }
2253 
2254 static enum rofferr
2255 roff_ds(ROFF_ARGS)
2256 {
2257 	char		*string;
2258 	const char	*name;
2259 	size_t		 namesz;
2260 
2261 	/* Ignore groff compatibility mode for now. */
2262 
2263 	if (tok == ROFF_ds1)
2264 		tok = ROFF_ds;
2265 	else if (tok == ROFF_as1)
2266 		tok = ROFF_as;
2267 
2268 	/*
2269 	 * The first word is the name of the string.
2270 	 * If it is empty or terminated by an escape sequence,
2271 	 * abort the `ds' request without defining anything.
2272 	 */
2273 
2274 	name = string = buf->buf + pos;
2275 	if (*name == '\0')
2276 		return ROFF_IGN;
2277 
2278 	namesz = roff_getname(r, &string, ln, pos);
2279 	if (name[namesz] == '\\')
2280 		return ROFF_IGN;
2281 
2282 	/* Read past the initial double-quote, if any. */
2283 	if (*string == '"')
2284 		string++;
2285 
2286 	/* The rest is the value. */
2287 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2288 	    ROFF_as == tok);
2289 	return ROFF_IGN;
2290 }
2291 
/*
 * Try to parse one operator, one or two characters long,
 * at position *pos of the string v.
 * On success, store a one-character code for the operator
 * into *res, advance *pos past the operator, and return
 * that code, which is never zero.
 * Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g',
 * ">?" as 'a', and "==" as '='.
 * On failure, return 0 and leave *pos alone; *res then
 * contains the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*op;
	int		 width;

	op = v + *pos;
	*res = op[0];
	width = 1;

	switch (op[0]) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (op[1] == '=') {
			*res = 'l';
			width = 2;
		} else if (op[1] == '>') {
			*res = '!';
			width = 2;
		} else if (op[1] == '?') {
			*res = 'i';
			width = 2;
		}
		break;
	case '>':
		if (op[1] == '=') {
			*res = 'g';
			width = 2;
		} else if (op[1] == '?') {
			*res = 'a';
			width = 2;
		}
		break;
	case '=':
		if (op[1] == '=')
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
2355 
2356 /*
2357  * Evaluate either a parenthesized numeric expression
2358  * or a single signed integer number.
2359  */
2360 static int
2361 roff_evalpar(struct roff *r, int ln,
2362 	const char *v, int *pos, int *res, int flags)
2363 {
2364 
2365 	if ('(' != v[*pos])
2366 		return roff_getnum(v, pos, res, flags);
2367 
2368 	(*pos)++;
2369 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2370 		return 0;
2371 
2372 	/*
2373 	 * Omission of the closing parenthesis
2374 	 * is an error in validation mode,
2375 	 * but ignored in evaluation mode.
2376 	 */
2377 
2378 	if (')' == v[*pos])
2379 		(*pos)++;
2380 	else if (NULL == res)
2381 		return 0;
2382 
2383 	return 1;
2384 }
2385 
2386 /*
2387  * Evaluate a complete numeric expression.
2388  * Proceed left to right, there is no concept of precedence.
2389  */
2390 static int
2391 roff_evalnum(struct roff *r, int ln, const char *v,
2392 	int *pos, int *res, int flags)
2393 {
2394 	int		 mypos, operand2;
2395 	char		 operator;
2396 
2397 	if (NULL == pos) {
2398 		mypos = 0;
2399 		pos = &mypos;
2400 	}
2401 
2402 	if (flags & ROFFNUM_WHITE)
2403 		while (isspace((unsigned char)v[*pos]))
2404 			(*pos)++;
2405 
2406 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2407 		return 0;
2408 
2409 	while (1) {
2410 		if (flags & ROFFNUM_WHITE)
2411 			while (isspace((unsigned char)v[*pos]))
2412 				(*pos)++;
2413 
2414 		if ( ! roff_getop(v, pos, &operator))
2415 			break;
2416 
2417 		if (flags & ROFFNUM_WHITE)
2418 			while (isspace((unsigned char)v[*pos]))
2419 				(*pos)++;
2420 
2421 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2422 			return 0;
2423 
2424 		if (flags & ROFFNUM_WHITE)
2425 			while (isspace((unsigned char)v[*pos]))
2426 				(*pos)++;
2427 
2428 		if (NULL == res)
2429 			continue;
2430 
2431 		switch (operator) {
2432 		case '+':
2433 			*res += operand2;
2434 			break;
2435 		case '-':
2436 			*res -= operand2;
2437 			break;
2438 		case '*':
2439 			*res *= operand2;
2440 			break;
2441 		case '/':
2442 			if (operand2 == 0) {
2443 				mandoc_msg(MANDOCERR_DIVZERO,
2444 					r->parse, ln, *pos, v);
2445 				*res = 0;
2446 				break;
2447 			}
2448 			*res /= operand2;
2449 			break;
2450 		case '%':
2451 			if (operand2 == 0) {
2452 				mandoc_msg(MANDOCERR_DIVZERO,
2453 					r->parse, ln, *pos, v);
2454 				*res = 0;
2455 				break;
2456 			}
2457 			*res %= operand2;
2458 			break;
2459 		case '<':
2460 			*res = *res < operand2;
2461 			break;
2462 		case '>':
2463 			*res = *res > operand2;
2464 			break;
2465 		case 'l':
2466 			*res = *res <= operand2;
2467 			break;
2468 		case 'g':
2469 			*res = *res >= operand2;
2470 			break;
2471 		case '=':
2472 			*res = *res == operand2;
2473 			break;
2474 		case '!':
2475 			*res = *res != operand2;
2476 			break;
2477 		case '&':
2478 			*res = *res && operand2;
2479 			break;
2480 		case ':':
2481 			*res = *res || operand2;
2482 			break;
2483 		case 'i':
2484 			if (operand2 < *res)
2485 				*res = operand2;
2486 			break;
2487 		case 'a':
2488 			if (operand2 > *res)
2489 				*res = operand2;
2490 			break;
2491 		default:
2492 			abort();
2493 		}
2494 	}
2495 	return 1;
2496 }
2497 
2498 /* --- register management ------------------------------------------------ */
2499 
2500 void
2501 roff_setreg(struct roff *r, const char *name, int val, char sign)
2502 {
2503 	struct roffreg	*reg;
2504 
2505 	/* Search for an existing register with the same name. */
2506 	reg = r->regtab;
2507 
2508 	while (reg && strcmp(name, reg->key.p))
2509 		reg = reg->next;
2510 
2511 	if (NULL == reg) {
2512 		/* Create a new register. */
2513 		reg = mandoc_malloc(sizeof(struct roffreg));
2514 		reg->key.p = mandoc_strdup(name);
2515 		reg->key.sz = strlen(name);
2516 		reg->val = 0;
2517 		reg->next = r->regtab;
2518 		r->regtab = reg;
2519 	}
2520 
2521 	if ('+' == sign)
2522 		reg->val += val;
2523 	else if ('-' == sign)
2524 		reg->val -= val;
2525 	else
2526 		reg->val = val;
2527 }
2528 
2529 /*
2530  * Handle some predefined read-only number registers.
2531  * For now, return -1 if the requested register is not predefined;
2532  * in case a predefined read-only register having the value -1
2533  * were to turn up, another special value would have to be chosen.
2534  */
2535 static int
2536 roff_getregro(const struct roff *r, const char *name)
2537 {
2538 
2539 	switch (*name) {
2540 	case '$':  /* Number of arguments of the last macro evaluated. */
2541 		return r->argc;
2542 	case 'A':  /* ASCII approximation mode is always off. */
2543 		return 0;
2544 	case 'g':  /* Groff compatibility mode is always on. */
2545 		return 1;
2546 	case 'H':  /* Fixed horizontal resolution. */
2547 		return 24;
2548 	case 'j':  /* Always adjust left margin only. */
2549 		return 0;
2550 	case 'T':  /* Some output device is always defined. */
2551 		return 1;
2552 	case 'V':  /* Fixed vertical resolution. */
2553 		return 40;
2554 	default:
2555 		return -1;
2556 	}
2557 }
2558 
2559 int
2560 roff_getreg(const struct roff *r, const char *name)
2561 {
2562 	struct roffreg	*reg;
2563 	int		 val;
2564 
2565 	if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2566 		val = roff_getregro(r, name + 1);
2567 		if (-1 != val)
2568 			return val;
2569 	}
2570 
2571 	for (reg = r->regtab; reg; reg = reg->next)
2572 		if (0 == strcmp(name, reg->key.p))
2573 			return reg->val;
2574 
2575 	return 0;
2576 }
2577 
2578 static int
2579 roff_getregn(const struct roff *r, const char *name, size_t len)
2580 {
2581 	struct roffreg	*reg;
2582 	int		 val;
2583 
2584 	if ('.' == name[0] && 2 == len) {
2585 		val = roff_getregro(r, name + 1);
2586 		if (-1 != val)
2587 			return val;
2588 	}
2589 
2590 	for (reg = r->regtab; reg; reg = reg->next)
2591 		if (len == reg->key.sz &&
2592 		    0 == strncmp(name, reg->key.p, len))
2593 			return reg->val;
2594 
2595 	return 0;
2596 }
2597 
2598 static int
2599 roff_hasregn(const struct roff *r, const char *name, size_t len)
2600 {
2601 	struct roffreg	*reg;
2602 	int		 val;
2603 
2604 	if ('.' == name[0] && 2 == len) {
2605 		val = roff_getregro(r, name + 1);
2606 		if (-1 != val)
2607 			return 1;
2608 	}
2609 
2610 	for (reg = r->regtab; reg; reg = reg->next)
2611 		if (len == reg->key.sz &&
2612 		    0 == strncmp(name, reg->key.p, len))
2613 			return 1;
2614 
2615 	return 0;
2616 }
2617 
2618 static void
2619 roff_freereg(struct roffreg *reg)
2620 {
2621 	struct roffreg	*old_reg;
2622 
2623 	while (NULL != reg) {
2624 		free(reg->key.p);
2625 		old_reg = reg;
2626 		reg = reg->next;
2627 		free(old_reg);
2628 	}
2629 }
2630 
2631 static enum rofferr
2632 roff_nr(ROFF_ARGS)
2633 {
2634 	char		*key, *val;
2635 	size_t		 keysz;
2636 	int		 iv;
2637 	char		 sign;
2638 
2639 	key = val = buf->buf + pos;
2640 	if (*key == '\0')
2641 		return ROFF_IGN;
2642 
2643 	keysz = roff_getname(r, &val, ln, pos);
2644 	if (key[keysz] == '\\')
2645 		return ROFF_IGN;
2646 	key[keysz] = '\0';
2647 
2648 	sign = *val;
2649 	if (sign == '+' || sign == '-')
2650 		val++;
2651 
2652 	if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2653 		roff_setreg(r, key, iv, sign);
2654 
2655 	return ROFF_IGN;
2656 }
2657 
2658 static enum rofferr
2659 roff_rr(ROFF_ARGS)
2660 {
2661 	struct roffreg	*reg, **prev;
2662 	char		*name, *cp;
2663 	size_t		 namesz;
2664 
2665 	name = cp = buf->buf + pos;
2666 	if (*name == '\0')
2667 		return ROFF_IGN;
2668 	namesz = roff_getname(r, &cp, ln, pos);
2669 	name[namesz] = '\0';
2670 
2671 	prev = &r->regtab;
2672 	while (1) {
2673 		reg = *prev;
2674 		if (reg == NULL || !strcmp(name, reg->key.p))
2675 			break;
2676 		prev = &reg->next;
2677 	}
2678 	if (reg != NULL) {
2679 		*prev = reg->next;
2680 		free(reg->key.p);
2681 		free(reg);
2682 	}
2683 	return ROFF_IGN;
2684 }
2685 
2686 /* --- handler functions for roff requests -------------------------------- */
2687 
2688 static enum rofferr
2689 roff_rm(ROFF_ARGS)
2690 {
2691 	const char	 *name;
2692 	char		 *cp;
2693 	size_t		  namesz;
2694 
2695 	cp = buf->buf + pos;
2696 	while (*cp != '\0') {
2697 		name = cp;
2698 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2699 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2700 		if (name[namesz] == '\\')
2701 			break;
2702 	}
2703 	return ROFF_IGN;
2704 }
2705 
2706 static enum rofferr
2707 roff_it(ROFF_ARGS)
2708 {
2709 	int		 iv;
2710 
2711 	/* Parse the number of lines. */
2712 
2713 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2714 		mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2715 		    ln, ppos, buf->buf + 1);
2716 		return ROFF_IGN;
2717 	}
2718 
2719 	while (isspace((unsigned char)buf->buf[pos]))
2720 		pos++;
2721 
2722 	/*
2723 	 * Arm the input line trap.
2724 	 * Special-casing "an-trap" is an ugly workaround to cope
2725 	 * with DocBook stupidly fiddling with man(7) internals.
2726 	 */
2727 
2728 	roffit_lines = iv;
2729 	roffit_macro = mandoc_strdup(iv != 1 ||
2730 	    strcmp(buf->buf + pos, "an-trap") ?
2731 	    buf->buf + pos : "br");
2732 	return ROFF_IGN;
2733 }
2734 
2735 static enum rofferr
2736 roff_Dd(ROFF_ARGS)
2737 {
2738 	const char *const	*cp;
2739 
2740 	if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2741 		for (cp = __mdoc_reserved; *cp; cp++)
2742 			roff_setstr(r, *cp, NULL, 0);
2743 
2744 	if (r->format == 0)
2745 		r->format = MPARSE_MDOC;
2746 
2747 	return ROFF_CONT;
2748 }
2749 
2750 static enum rofferr
2751 roff_TH(ROFF_ARGS)
2752 {
2753 	const char *const	*cp;
2754 
2755 	if ((r->options & MPARSE_QUICK) == 0)
2756 		for (cp = __man_reserved; *cp; cp++)
2757 			roff_setstr(r, *cp, NULL, 0);
2758 
2759 	if (r->format == 0)
2760 		r->format = MPARSE_MAN;
2761 
2762 	return ROFF_CONT;
2763 }
2764 
2765 static enum rofferr
2766 roff_TE(ROFF_ARGS)
2767 {
2768 
2769 	if (NULL == r->tbl)
2770 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2771 		    ln, ppos, "TE");
2772 	else if ( ! tbl_end(&r->tbl)) {
2773 		free(buf->buf);
2774 		buf->buf = mandoc_strdup(".sp");
2775 		buf->sz = 4;
2776 		return ROFF_REPARSE;
2777 	}
2778 	return ROFF_IGN;
2779 }
2780 
2781 static enum rofferr
2782 roff_T_(ROFF_ARGS)
2783 {
2784 
2785 	if (NULL == r->tbl)
2786 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2787 		    ln, ppos, "T&");
2788 	else
2789 		tbl_restart(ppos, ln, r->tbl);
2790 
2791 	return ROFF_IGN;
2792 }
2793 
2794 /*
2795  * Handle in-line equation delimiters.
2796  */
2797 static enum rofferr
2798 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2799 {
2800 	char		*cp1, *cp2;
2801 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2802 
2803 	/*
2804 	 * Outside equations, look for an opening delimiter.
2805 	 * If we are inside an equation, we already know it is
2806 	 * in-line, or this function wouldn't have been called;
2807 	 * so look for a closing delimiter.
2808 	 */
2809 
2810 	cp1 = buf->buf + pos;
2811 	cp2 = strchr(cp1, r->eqn == NULL ?
2812 	    r->last_eqn->odelim : r->last_eqn->cdelim);
2813 	if (cp2 == NULL)
2814 		return ROFF_CONT;
2815 
2816 	*cp2++ = '\0';
2817 	bef_pr = bef_nl = aft_nl = aft_pr = "";
2818 
2819 	/* Handle preceding text, protecting whitespace. */
2820 
2821 	if (*buf->buf != '\0') {
2822 		if (r->eqn == NULL)
2823 			bef_pr = "\\&";
2824 		bef_nl = "\n";
2825 	}
2826 
2827 	/*
2828 	 * Prepare replacing the delimiter with an equation macro
2829 	 * and drop leading white space from the equation.
2830 	 */
2831 
2832 	if (r->eqn == NULL) {
2833 		while (*cp2 == ' ')
2834 			cp2++;
2835 		mac = ".EQ";
2836 	} else
2837 		mac = ".EN";
2838 
2839 	/* Handle following text, protecting whitespace. */
2840 
2841 	if (*cp2 != '\0') {
2842 		aft_nl = "\n";
2843 		if (r->eqn != NULL)
2844 			aft_pr = "\\&";
2845 	}
2846 
2847 	/* Do the actual replacement. */
2848 
2849 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2850 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2851 	free(buf->buf);
2852 	buf->buf = cp1;
2853 
2854 	/* Toggle the in-line state of the eqn subsystem. */
2855 
2856 	r->eqn_inline = r->eqn == NULL;
2857 	return ROFF_REPARSE;
2858 }
2859 
2860 static enum rofferr
2861 roff_EQ(ROFF_ARGS)
2862 {
2863 	struct eqn_node *e;
2864 
2865 	assert(r->eqn == NULL);
2866 	e = eqn_alloc(ppos, ln, r->parse);
2867 
2868 	if (r->last_eqn) {
2869 		r->last_eqn->next = e;
2870 		e->delim = r->last_eqn->delim;
2871 		e->odelim = r->last_eqn->odelim;
2872 		e->cdelim = r->last_eqn->cdelim;
2873 	} else
2874 		r->first_eqn = r->last_eqn = e;
2875 
2876 	r->eqn = r->last_eqn = e;
2877 
2878 	if (buf->buf[pos] != '\0')
2879 		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2880 		    ".EQ %s", buf->buf + pos);
2881 
2882 	return ROFF_IGN;
2883 }
2884 
2885 static enum rofferr
2886 roff_EN(ROFF_ARGS)
2887 {
2888 
2889 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2890 	return ROFF_IGN;
2891 }
2892 
2893 static enum rofferr
2894 roff_TS(ROFF_ARGS)
2895 {
2896 	struct tbl_node	*tbl;
2897 
2898 	if (r->tbl) {
2899 		mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2900 		    ln, ppos, "TS breaks TS");
2901 		tbl_end(&r->tbl);
2902 	}
2903 
2904 	tbl = tbl_alloc(ppos, ln, r->parse);
2905 
2906 	if (r->last_tbl)
2907 		r->last_tbl->next = tbl;
2908 	else
2909 		r->first_tbl = r->last_tbl = tbl;
2910 
2911 	r->tbl = r->last_tbl = tbl;
2912 	return ROFF_IGN;
2913 }
2914 
2915 static enum rofferr
2916 roff_brp(ROFF_ARGS)
2917 {
2918 
2919 	buf->buf[pos - 1] = '\0';
2920 	return ROFF_CONT;
2921 }
2922 
2923 static enum rofferr
2924 roff_cc(ROFF_ARGS)
2925 {
2926 	const char	*p;
2927 
2928 	p = buf->buf + pos;
2929 
2930 	if (*p == '\0' || (r->control = *p++) == '.')
2931 		r->control = 0;
2932 
2933 	if (*p != '\0')
2934 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2935 		    ln, p - buf->buf, "cc ... %s", p);
2936 
2937 	return ROFF_IGN;
2938 }
2939 
2940 static enum rofferr
2941 roff_tr(ROFF_ARGS)
2942 {
2943 	const char	*p, *first, *second;
2944 	size_t		 fsz, ssz;
2945 	enum mandoc_esc	 esc;
2946 
2947 	p = buf->buf + pos;
2948 
2949 	if (*p == '\0') {
2950 		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2951 		return ROFF_IGN;
2952 	}
2953 
2954 	while (*p != '\0') {
2955 		fsz = ssz = 1;
2956 
2957 		first = p++;
2958 		if (*first == '\\') {
2959 			esc = mandoc_escape(&p, NULL, NULL);
2960 			if (esc == ESCAPE_ERROR) {
2961 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2962 				    ln, (int)(p - buf->buf), first);
2963 				return ROFF_IGN;
2964 			}
2965 			fsz = (size_t)(p - first);
2966 		}
2967 
2968 		second = p++;
2969 		if (*second == '\\') {
2970 			esc = mandoc_escape(&p, NULL, NULL);
2971 			if (esc == ESCAPE_ERROR) {
2972 				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2973 				    ln, (int)(p - buf->buf), second);
2974 				return ROFF_IGN;
2975 			}
2976 			ssz = (size_t)(p - second);
2977 		} else if (*second == '\0') {
2978 			mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2979 			    ln, first - buf->buf, "tr %s", first);
2980 			second = " ";
2981 			p--;
2982 		}
2983 
2984 		if (fsz > 1) {
2985 			roff_setstrn(&r->xmbtab, first, fsz,
2986 			    second, ssz, 0);
2987 			continue;
2988 		}
2989 
2990 		if (r->xtab == NULL)
2991 			r->xtab = mandoc_calloc(128,
2992 			    sizeof(struct roffstr));
2993 
2994 		free(r->xtab[(int)*first].p);
2995 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2996 		r->xtab[(int)*first].sz = ssz;
2997 	}
2998 
2999 	return ROFF_IGN;
3000 }
3001 
3002 static enum rofferr
3003 roff_so(ROFF_ARGS)
3004 {
3005 	char *name, *cp;
3006 
3007 	name = buf->buf + pos;
3008 	mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3009 
3010 	/*
3011 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3012 	 * opening anything that's not in our cwd or anything beneath
3013 	 * it.  Thus, explicitly disallow traversing up the file-system
3014 	 * or using absolute paths.
3015 	 */
3016 
3017 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3018 		mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3019 		    ".so %s", name);
3020 		buf->sz = mandoc_asprintf(&cp,
3021 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3022 		free(buf->buf);
3023 		buf->buf = cp;
3024 		*offs = 0;
3025 		return ROFF_REPARSE;
3026 	}
3027 
3028 	*offs = pos;
3029 	return ROFF_SO;
3030 }
3031 
3032 /* --- user defined strings and macros ------------------------------------ */
3033 
3034 static enum rofferr
3035 roff_userdef(ROFF_ARGS)
3036 {
3037 	const char	 *arg[9], *ap;
3038 	char		 *cp, *n1, *n2;
3039 	int		  i, ib, ie;
3040 	size_t		  asz, rsz;
3041 
3042 	/*
3043 	 * Collect pointers to macro argument strings
3044 	 * and NUL-terminate them.
3045 	 */
3046 
3047 	r->argc = 0;
3048 	cp = buf->buf + pos;
3049 	for (i = 0; i < 9; i++) {
3050 		if (*cp == '\0')
3051 			arg[i] = "";
3052 		else {
3053 			arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3054 			r->argc = i + 1;
3055 		}
3056 	}
3057 
3058 	/*
3059 	 * Expand macro arguments.
3060 	 */
3061 
3062 	buf->sz = strlen(r->current_string) + 1;
3063 	n1 = cp = mandoc_malloc(buf->sz);
3064 	memcpy(n1, r->current_string, buf->sz);
3065 	while (*cp != '\0') {
3066 
3067 		/* Scan ahead for the next argument invocation. */
3068 
3069 		if (*cp++ != '\\')
3070 			continue;
3071 		if (*cp++ != '$')
3072 			continue;
3073 		if (*cp == '*') {  /* \\$* inserts all arguments */
3074 			ib = 0;
3075 			ie = r->argc - 1;
3076 		} else {  /* \\$1 .. \\$9 insert one argument */
3077 			ib = ie = *cp - '1';
3078 			if (ib < 0 || ib > 8)
3079 				continue;
3080 		}
3081 		cp -= 2;
3082 
3083 		/*
3084 		 * Determine the size of the expanded argument,
3085 		 * taking escaping of quotes into account.
3086 		 */
3087 
3088 		asz = ie > ib ? ie - ib : 0;  /* for blanks */
3089 		for (i = ib; i <= ie; i++) {
3090 			for (ap = arg[i]; *ap != '\0'; ap++) {
3091 				asz++;
3092 				if (*ap == '"')
3093 					asz += 3;
3094 			}
3095 		}
3096 		if (asz != 3) {
3097 
3098 			/*
3099 			 * Determine the size of the rest of the
3100 			 * unexpanded macro, including the NUL.
3101 			 */
3102 
3103 			rsz = buf->sz - (cp - n1) - 3;
3104 
3105 			/*
3106 			 * When shrinking, move before
3107 			 * releasing the storage.
3108 			 */
3109 
3110 			if (asz < 3)
3111 				memmove(cp + asz, cp + 3, rsz);
3112 
3113 			/*
3114 			 * Resize the storage for the macro
3115 			 * and readjust the parse pointer.
3116 			 */
3117 
3118 			buf->sz += asz - 3;
3119 			n2 = mandoc_realloc(n1, buf->sz);
3120 			cp = n2 + (cp - n1);
3121 			n1 = n2;
3122 
3123 			/*
3124 			 * When growing, make room
3125 			 * for the expanded argument.
3126 			 */
3127 
3128 			if (asz > 3)
3129 				memmove(cp + asz, cp + 3, rsz);
3130 		}
3131 
3132 		/* Copy the expanded argument, escaping quotes. */
3133 
3134 		n2 = cp;
3135 		for (i = ib; i <= ie; i++) {
3136 			for (ap = arg[i]; *ap != '\0'; ap++) {
3137 				if (*ap == '"') {
3138 					memcpy(n2, "\\(dq", 4);
3139 					n2 += 4;
3140 				} else
3141 					*n2++ = *ap;
3142 			}
3143 			if (i < ie)
3144 				*n2++ = ' ';
3145 		}
3146 	}
3147 
3148 	/*
3149 	 * Replace the macro invocation
3150 	 * by the expanded macro.
3151 	 */
3152 
3153 	free(buf->buf);
3154 	buf->buf = n1;
3155 	*offs = 0;
3156 
3157 	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3158 	   ROFF_REPARSE : ROFF_APPEND;
3159 }
3160 
3161 static size_t
3162 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3163 {
3164 	char	 *name, *cp;
3165 	size_t	  namesz;
3166 
3167 	name = *cpp;
3168 	if ('\0' == *name)
3169 		return 0;
3170 
3171 	/* Read until end of name and terminate it with NUL. */
3172 	for (cp = name; 1; cp++) {
3173 		if ('\0' == *cp || ' ' == *cp) {
3174 			namesz = cp - name;
3175 			break;
3176 		}
3177 		if ('\\' != *cp)
3178 			continue;
3179 		namesz = cp - name;
3180 		if ('{' == cp[1] || '}' == cp[1])
3181 			break;
3182 		cp++;
3183 		if ('\\' == *cp)
3184 			continue;
3185 		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3186 		    "%.*s", (int)(cp - name + 1), name);
3187 		mandoc_escape((const char **)&cp, NULL, NULL);
3188 		break;
3189 	}
3190 
3191 	/* Read past spaces. */
3192 	while (' ' == *cp)
3193 		cp++;
3194 
3195 	*cpp = cp;
3196 	return namesz;
3197 }
3198 
3199 /*
3200  * Store *string into the user-defined string called *name.
3201  * To clear an existing entry, call with (*r, *name, NULL, 0).
3202  * append == 0: replace mode
3203  * append == 1: single-line append mode
3204  * append == 2: multiline append mode, append '\n' after each call
3205  */
3206 static void
3207 roff_setstr(struct roff *r, const char *name, const char *string,
3208 	int append)
3209 {
3210 
3211 	roff_setstrn(&r->strtab, name, strlen(name), string,
3212 	    string ? strlen(string) : 0, append);
3213 }
3214 
3215 static void
3216 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3217 		const char *string, size_t stringsz, int append)
3218 {
3219 	struct roffkv	*n;
3220 	char		*c;
3221 	int		 i;
3222 	size_t		 oldch, newch;
3223 
3224 	/* Search for an existing string with the same name. */
3225 	n = *r;
3226 
3227 	while (n && (namesz != n->key.sz ||
3228 			strncmp(n->key.p, name, namesz)))
3229 		n = n->next;
3230 
3231 	if (NULL == n) {
3232 		/* Create a new string table entry. */
3233 		n = mandoc_malloc(sizeof(struct roffkv));
3234 		n->key.p = mandoc_strndup(name, namesz);
3235 		n->key.sz = namesz;
3236 		n->val.p = NULL;
3237 		n->val.sz = 0;
3238 		n->next = *r;
3239 		*r = n;
3240 	} else if (0 == append) {
3241 		free(n->val.p);
3242 		n->val.p = NULL;
3243 		n->val.sz = 0;
3244 	}
3245 
3246 	if (NULL == string)
3247 		return;
3248 
3249 	/*
3250 	 * One additional byte for the '\n' in multiline mode,
3251 	 * and one for the terminating '\0'.
3252 	 */
3253 	newch = stringsz + (1 < append ? 2u : 1u);
3254 
3255 	if (NULL == n->val.p) {
3256 		n->val.p = mandoc_malloc(newch);
3257 		*n->val.p = '\0';
3258 		oldch = 0;
3259 	} else {
3260 		oldch = n->val.sz;
3261 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3262 	}
3263 
3264 	/* Skip existing content in the destination buffer. */
3265 	c = n->val.p + (int)oldch;
3266 
3267 	/* Append new content to the destination buffer. */
3268 	i = 0;
3269 	while (i < (int)stringsz) {
3270 		/*
3271 		 * Rudimentary roff copy mode:
3272 		 * Handle escaped backslashes.
3273 		 */
3274 		if ('\\' == string[i] && '\\' == string[i + 1])
3275 			i++;
3276 		*c++ = string[i++];
3277 	}
3278 
3279 	/* Append terminating bytes. */
3280 	if (1 < append)
3281 		*c++ = '\n';
3282 
3283 	*c = '\0';
3284 	n->val.sz = (int)(c - n->val.p);
3285 }
3286 
3287 static const char *
3288 roff_getstrn(const struct roff *r, const char *name, size_t len)
3289 {
3290 	const struct roffkv *n;
3291 	int i;
3292 
3293 	for (n = r->strtab; n; n = n->next)
3294 		if (0 == strncmp(name, n->key.p, len) &&
3295 		    '\0' == n->key.p[(int)len])
3296 			return n->val.p;
3297 
3298 	for (i = 0; i < PREDEFS_MAX; i++)
3299 		if (0 == strncmp(name, predefs[i].name, len) &&
3300 				'\0' == predefs[i].name[(int)len])
3301 			return predefs[i].str;
3302 
3303 	return NULL;
3304 }
3305 
3306 static void
3307 roff_freestr(struct roffkv *r)
3308 {
3309 	struct roffkv	 *n, *nn;
3310 
3311 	for (n = r; n; n = nn) {
3312 		free(n->key.p);
3313 		free(n->val.p);
3314 		nn = n->next;
3315 		free(n);
3316 	}
3317 }
3318 
3319 /* --- accessors and utility functions ------------------------------------ */
3320 
3321 const struct tbl_span *
3322 roff_span(const struct roff *r)
3323 {
3324 
3325 	return r->tbl ? tbl_span(r->tbl) : NULL;
3326 }
3327 
3328 const struct eqn *
3329 roff_eqn(const struct roff *r)
3330 {
3331 
3332 	return r->last_eqn ? &r->last_eqn->eqn : NULL;
3333 }
3334 
3335 /*
3336  * Duplicate an input string, making the appropriate character
3337  * conversations (as stipulated by `tr') along the way.
3338  * Returns a heap-allocated string with all the replacements made.
3339  */
3340 char *
3341 roff_strdup(const struct roff *r, const char *p)
3342 {
3343 	const struct roffkv *cp;
3344 	char		*res;
3345 	const char	*pp;
3346 	size_t		 ssz, sz;
3347 	enum mandoc_esc	 esc;
3348 
3349 	if (NULL == r->xmbtab && NULL == r->xtab)
3350 		return mandoc_strdup(p);
3351 	else if ('\0' == *p)
3352 		return mandoc_strdup("");
3353 
3354 	/*
3355 	 * Step through each character looking for term matches
3356 	 * (remember that a `tr' can be invoked with an escape, which is
3357 	 * a glyph but the escape is multi-character).
3358 	 * We only do this if the character hash has been initialised
3359 	 * and the string is >0 length.
3360 	 */
3361 
3362 	res = NULL;
3363 	ssz = 0;
3364 
3365 	while ('\0' != *p) {
3366 		assert((unsigned int)*p < 128);
3367 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3368 			sz = r->xtab[(int)*p].sz;
3369 			res = mandoc_realloc(res, ssz + sz + 1);
3370 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3371 			ssz += sz;
3372 			p++;
3373 			continue;
3374 		} else if ('\\' != *p) {
3375 			res = mandoc_realloc(res, ssz + 2);
3376 			res[ssz++] = *p++;
3377 			continue;
3378 		}
3379 
3380 		/* Search for term matches. */
3381 		for (cp = r->xmbtab; cp; cp = cp->next)
3382 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
3383 				break;
3384 
3385 		if (NULL != cp) {
3386 			/*
3387 			 * A match has been found.
3388 			 * Append the match to the array and move
3389 			 * forward by its keysize.
3390 			 */
3391 			res = mandoc_realloc(res,
3392 			    ssz + cp->val.sz + 1);
3393 			memcpy(res + ssz, cp->val.p, cp->val.sz);
3394 			ssz += cp->val.sz;
3395 			p += (int)cp->key.sz;
3396 			continue;
3397 		}
3398 
3399 		/*
3400 		 * Handle escapes carefully: we need to copy
3401 		 * over just the escape itself, or else we might
3402 		 * do replacements within the escape itself.
3403 		 * Make sure to pass along the bogus string.
3404 		 */
3405 		pp = p++;
3406 		esc = mandoc_escape(&p, NULL, NULL);
3407 		if (ESCAPE_ERROR == esc) {
3408 			sz = strlen(pp);
3409 			res = mandoc_realloc(res, ssz + sz + 1);
3410 			memcpy(res + ssz, pp, sz);
3411 			break;
3412 		}
3413 		/*
3414 		 * We bail out on bad escapes.
3415 		 * No need to warn: we already did so when
3416 		 * roff_res() was called.
3417 		 */
3418 		sz = (int)(p - pp);
3419 		res = mandoc_realloc(res, ssz + sz + 1);
3420 		memcpy(res + ssz, pp, sz);
3421 		ssz += sz;
3422 	}
3423 
3424 	res[(int)ssz] = '\0';
3425 	return res;
3426 }
3427 
3428 int
3429 roff_getformat(const struct roff *r)
3430 {
3431 
3432 	return r->format;
3433 }
3434 
3435 /*
3436  * Find out whether a line is a macro line or not.
3437  * If it is, adjust the current position and return one; if it isn't,
3438  * return zero and don't change the current position.
3439  * If the control character has been set with `.cc', then let that grain
3440  * precedence.
3441  * This is slighly contrary to groff, where using the non-breaking
3442  * control character when `cc' has been invoked will cause the
3443  * non-breaking macro contents to be printed verbatim.
3444  */
3445 int
3446 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3447 {
3448 	int		pos;
3449 
3450 	pos = *ppos;
3451 
3452 	if (0 != r->control && cp[pos] == r->control)
3453 		pos++;
3454 	else if (0 != r->control)
3455 		return 0;
3456 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3457 		pos += 2;
3458 	else if ('.' == cp[pos] || '\'' == cp[pos])
3459 		pos++;
3460 	else
3461 		return 0;
3462 
3463 	while (' ' == cp[pos] || '\t' == cp[pos])
3464 		pos++;
3465 
3466 	*ppos = pos;
3467 	return 1;
3468 }
3469