/* $OpenBSD: roff.c,v 1.250 2021/08/10 12:36:42 schwarze Exp $ */
/*
 * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Implementation of the roff(7) parser for mandoc(1).
 */
#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "mandoc.h"
#include "roff.h"
#include "mandoc_parse.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "tbl_parse.h"
#include "eqn_parse.h"

/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Each type is a distinct bit, so several types can be combined
 * into one mask, as ROFFDEF_ANY does.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */

/* --- data types --------------------------------------------------------- */

/*
 * An incredibly-simple string buffer.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};

/*
 * A key-value roffstr pair as part of a singly-linked list.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};

/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key;
	int		 val;
	int		 step;
	struct roffreg	*next; /* next in list */
};

/*
 * Association of request and macro names with token IDs.
 * These are the entries stored in the request lookup table;
 * roffhash_alloc() allocates each one with enough room behind
 * the structure to hold the nil-terminated name.
 */
struct	roffreq {
	enum roff_tok	 tok;
	char		 name[]; /* flexible array member, see above */
};

/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 */
struct	mctx {
	char		**argv; /* the array itself is freed by roff_free() */
	int		 argc; /* NOTE(review): presumably entries in use */
	int		 argsz; /* NOTE(review): presumably entries allocated */
};

/*
 * State of one roff parser.
 * Allocated by roff_alloc(), returned to its initial state by
 * roff_reset(), and released by roff_free().
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};

/*
 * A macro definition, condition, or ignored block.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};

/*
 * Parameter list shared by all request handlers; see the roffproc
 * typedef below and the prototypes declared with ROFF_ARGS.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	int (*roffproc)(ROFF_ARGS);

/*
 * The handlers and flags of one request; the element type of roffs[].
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};

struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Initializer for one struct predef.  The expansion ends in a comma,
 * so consecutive uses need no separator of their own.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },

/* --- function prototypes ------------------------------------------------ */

static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_checkend(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);

/* --- constant data ------------------------------------------------------ */

#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/*
 * Names of requests and macros, exported read-only through roff_name.
 * roffhash_alloc() indexes this table by token value and skips the
 * NULL entries.  The position of every entry matters; the first part
 * of the table parallels the per-entry comments in roffs[] below.
 * NOTE(review): the NULL entries presumably correspond to enum roff_tok
 * values that have no name (ROFF_MAX and similar) - confirm in roff.h.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br", "ce", "fi", "ft",
	"ll", "mc", "nf",
	"po", "rj", "sp",
	"ta", "ti", NULL,
	"ab", "ad", "af", "aln",
	"als", "am", "am1", "ami",
	"ami1", "as", "as1", "asciify",
	"backtrace", "bd", "bleedat", "blm",
	"box", "boxa", "bp", "BP",
	"break", "breakchar", "brnl", "brp",
	"brpnl", "c2", "cc",
	"cf", "cflags", "ch", "char",
	"chop", "class", "close", "CL",
	"color", "composite", "continue", "cp",
	"cropat", "cs", "cu", "da",
	"dch", "Dd", "de", "de1",
	"defcolor", "dei", "dei1", "device",
	"devicem", "di", "do", "ds",
	"ds1", "dwh", "dt", "ec",
	"ecr", "ecs", "el", "em",
	"EN", "eo", "EP", "EQ",
	"errprint", "ev", "evc", "ex",
	"fallback", "fam", "fc", "fchar",
	"fcolor", "fdeferlig", "feature", "fkern",
	"fl", "flig", "fp", "fps",
	"fschar", "fspacewidth", "fspecial", "ftr",
	"fzoom", "gcolor", "hc", "hcode",
	"hidechar", "hla", "hlm", "hpf",
	"hpfa", "hpfcode", "hw", "hy",
	"hylang", "hylen", "hym", "hypp",
	"hys", "ie", "if", "ig",
	"index", "it", "itc", "IX",
	"kern", "kernafter", "kernbefore", "kernpair",
	"lc", "lc_ctype", "lds", "length",
	"letadj", "lf", "lg", "lhang",
	"linetabs", "lnr", "lnrf", "lpfx",
	"ls", "lsm", "lt",
	"mediasize", "minss", "mk", "mso",
	"na", "ne", "nh", "nhychar",
	"nm", "nn", "nop", "nr",
	"nrf", "nroff", "ns", "nx",
	"open", "opena", "os", "output",
	"padj", "papersize", "pc", "pev",
	"pi", "PI", "pl", "pm",
	"pn", "pnr", "ps",
	"psbb", "pshape", "pso", "ptr",
	"pvs", "rchar", "rd", "recursionlimit",
	"return", "rfschar", "rhang",
	"rm", "rn", "rnn", "rr",
	"rs", "rt", "schar", "sentchar",
	"shc", "shift", "sizes", "so",
	"spacewidth", "special", "spreadwarn", "ss",
	"sty", "substring", "sv", "sy",
	"T&", "tc", "TE",
	"TH", "tkf", "tl",
	"tm", "tm1", "tmc", "tr",
	"track", "transchar", "trf", "trimat",
	"trin", "trnt", "troff", "TS",
	"uf", "ul", "unformat", "unwatch",
	"unwatchn", "vpt", "vs", "warn",
	"warnscale", "watch", "watchlength", "watchn",
	"wh", "while", "write", "writec",
	"writem", "xflag", ".", NULL,
	NULL, "text",
	"Dd", "Dt", "Os", "Sh",
	"Ss", "Pp", "D1", "Dl",
	"Bd", "Ed", "Bl", "El",
	"It", "Ad", "An", "Ap",
	"Ar", "Cd", "Cm", "Dv",
	"Er", "Ev", "Ex", "Fa",
	"Fd", "Fl", "Fn", "Ft",
	"Ic", "In", "Li", "Nd",
	"Nm", "Op", "Ot", "Pa",
	"Rv", "St", "Va", "Vt",
	"Xr", "%A", "%B", "%D",
	"%I", "%J", "%N", "%O",
	"%P", "%R", "%T", "%V",
	"Ac", "Ao", "Aq", "At",
	"Bc", "Bf", "Bo", "Bq",
	"Bsx", "Bx", "Db", "Dc",
	"Do", "Dq", "Ec", "Ef",
	"Em", "Eo", "Fx", "Ms",
	"No", "Ns", "Nx", "Ox",
	"Pc", "Pf", "Po", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Sc", "So",
	"Sq", "Sm", "Sx", "Sy",
	"Tn", "Ux", "Xc", "Xo",
	"Fo", "Fc", "Oo", "Oc",
	"Bk", "Ek", "Bt", "Hf",
	"Fr", "Ud", "Lb", "Lp",
	"Lk", "Mt", "Brq", "Bro",
	"Brc", "%C", "Es", "En",
	"Dx", "%Q", "%U", "Ta",
	"Tg", NULL,
	"TH", "SH", "SS", "TP",
	"TQ",
	"LP", "PP", "P", "IP",
	"HP", "SM", "SB", "BI",
	"IB", "BR", "RB", "R",
	"B", "I", "IR", "RI",
	"RE", "RS", "DT", "UC",
	"PD", "AT", "in",
	"SY", "YS", "OP",
	"EX", "EE", "UR",
	"UE", "MT", "ME", NULL
};
const	char *const *roff_name = __roff_name;

/*
 * Handlers for each request.  The comment behind each entry names the
 * request it belongs to; the entries are listed in the same order as
 * the request names at the start of __roff_name[], so entries must
 * never be inserted, removed, or reordered in one table only.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },
/* wh */ 611 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 612 { roff_insec, NULL, NULL, 0 }, /* write */ 613 { roff_insec, NULL, NULL, 0 }, /* writec */ 614 { roff_insec, NULL, NULL, 0 }, /* writem */ 615 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 616 { roff_cblock, NULL, NULL, 0 }, /* . */ 617 { roff_renamed, NULL, NULL, 0 }, 618 { roff_userdef, NULL, NULL, 0 } 619 }; 620 621 /* Array of injected predefined strings. */ 622 #define PREDEFS_MAX 38 623 static const struct predef predefs[PREDEFS_MAX] = { 624 #include "predefs.in" 625 }; 626 627 static int roffce_lines; /* number of input lines to center */ 628 static struct roff_node *roffce_node; /* active request */ 629 static int roffit_lines; /* number of lines to delay */ 630 static char *roffit_macro; /* nil-terminated macro line */ 631 632 633 /* --- request table ------------------------------------------------------ */ 634 635 struct ohash * 636 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 637 { 638 struct ohash *htab; 639 struct roffreq *req; 640 enum roff_tok tok; 641 size_t sz; 642 unsigned int slot; 643 644 htab = mandoc_malloc(sizeof(*htab)); 645 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 646 647 for (tok = mintok; tok < maxtok; tok++) { 648 if (roff_name[tok] == NULL) 649 continue; 650 sz = strlen(roff_name[tok]); 651 req = mandoc_malloc(sizeof(*req) + sz + 1); 652 req->tok = tok; 653 memcpy(req->name, roff_name[tok], sz + 1); 654 slot = ohash_qlookup(htab, req->name); 655 ohash_insert(htab, slot, req); 656 } 657 return htab; 658 } 659 660 void 661 roffhash_free(struct ohash *htab) 662 { 663 struct roffreq *req; 664 unsigned int slot; 665 666 if (htab == NULL) 667 return; 668 for (req = ohash_first(htab, &slot); req != NULL; 669 req = ohash_next(htab, &slot)) 670 free(req); 671 ohash_delete(htab); 672 free(htab); 673 } 674 675 enum roff_tok 676 roffhash_find(struct ohash *htab, const char *name, size_t sz) 677 { 678 struct roffreq *req; 679 
const char *end; 680 681 if (sz) { 682 end = name + sz; 683 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 684 } else 685 req = ohash_find(htab, ohash_qlookup(htab, name)); 686 return req == NULL ? TOKEN_NONE : req->tok; 687 } 688 689 /* --- stack of request blocks -------------------------------------------- */ 690 691 /* 692 * Pop the current node off of the stack of roff instructions currently 693 * pending. Return 1 if it is a loop or 0 otherwise. 694 */ 695 static int 696 roffnode_pop(struct roff *r) 697 { 698 struct roffnode *p; 699 int inloop; 700 701 p = r->last; 702 inloop = p->tok == ROFF_while; 703 r->last = p->parent; 704 free(p->name); 705 free(p->end); 706 free(p); 707 return inloop; 708 } 709 710 /* 711 * Push a roff node onto the instruction stack. This must later be 712 * removed with roffnode_pop(). 713 */ 714 static void 715 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 716 int line, int col) 717 { 718 struct roffnode *p; 719 720 p = mandoc_calloc(1, sizeof(struct roffnode)); 721 p->tok = tok; 722 if (name) 723 p->name = mandoc_strdup(name); 724 p->parent = r->last; 725 p->line = line; 726 p->col = col; 727 p->rule = p->parent ? 
p->parent->rule : 0; 728 729 r->last = p; 730 } 731 732 /* --- roff parser state data management ---------------------------------- */ 733 734 static void 735 roff_free1(struct roff *r) 736 { 737 int i; 738 739 tbl_free(r->first_tbl); 740 r->first_tbl = r->last_tbl = r->tbl = NULL; 741 742 eqn_free(r->last_eqn); 743 r->last_eqn = r->eqn = NULL; 744 745 while (r->mstackpos >= 0) 746 roff_userret(r); 747 748 while (r->last) 749 roffnode_pop(r); 750 751 free (r->rstack); 752 r->rstack = NULL; 753 r->rstacksz = 0; 754 r->rstackpos = -1; 755 756 roff_freereg(r->regtab); 757 r->regtab = NULL; 758 759 roff_freestr(r->strtab); 760 roff_freestr(r->rentab); 761 roff_freestr(r->xmbtab); 762 r->strtab = r->rentab = r->xmbtab = NULL; 763 764 if (r->xtab) 765 for (i = 0; i < 128; i++) 766 free(r->xtab[i].p); 767 free(r->xtab); 768 r->xtab = NULL; 769 } 770 771 void 772 roff_reset(struct roff *r) 773 { 774 roff_free1(r); 775 r->options |= MPARSE_COMMENT; 776 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 777 r->control = '\0'; 778 r->escape = '\\'; 779 roffce_lines = 0; 780 roffce_node = NULL; 781 roffit_lines = 0; 782 roffit_macro = NULL; 783 } 784 785 void 786 roff_free(struct roff *r) 787 { 788 int i; 789 790 roff_free1(r); 791 for (i = 0; i < r->mstacksz; i++) 792 free(r->mstack[i].argv); 793 free(r->mstack); 794 roffhash_free(r->reqtab); 795 free(r); 796 } 797 798 struct roff * 799 roff_alloc(int options) 800 { 801 struct roff *r; 802 803 r = mandoc_calloc(1, sizeof(struct roff)); 804 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 805 r->options = options | MPARSE_COMMENT; 806 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 807 r->mstackpos = -1; 808 r->rstackpos = -1; 809 r->escape = '\\'; 810 return r; 811 } 812 813 /* --- syntax tree state data management ---------------------------------- */ 814 815 static void 816 roff_man_free1(struct roff_man *man) 817 { 818 if (man->meta.first != NULL) 819 roff_node_delete(man, man->meta.first); 820 free(man->meta.msec); 821 
free(man->meta.vol); 822 free(man->meta.os); 823 free(man->meta.arch); 824 free(man->meta.title); 825 free(man->meta.name); 826 free(man->meta.date); 827 free(man->meta.sodest); 828 } 829 830 void 831 roff_state_reset(struct roff_man *man) 832 { 833 man->last = man->meta.first; 834 man->last_es = NULL; 835 man->flags = 0; 836 man->lastsec = man->lastnamed = SEC_NONE; 837 man->next = ROFF_NEXT_CHILD; 838 roff_setreg(man->roff, "nS", 0, '='); 839 } 840 841 static void 842 roff_man_alloc1(struct roff_man *man) 843 { 844 memset(&man->meta, 0, sizeof(man->meta)); 845 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 846 man->meta.first->type = ROFFT_ROOT; 847 man->meta.macroset = MACROSET_NONE; 848 roff_state_reset(man); 849 } 850 851 void 852 roff_man_reset(struct roff_man *man) 853 { 854 roff_man_free1(man); 855 roff_man_alloc1(man); 856 } 857 858 void 859 roff_man_free(struct roff_man *man) 860 { 861 roff_man_free1(man); 862 free(man); 863 } 864 865 struct roff_man * 866 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 867 { 868 struct roff_man *man; 869 870 man = mandoc_calloc(1, sizeof(*man)); 871 man->roff = roff; 872 man->os_s = os_s; 873 man->quick = quick; 874 roff_man_alloc1(man); 875 roff->man = man; 876 return man; 877 } 878 879 /* --- syntax tree handling ----------------------------------------------- */ 880 881 struct roff_node * 882 roff_node_alloc(struct roff_man *man, int line, int pos, 883 enum roff_type type, int tok) 884 { 885 struct roff_node *n; 886 887 n = mandoc_calloc(1, sizeof(*n)); 888 n->line = line; 889 n->pos = pos; 890 n->tok = tok; 891 n->type = type; 892 n->sec = man->lastsec; 893 894 if (man->flags & MDOC_SYNOPSIS) 895 n->flags |= NODE_SYNPRETTY; 896 else 897 n->flags &= ~NODE_SYNPRETTY; 898 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 899 n->flags |= NODE_NOFILL; 900 else 901 n->flags &= ~NODE_NOFILL; 902 if (man->flags & MDOC_NEWLINE) 903 n->flags |= NODE_LINE; 904 man->flags &= 
~MDOC_NEWLINE; 905 906 return n; 907 } 908 909 void 910 roff_node_append(struct roff_man *man, struct roff_node *n) 911 { 912 913 switch (man->next) { 914 case ROFF_NEXT_SIBLING: 915 if (man->last->next != NULL) { 916 n->next = man->last->next; 917 man->last->next->prev = n; 918 } else 919 man->last->parent->last = n; 920 man->last->next = n; 921 n->prev = man->last; 922 n->parent = man->last->parent; 923 break; 924 case ROFF_NEXT_CHILD: 925 if (man->last->child != NULL) { 926 n->next = man->last->child; 927 man->last->child->prev = n; 928 } else 929 man->last->last = n; 930 man->last->child = n; 931 n->parent = man->last; 932 break; 933 default: 934 abort(); 935 } 936 man->last = n; 937 938 switch (n->type) { 939 case ROFFT_HEAD: 940 n->parent->head = n; 941 break; 942 case ROFFT_BODY: 943 if (n->end != ENDBODY_NOT) 944 return; 945 n->parent->body = n; 946 break; 947 case ROFFT_TAIL: 948 n->parent->tail = n; 949 break; 950 default: 951 return; 952 } 953 954 /* 955 * Copy over the normalised-data pointer of our parent. Not 956 * everybody has one, but copying a null pointer is fine. 
957 */ 958 959 n->norm = n->parent->norm; 960 assert(n->parent->type == ROFFT_BLOCK); 961 } 962 963 void 964 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 965 { 966 struct roff_node *n; 967 968 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 969 n->string = roff_strdup(man->roff, word); 970 roff_node_append(man, n); 971 n->flags |= NODE_VALID | NODE_ENDED; 972 man->next = ROFF_NEXT_SIBLING; 973 } 974 975 void 976 roff_word_append(struct roff_man *man, const char *word) 977 { 978 struct roff_node *n; 979 char *addstr, *newstr; 980 981 n = man->last; 982 addstr = roff_strdup(man->roff, word); 983 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 984 free(addstr); 985 free(n->string); 986 n->string = newstr; 987 man->next = ROFF_NEXT_SIBLING; 988 } 989 990 void 991 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 992 { 993 struct roff_node *n; 994 995 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 996 roff_node_append(man, n); 997 man->next = ROFF_NEXT_CHILD; 998 } 999 1000 struct roff_node * 1001 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 1002 { 1003 struct roff_node *n; 1004 1005 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1006 roff_node_append(man, n); 1007 man->next = ROFF_NEXT_CHILD; 1008 return n; 1009 } 1010 1011 struct roff_node * 1012 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1013 { 1014 struct roff_node *n; 1015 1016 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1017 roff_node_append(man, n); 1018 man->next = ROFF_NEXT_CHILD; 1019 return n; 1020 } 1021 1022 struct roff_node * 1023 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1024 { 1025 struct roff_node *n; 1026 1027 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1028 roff_node_append(man, n); 1029 man->next = ROFF_NEXT_CHILD; 1030 return n; 1031 } 1032 1033 static void 1034 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1035 { 
1036 struct roff_node *n; 1037 struct tbl_span *span; 1038 1039 if (man->meta.macroset == MACROSET_MAN) 1040 man_breakscope(man, ROFF_TS); 1041 while ((span = tbl_span(tbl)) != NULL) { 1042 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1043 n->span = span; 1044 roff_node_append(man, n); 1045 n->flags |= NODE_VALID | NODE_ENDED; 1046 man->next = ROFF_NEXT_SIBLING; 1047 } 1048 } 1049 1050 void 1051 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1052 { 1053 1054 /* Adjust siblings. */ 1055 1056 if (n->prev) 1057 n->prev->next = n->next; 1058 if (n->next) 1059 n->next->prev = n->prev; 1060 1061 /* Adjust parent. */ 1062 1063 if (n->parent != NULL) { 1064 if (n->parent->child == n) 1065 n->parent->child = n->next; 1066 if (n->parent->last == n) 1067 n->parent->last = n->prev; 1068 } 1069 1070 /* Adjust parse point. */ 1071 1072 if (man == NULL) 1073 return; 1074 if (man->last == n) { 1075 if (n->prev == NULL) { 1076 man->last = n->parent; 1077 man->next = ROFF_NEXT_CHILD; 1078 } else { 1079 man->last = n->prev; 1080 man->next = ROFF_NEXT_SIBLING; 1081 } 1082 } 1083 if (man->meta.first == n) 1084 man->meta.first = NULL; 1085 } 1086 1087 void 1088 roff_node_relink(struct roff_man *man, struct roff_node *n) 1089 { 1090 roff_node_unlink(man, n); 1091 n->prev = n->next = NULL; 1092 roff_node_append(man, n); 1093 } 1094 1095 void 1096 roff_node_free(struct roff_node *n) 1097 { 1098 1099 if (n->args != NULL) 1100 mdoc_argv_free(n->args); 1101 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1102 free(n->norm); 1103 eqn_box_free(n->eqn); 1104 free(n->string); 1105 free(n->tag); 1106 free(n); 1107 } 1108 1109 void 1110 roff_node_delete(struct roff_man *man, struct roff_node *n) 1111 { 1112 1113 while (n->child != NULL) 1114 roff_node_delete(man, n->child); 1115 roff_node_unlink(man, n); 1116 roff_node_free(n); 1117 } 1118 1119 int 1120 roff_node_transparent(struct roff_node *n) 1121 { 1122 if (n == NULL) 1123 return 0; 1124 if (n->type == 
ROFFT_COMMENT || n->flags & NODE_NOPRT) 1125 return 1; 1126 return roff_tok_transparent(n->tok); 1127 } 1128 1129 int 1130 roff_tok_transparent(enum roff_tok tok) 1131 { 1132 switch (tok) { 1133 case ROFF_ft: 1134 case ROFF_ll: 1135 case ROFF_mc: 1136 case ROFF_po: 1137 case ROFF_ta: 1138 case MDOC_Db: 1139 case MDOC_Es: 1140 case MDOC_Sm: 1141 case MDOC_Tg: 1142 case MAN_DT: 1143 case MAN_UC: 1144 case MAN_PD: 1145 case MAN_AT: 1146 return 1; 1147 default: 1148 return 0; 1149 } 1150 } 1151 1152 struct roff_node * 1153 roff_node_child(struct roff_node *n) 1154 { 1155 for (n = n->child; roff_node_transparent(n); n = n->next) 1156 continue; 1157 return n; 1158 } 1159 1160 struct roff_node * 1161 roff_node_prev(struct roff_node *n) 1162 { 1163 do { 1164 n = n->prev; 1165 } while (roff_node_transparent(n)); 1166 return n; 1167 } 1168 1169 struct roff_node * 1170 roff_node_next(struct roff_node *n) 1171 { 1172 do { 1173 n = n->next; 1174 } while (roff_node_transparent(n)); 1175 return n; 1176 } 1177 1178 void 1179 deroff(char **dest, const struct roff_node *n) 1180 { 1181 char *cp; 1182 size_t sz; 1183 1184 if (n->string == NULL) { 1185 for (n = n->child; n != NULL; n = n->next) 1186 deroff(dest, n); 1187 return; 1188 } 1189 1190 /* Skip leading whitespace. */ 1191 1192 for (cp = n->string; *cp != '\0'; cp++) { 1193 if (cp[0] == '\\' && cp[1] != '\0' && 1194 strchr(" %&0^|~", cp[1]) != NULL) 1195 cp++; 1196 else if ( ! isspace((unsigned char)*cp)) 1197 break; 1198 } 1199 1200 /* Skip trailing backslash. */ 1201 1202 sz = strlen(cp); 1203 if (sz > 0 && cp[sz - 1] == '\\') 1204 sz--; 1205 1206 /* Skip trailing whitespace. */ 1207 1208 for (; sz; sz--) 1209 if ( ! isspace((unsigned char)cp[sz-1])) 1210 break; 1211 1212 /* Skip empty strings. 
*/ 1213 1214 if (sz == 0) 1215 return; 1216 1217 if (*dest == NULL) { 1218 *dest = mandoc_strndup(cp, sz); 1219 return; 1220 } 1221 1222 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1223 free(*dest); 1224 *dest = cp; 1225 } 1226 1227 /* --- main functions of the roff parser ---------------------------------- */ 1228 1229 /* 1230 * In the current line, expand escape sequences that produce parsable 1231 * input text. Also check the syntax of the remaining escape sequences, 1232 * which typically produce output glyphs or change formatter state. 1233 */ 1234 static int 1235 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1236 { 1237 struct mctx *ctx; /* current macro call context */ 1238 char ubuf[24]; /* buffer to print the number */ 1239 struct roff_node *n; /* used for header comments */ 1240 const char *start; /* start of the string to process */ 1241 char *stesc; /* start of an escape sequence ('\\') */ 1242 const char *esct; /* type of esccape sequence */ 1243 char *ep; /* end of comment string */ 1244 const char *stnam; /* start of the name, after "[(*" */ 1245 const char *cp; /* end of the name, e.g. 
before ']' */ 1246 const char *res; /* the string to be substituted */ 1247 char *nbuf; /* new buffer to copy buf->buf to */ 1248 size_t maxl; /* expected length of the escape name */ 1249 size_t naml; /* actual length of the escape name */ 1250 size_t asz; /* length of the replacement */ 1251 size_t rsz; /* length of the rest of the string */ 1252 int inaml; /* length returned from mandoc_escape() */ 1253 int expand_count; /* to avoid infinite loops */ 1254 int npos; /* position in numeric expression */ 1255 int arg_complete; /* argument not interrupted by eol */ 1256 int quote_args; /* true for \\$@, false for \\$* */ 1257 int done; /* no more input available */ 1258 int deftype; /* type of definition to paste */ 1259 int rcsid; /* kind of RCS id seen */ 1260 enum mandocerr err; /* for escape sequence problems */ 1261 char sign; /* increment number register */ 1262 char term; /* character terminating the escape */ 1263 1264 /* Search forward for comments. */ 1265 1266 done = 0; 1267 start = buf->buf + pos; 1268 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { 1269 if (stesc[0] != newesc || stesc[1] == '\0') 1270 continue; 1271 stesc++; 1272 if (*stesc != '"' && *stesc != '#') 1273 continue; 1274 1275 /* Comment found, look for RCS id. */ 1276 1277 rcsid = 0; 1278 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { 1279 rcsid = 1 << MANDOC_OS_OPENBSD; 1280 cp += 8; 1281 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { 1282 rcsid = 1 << MANDOC_OS_NETBSD; 1283 cp += 7; 1284 } 1285 if (cp != NULL && 1286 isalnum((unsigned char)*cp) == 0 && 1287 strchr(cp, '$') != NULL) { 1288 if (r->man->meta.rcsids & rcsid) 1289 mandoc_msg(MANDOCERR_RCS_REP, ln, 1290 (int)(stesc - buf->buf) + 1, 1291 "%s", stesc + 1); 1292 r->man->meta.rcsids |= rcsid; 1293 } 1294 1295 /* Handle trailing whitespace. 
*/ 1296 1297 ep = strchr(stesc--, '\0') - 1; 1298 if (*ep == '\n') { 1299 done = 1; 1300 ep--; 1301 } 1302 if (*ep == ' ' || *ep == '\t') 1303 mandoc_msg(MANDOCERR_SPACE_EOL, 1304 ln, (int)(ep - buf->buf), NULL); 1305 1306 /* 1307 * Save comments preceding the title macro 1308 * in the syntax tree. 1309 */ 1310 1311 if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) { 1312 while (*ep == ' ' || *ep == '\t') 1313 ep--; 1314 ep[1] = '\0'; 1315 n = roff_node_alloc(r->man, 1316 ln, stesc + 1 - buf->buf, 1317 ROFFT_COMMENT, TOKEN_NONE); 1318 n->string = mandoc_strdup(stesc + 2); 1319 roff_node_append(r->man, n); 1320 n->flags |= NODE_VALID | NODE_ENDED; 1321 r->man->next = ROFF_NEXT_SIBLING; 1322 } 1323 1324 /* Line continuation with comment. */ 1325 1326 if (stesc[1] == '#') { 1327 *stesc = '\0'; 1328 return ROFF_IGN | ROFF_APPEND; 1329 } 1330 1331 /* Discard normal comments. */ 1332 1333 while (stesc > start && stesc[-1] == ' ' && 1334 (stesc == start + 1 || stesc[-2] != '\\')) 1335 stesc--; 1336 *stesc = '\0'; 1337 break; 1338 } 1339 if (stesc == start) 1340 return ROFF_CONT; 1341 stesc--; 1342 1343 /* Notice the end of the input. */ 1344 1345 if (*stesc == '\n') { 1346 *stesc-- = '\0'; 1347 done = 1; 1348 } 1349 1350 expand_count = 0; 1351 while (stesc >= start) { 1352 if (*stesc != newesc) { 1353 1354 /* 1355 * If we have a non-standard escape character, 1356 * escape literal backslashes because all 1357 * processing in subsequent functions uses 1358 * the standard escaping rules. 1359 */ 1360 1361 if (newesc != ASCII_ESC && *stesc == '\\') { 1362 *stesc = '\0'; 1363 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", 1364 buf->buf, stesc + 1) + 1; 1365 start = nbuf + pos; 1366 stesc = nbuf + (stesc - buf->buf); 1367 free(buf->buf); 1368 buf->buf = nbuf; 1369 } 1370 1371 /* Search backwards for the next escape. */ 1372 1373 stesc--; 1374 continue; 1375 } 1376 1377 /* If it is escaped, skip it. 
*/ 1378 1379 for (cp = stesc - 1; cp >= start; cp--) 1380 if (*cp != r->escape) 1381 break; 1382 1383 if ((stesc - cp) % 2 == 0) { 1384 while (stesc > cp) 1385 *stesc-- = '\\'; 1386 continue; 1387 } else if (stesc[1] != '\0') { 1388 *stesc = '\\'; 1389 } else { 1390 *stesc-- = '\0'; 1391 if (done) 1392 continue; 1393 else 1394 return ROFF_IGN | ROFF_APPEND; 1395 } 1396 1397 /* Decide whether to expand or to check only. */ 1398 1399 term = '\0'; 1400 cp = stesc + 1; 1401 if (*cp == 'E') 1402 cp++; 1403 esct = cp; 1404 switch (*esct) { 1405 case '*': 1406 case '$': 1407 res = NULL; 1408 break; 1409 case 'B': 1410 case 'w': 1411 term = cp[1]; 1412 /* FALLTHROUGH */ 1413 case 'n': 1414 sign = cp[1]; 1415 if (sign == '+' || sign == '-') 1416 cp++; 1417 res = ubuf; 1418 break; 1419 default: 1420 err = MANDOCERR_OK; 1421 switch(mandoc_escape(&cp, &stnam, &inaml)) { 1422 case ESCAPE_SPECIAL: 1423 if (mchars_spec2cp(stnam, inaml) >= 0) 1424 break; 1425 /* FALLTHROUGH */ 1426 case ESCAPE_ERROR: 1427 err = MANDOCERR_ESC_BAD; 1428 break; 1429 case ESCAPE_UNDEF: 1430 err = MANDOCERR_ESC_UNDEF; 1431 break; 1432 case ESCAPE_UNSUPP: 1433 err = MANDOCERR_ESC_UNSUPP; 1434 break; 1435 default: 1436 break; 1437 } 1438 if (err != MANDOCERR_OK) 1439 mandoc_msg(err, ln, (int)(stesc - buf->buf), 1440 "%.*s", (int)(cp - stesc), stesc); 1441 stesc--; 1442 continue; 1443 } 1444 1445 if (EXPAND_LIMIT < ++expand_count) { 1446 mandoc_msg(MANDOCERR_ROFFLOOP, 1447 ln, (int)(stesc - buf->buf), NULL); 1448 return ROFF_IGN; 1449 } 1450 1451 /* 1452 * The third character decides the length 1453 * of the name of the string or register. 1454 * Save a pointer to the name. 
1455 */ 1456 1457 if (term == '\0') { 1458 switch (*++cp) { 1459 case '\0': 1460 maxl = 0; 1461 break; 1462 case '(': 1463 cp++; 1464 maxl = 2; 1465 break; 1466 case '[': 1467 cp++; 1468 term = ']'; 1469 maxl = 0; 1470 break; 1471 default: 1472 maxl = 1; 1473 break; 1474 } 1475 } else { 1476 cp += 2; 1477 maxl = 0; 1478 } 1479 stnam = cp; 1480 1481 /* Advance to the end of the name. */ 1482 1483 naml = 0; 1484 arg_complete = 1; 1485 while (maxl == 0 || naml < maxl) { 1486 if (*cp == '\0') { 1487 mandoc_msg(MANDOCERR_ESC_BAD, ln, 1488 (int)(stesc - buf->buf), "%s", stesc); 1489 arg_complete = 0; 1490 break; 1491 } 1492 if (maxl == 0 && *cp == term) { 1493 cp++; 1494 break; 1495 } 1496 if (*cp++ != '\\' || *esct != 'w') { 1497 naml++; 1498 continue; 1499 } 1500 switch (mandoc_escape(&cp, NULL, NULL)) { 1501 case ESCAPE_SPECIAL: 1502 case ESCAPE_UNICODE: 1503 case ESCAPE_NUMBERED: 1504 case ESCAPE_UNDEF: 1505 case ESCAPE_OVERSTRIKE: 1506 naml++; 1507 break; 1508 default: 1509 break; 1510 } 1511 } 1512 1513 /* 1514 * Retrieve the replacement string; if it is 1515 * undefined, resume searching for escapes. 1516 */ 1517 1518 switch (*esct) { 1519 case '*': 1520 if (arg_complete) { 1521 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1522 res = roff_getstrn(r, stnam, naml, &deftype); 1523 1524 /* 1525 * If not overriden, let \*(.T 1526 * through to the formatters. 1527 */ 1528 1529 if (res == NULL && naml == 2 && 1530 stnam[0] == '.' && stnam[1] == 'T') { 1531 roff_setstrn(&r->strtab, 1532 ".T", 2, NULL, 0, 0); 1533 stesc--; 1534 continue; 1535 } 1536 } 1537 break; 1538 case '$': 1539 if (r->mstackpos < 0) { 1540 mandoc_msg(MANDOCERR_ARG_UNDEF, ln, 1541 (int)(stesc - buf->buf), "%.3s", stesc); 1542 break; 1543 } 1544 ctx = r->mstack + r->mstackpos; 1545 npos = esct[1] - '1'; 1546 if (npos >= 0 && npos <= 8) { 1547 res = npos < ctx->argc ? 
1548 ctx->argv[npos] : ""; 1549 break; 1550 } 1551 if (esct[1] == '*') 1552 quote_args = 0; 1553 else if (esct[1] == '@') 1554 quote_args = 1; 1555 else { 1556 mandoc_msg(MANDOCERR_ARG_NONUM, ln, 1557 (int)(stesc - buf->buf), "%.3s", stesc); 1558 break; 1559 } 1560 asz = 0; 1561 for (npos = 0; npos < ctx->argc; npos++) { 1562 if (npos) 1563 asz++; /* blank */ 1564 if (quote_args) 1565 asz += 2; /* quotes */ 1566 asz += strlen(ctx->argv[npos]); 1567 } 1568 if (asz != 3) { 1569 rsz = buf->sz - (stesc - buf->buf) - 3; 1570 if (asz < 3) 1571 memmove(stesc + asz, stesc + 3, rsz); 1572 buf->sz += asz - 3; 1573 nbuf = mandoc_realloc(buf->buf, buf->sz); 1574 start = nbuf + pos; 1575 stesc = nbuf + (stesc - buf->buf); 1576 buf->buf = nbuf; 1577 if (asz > 3) 1578 memmove(stesc + asz, stesc + 3, rsz); 1579 } 1580 for (npos = 0; npos < ctx->argc; npos++) { 1581 if (npos) 1582 *stesc++ = ' '; 1583 if (quote_args) 1584 *stesc++ = '"'; 1585 cp = ctx->argv[npos]; 1586 while (*cp != '\0') 1587 *stesc++ = *cp++; 1588 if (quote_args) 1589 *stesc++ = '"'; 1590 } 1591 continue; 1592 case 'B': 1593 npos = 0; 1594 ubuf[0] = arg_complete && 1595 roff_evalnum(r, ln, stnam, &npos, 1596 NULL, ROFFNUM_SCALE) && 1597 stnam + npos + 1 == cp ? '1' : '0'; 1598 ubuf[1] = '\0'; 1599 break; 1600 case 'n': 1601 if (arg_complete) 1602 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1603 roff_getregn(r, stnam, naml, sign)); 1604 else 1605 ubuf[0] = '\0'; 1606 break; 1607 case 'w': 1608 /* use even incomplete args */ 1609 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1610 24 * (int)naml); 1611 break; 1612 } 1613 1614 if (res == NULL) { 1615 if (*esct == '*') 1616 mandoc_msg(MANDOCERR_STR_UNDEF, 1617 ln, (int)(stesc - buf->buf), 1618 "%.*s", (int)naml, stnam); 1619 res = ""; 1620 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1621 mandoc_msg(MANDOCERR_ROFFLOOP, 1622 ln, (int)(stesc - buf->buf), NULL); 1623 return ROFF_IGN; 1624 } 1625 1626 /* Replace the escape sequence by the string. 
*/ 1627 1628 *stesc = '\0'; 1629 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1630 buf->buf, res, cp) + 1; 1631 1632 /* Prepare for the next replacement. */ 1633 1634 start = nbuf + pos; 1635 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1636 free(buf->buf); 1637 buf->buf = nbuf; 1638 } 1639 return ROFF_CONT; 1640 } 1641 1642 /* 1643 * Parse a quoted or unquoted roff-style request or macro argument. 1644 * Return a pointer to the parsed argument, which is either the original 1645 * pointer or advanced by one byte in case the argument is quoted. 1646 * NUL-terminate the argument in place. 1647 * Collapse pairs of quotes inside quoted arguments. 1648 * Advance the argument pointer to the next argument, 1649 * or to the NUL byte terminating the argument line. 1650 */ 1651 char * 1652 roff_getarg(struct roff *r, char **cpp, int ln, int *pos) 1653 { 1654 struct buf buf; 1655 char *cp, *start; 1656 int newesc, pairs, quoted, white; 1657 1658 /* Quoting can only start with a new word. */ 1659 start = *cpp; 1660 quoted = 0; 1661 if ('"' == *start) { 1662 quoted = 1; 1663 start++; 1664 } 1665 1666 newesc = pairs = white = 0; 1667 for (cp = start; '\0' != *cp; cp++) { 1668 1669 /* 1670 * Move the following text left 1671 * after quoted quotes and after "\\" and "\t". 1672 */ 1673 if (pairs) 1674 cp[-pairs] = cp[0]; 1675 1676 if ('\\' == cp[0]) { 1677 /* 1678 * In copy mode, translate double to single 1679 * backslashes and backslash-t to literal tabs. 1680 */ 1681 switch (cp[1]) { 1682 case 'a': 1683 case 't': 1684 cp[-pairs] = '\t'; 1685 pairs++; 1686 cp++; 1687 break; 1688 case '\\': 1689 newesc = 1; 1690 cp[-pairs] = ASCII_ESC; 1691 pairs++; 1692 cp++; 1693 break; 1694 case ' ': 1695 /* Skip escaped blanks. */ 1696 if (0 == quoted) 1697 cp++; 1698 break; 1699 default: 1700 break; 1701 } 1702 } else if (0 == quoted) { 1703 if (' ' == cp[0]) { 1704 /* Unescaped blanks end unquoted args. 
*/ 1705 white = 1; 1706 break; 1707 } 1708 } else if ('"' == cp[0]) { 1709 if ('"' == cp[1]) { 1710 /* Quoted quotes collapse. */ 1711 pairs++; 1712 cp++; 1713 } else { 1714 /* Unquoted quotes end quoted args. */ 1715 quoted = 2; 1716 break; 1717 } 1718 } 1719 } 1720 1721 /* Quoted argument without a closing quote. */ 1722 if (1 == quoted) 1723 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1724 1725 /* NUL-terminate this argument and move to the next one. */ 1726 if (pairs) 1727 cp[-pairs] = '\0'; 1728 if ('\0' != *cp) { 1729 *cp++ = '\0'; 1730 while (' ' == *cp) 1731 cp++; 1732 } 1733 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1734 *cpp = cp; 1735 1736 if ('\0' == *cp && (white || ' ' == cp[-1])) 1737 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1738 1739 start = mandoc_strdup(start); 1740 if (newesc == 0) 1741 return start; 1742 1743 buf.buf = start; 1744 buf.sz = strlen(start) + 1; 1745 buf.next = NULL; 1746 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { 1747 free(buf.buf); 1748 buf.buf = mandoc_strdup(""); 1749 } 1750 return buf.buf; 1751 } 1752 1753 1754 /* 1755 * Process text streams. 1756 */ 1757 static int 1758 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1759 { 1760 size_t sz; 1761 const char *start; 1762 char *p; 1763 int isz; 1764 enum mandoc_esc esc; 1765 1766 /* Spring the input line trap. */ 1767 1768 if (roffit_lines == 1) { 1769 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1770 free(buf->buf); 1771 buf->buf = p; 1772 buf->sz = isz + 1; 1773 *offs = 0; 1774 free(roffit_macro); 1775 roffit_lines = 0; 1776 return ROFF_REPARSE; 1777 } else if (roffit_lines > 1) 1778 --roffit_lines; 1779 1780 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1781 if (roffce_lines < 1) { 1782 r->man->last = roffce_node; 1783 r->man->next = ROFF_NEXT_SIBLING; 1784 roffce_lines = 0; 1785 roffce_node = NULL; 1786 } else 1787 roffce_lines--; 1788 } 1789 1790 /* Convert all breakable hyphens into ASCII_HYPH. 
*/ 1791 1792 start = p = buf->buf + pos; 1793 1794 while (*p != '\0') { 1795 sz = strcspn(p, "-\\"); 1796 p += sz; 1797 1798 if (*p == '\0') 1799 break; 1800 1801 if (*p == '\\') { 1802 /* Skip over escapes. */ 1803 p++; 1804 esc = mandoc_escape((const char **)&p, NULL, NULL); 1805 if (esc == ESCAPE_ERROR) 1806 break; 1807 while (*p == '-') 1808 p++; 1809 continue; 1810 } else if (p == start) { 1811 p++; 1812 continue; 1813 } 1814 1815 if (isalpha((unsigned char)p[-1]) && 1816 isalpha((unsigned char)p[1])) 1817 *p = ASCII_HYPH; 1818 p++; 1819 } 1820 return ROFF_CONT; 1821 } 1822 1823 int 1824 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len) 1825 { 1826 enum roff_tok t; 1827 int e; 1828 int pos; /* parse point */ 1829 int spos; /* saved parse point for messages */ 1830 int ppos; /* original offset in buf->buf */ 1831 int ctl; /* macro line (boolean) */ 1832 1833 ppos = pos = *offs; 1834 1835 if (len > 80 && r->tbl == NULL && r->eqn == NULL && 1836 (r->man->flags & ROFF_NOFILL) == 0 && 1837 strchr(" .\\", buf->buf[pos]) == NULL && 1838 buf->buf[pos] != r->control && 1839 strcspn(buf->buf, " ") < 80) 1840 mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1, 1841 "%.20s...", buf->buf + pos); 1842 1843 /* Handle in-line equation delimiters. */ 1844 1845 if (r->tbl == NULL && 1846 r->last_eqn != NULL && r->last_eqn->delim && 1847 (r->eqn == NULL || r->eqn_inline)) { 1848 e = roff_eqndelim(r, buf, pos); 1849 if (e == ROFF_REPARSE) 1850 return e; 1851 assert(e == ROFF_CONT); 1852 } 1853 1854 /* Expand some escape sequences. */ 1855 1856 e = roff_expand(r, buf, ln, pos, r->escape); 1857 if ((e & ROFF_MASK) == ROFF_IGN) 1858 return e; 1859 assert(e == ROFF_CONT); 1860 1861 ctl = roff_getcontrol(r, buf->buf, &pos); 1862 1863 /* 1864 * First, if a scope is open and we're not a macro, pass the 1865 * text through the macro's filter. 1866 * Equations process all content themselves. 
1867 * Tables process almost all content themselves, but we want 1868 * to warn about macros before passing it there. 1869 */ 1870 1871 if (r->last != NULL && ! ctl) { 1872 t = r->last->tok; 1873 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1874 if ((e & ROFF_MASK) == ROFF_IGN) 1875 return e; 1876 e &= ~ROFF_MASK; 1877 } else 1878 e = ROFF_IGN; 1879 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1880 eqn_read(r->eqn, buf->buf + ppos); 1881 return e; 1882 } 1883 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1884 tbl_read(r->tbl, ln, buf->buf, ppos); 1885 roff_addtbl(r->man, ln, r->tbl); 1886 return e; 1887 } 1888 if ( ! ctl) { 1889 r->options &= ~MPARSE_COMMENT; 1890 return roff_parsetext(r, buf, pos, offs) | e; 1891 } 1892 1893 /* Skip empty request lines. */ 1894 1895 if (buf->buf[pos] == '"') { 1896 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL); 1897 return ROFF_IGN; 1898 } else if (buf->buf[pos] == '\0') 1899 return ROFF_IGN; 1900 1901 /* 1902 * If a scope is open, go to the child handler for that macro, 1903 * as it may want to preprocess before doing anything with it. 1904 * Don't do so if an equation is open. 1905 */ 1906 1907 if (r->last) { 1908 t = r->last->tok; 1909 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1910 } 1911 1912 /* No scope is open. This is a new request or macro. */ 1913 1914 r->options &= ~MPARSE_COMMENT; 1915 spos = pos; 1916 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1917 1918 /* Tables ignore most macros. 
*/ 1919 1920 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS || 1921 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) { 1922 mandoc_msg(MANDOCERR_TBLMACRO, 1923 ln, pos, "%s", buf->buf + spos); 1924 if (t != TOKEN_NONE) 1925 return ROFF_IGN; 1926 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1927 pos++; 1928 while (buf->buf[pos] == ' ') 1929 pos++; 1930 tbl_read(r->tbl, ln, buf->buf, pos); 1931 roff_addtbl(r->man, ln, r->tbl); 1932 return ROFF_IGN; 1933 } 1934 1935 /* For now, let high level macros abort .ce mode. */ 1936 1937 if (ctl && roffce_node != NULL && 1938 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 1939 t == ROFF_TH || t == ROFF_TS)) { 1940 r->man->last = roffce_node; 1941 r->man->next = ROFF_NEXT_SIBLING; 1942 roffce_lines = 0; 1943 roffce_node = NULL; 1944 } 1945 1946 /* 1947 * This is neither a roff request nor a user-defined macro. 1948 * Let the standard macro set parsers handle it. 1949 */ 1950 1951 if (t == TOKEN_NONE) 1952 return ROFF_CONT; 1953 1954 /* Execute a roff request or a user defined macro. */ 1955 1956 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); 1957 } 1958 1959 /* 1960 * Internal interface function to tell the roff parser that execution 1961 * of the current macro ended. This is required because macro 1962 * definitions usually do not end with a .return request. 
1963 */ 1964 void 1965 roff_userret(struct roff *r) 1966 { 1967 struct mctx *ctx; 1968 int i; 1969 1970 assert(r->mstackpos >= 0); 1971 ctx = r->mstack + r->mstackpos; 1972 for (i = 0; i < ctx->argc; i++) 1973 free(ctx->argv[i]); 1974 ctx->argc = 0; 1975 r->mstackpos--; 1976 } 1977 1978 void 1979 roff_endparse(struct roff *r) 1980 { 1981 if (r->last != NULL) 1982 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, 1983 r->last->col, "%s", roff_name[r->last->tok]); 1984 1985 if (r->eqn != NULL) { 1986 mandoc_msg(MANDOCERR_BLK_NOEND, 1987 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1988 eqn_parse(r->eqn); 1989 r->eqn = NULL; 1990 } 1991 1992 if (r->tbl != NULL) { 1993 tbl_end(r->tbl, 1); 1994 r->tbl = NULL; 1995 } 1996 } 1997 1998 /* 1999 * Parse a roff node's type from the input buffer. This must be in the 2000 * form of ".foo xxx" in the usual way. 2001 */ 2002 static enum roff_tok 2003 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 2004 { 2005 char *cp; 2006 const char *mac; 2007 size_t maclen; 2008 int deftype; 2009 enum roff_tok t; 2010 2011 cp = buf + *pos; 2012 2013 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 2014 return TOKEN_NONE; 2015 2016 mac = cp; 2017 maclen = roff_getname(r, &cp, ln, ppos); 2018 2019 deftype = ROFFDEF_USER | ROFFDEF_REN; 2020 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 2021 switch (deftype) { 2022 case ROFFDEF_USER: 2023 t = ROFF_USERDEF; 2024 break; 2025 case ROFFDEF_REN: 2026 t = ROFF_RENAMED; 2027 break; 2028 default: 2029 t = roffhash_find(r->reqtab, mac, maclen); 2030 break; 2031 } 2032 if (t != TOKEN_NONE) 2033 *pos = cp - buf; 2034 else if (deftype == ROFFDEF_UNDEF) { 2035 /* Using an undefined macro defines it to be empty. 
*/ 2036 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 2037 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 2038 } 2039 return t; 2040 } 2041 2042 /* --- handling of request blocks ----------------------------------------- */ 2043 2044 /* 2045 * Close a macro definition block or an "ignore" block. 2046 */ 2047 static int 2048 roff_cblock(ROFF_ARGS) 2049 { 2050 int rr; 2051 2052 if (r->last == NULL) { 2053 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2054 return ROFF_IGN; 2055 } 2056 2057 switch (r->last->tok) { 2058 case ROFF_am: 2059 case ROFF_ami: 2060 case ROFF_de: 2061 case ROFF_dei: 2062 case ROFF_ig: 2063 break; 2064 case ROFF_am1: 2065 case ROFF_de1: 2066 /* Remapped in roff_block(). */ 2067 abort(); 2068 default: 2069 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2070 return ROFF_IGN; 2071 } 2072 2073 roffnode_pop(r); 2074 roffnode_cleanscope(r); 2075 2076 /* 2077 * If a conditional block with braces is still open, 2078 * check for "\}" block end markers. 2079 */ 2080 2081 if (r->last != NULL && r->last->endspan < 0) { 2082 rr = 1; /* If arguments follow "\}", warn about them. */ 2083 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2084 } 2085 2086 if (buf->buf[pos] != '\0') 2087 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 2088 ".. %s", buf->buf + pos); 2089 2090 return ROFF_IGN; 2091 } 2092 2093 /* 2094 * Pop all nodes ending at the end of the current input line. 2095 * Return the number of loops ended. 2096 */ 2097 static int 2098 roffnode_cleanscope(struct roff *r) 2099 { 2100 int inloop; 2101 2102 inloop = 0; 2103 while (r->last != NULL && r->last->endspan > 0) { 2104 if (--r->last->endspan != 0) 2105 break; 2106 inloop += roffnode_pop(r); 2107 } 2108 return inloop; 2109 } 2110 2111 /* 2112 * Handle the closing "\}" of a conditional block. 2113 * Apart from generating warnings, this only pops nodes. 2114 * Return the number of loops ended. 
2115 */ 2116 static int 2117 roff_ccond(struct roff *r, int ln, int ppos) 2118 { 2119 if (NULL == r->last) { 2120 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2121 return 0; 2122 } 2123 2124 switch (r->last->tok) { 2125 case ROFF_el: 2126 case ROFF_ie: 2127 case ROFF_if: 2128 case ROFF_while: 2129 break; 2130 default: 2131 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2132 return 0; 2133 } 2134 2135 if (r->last->endspan > -1) { 2136 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2137 return 0; 2138 } 2139 2140 return roffnode_pop(r) + roffnode_cleanscope(r); 2141 } 2142 2143 static int 2144 roff_block(ROFF_ARGS) 2145 { 2146 const char *name, *value; 2147 char *call, *cp, *iname, *rname; 2148 size_t csz, namesz, rsz; 2149 int deftype; 2150 2151 /* Ignore groff compatibility mode for now. */ 2152 2153 if (tok == ROFF_de1) 2154 tok = ROFF_de; 2155 else if (tok == ROFF_dei1) 2156 tok = ROFF_dei; 2157 else if (tok == ROFF_am1) 2158 tok = ROFF_am; 2159 else if (tok == ROFF_ami1) 2160 tok = ROFF_ami; 2161 2162 /* Parse the macro name argument. */ 2163 2164 cp = buf->buf + pos; 2165 if (tok == ROFF_ig) { 2166 iname = NULL; 2167 namesz = 0; 2168 } else { 2169 iname = cp; 2170 namesz = roff_getname(r, &cp, ln, ppos); 2171 iname[namesz] = '\0'; 2172 } 2173 2174 /* Resolve the macro name argument if it is indirect. 
*/ 2175 2176 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2177 deftype = ROFFDEF_USER; 2178 name = roff_getstrn(r, iname, namesz, &deftype); 2179 if (name == NULL) { 2180 mandoc_msg(MANDOCERR_STR_UNDEF, 2181 ln, (int)(iname - buf->buf), 2182 "%.*s", (int)namesz, iname); 2183 namesz = 0; 2184 } else 2185 namesz = strlen(name); 2186 } else 2187 name = iname; 2188 2189 if (namesz == 0 && tok != ROFF_ig) { 2190 mandoc_msg(MANDOCERR_REQ_EMPTY, 2191 ln, ppos, "%s", roff_name[tok]); 2192 return ROFF_IGN; 2193 } 2194 2195 roffnode_push(r, tok, name, ln, ppos); 2196 2197 /* 2198 * At the beginning of a `de' macro, clear the existing string 2199 * with the same name, if there is one. New content will be 2200 * appended from roff_block_text() in multiline mode. 2201 */ 2202 2203 if (tok == ROFF_de || tok == ROFF_dei) { 2204 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2205 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2206 } else if (tok == ROFF_am || tok == ROFF_ami) { 2207 deftype = ROFFDEF_ANY; 2208 value = roff_getstrn(r, iname, namesz, &deftype); 2209 switch (deftype) { /* Before appending, ... */ 2210 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2211 roff_setstrn(&r->strtab, name, namesz, 2212 value, strlen(value), 0); 2213 break; 2214 case ROFFDEF_REN: /* call original standard macro. */ 2215 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2216 (int)strlen(value), value); 2217 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2218 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2219 free(call); 2220 break; 2221 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2222 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2223 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2224 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2225 (int)rsz, rname); 2226 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2227 free(call); 2228 free(rname); 2229 break; 2230 default: 2231 break; 2232 } 2233 } 2234 2235 if (*cp == '\0') 2236 return ROFF_IGN; 2237 2238 /* Get the custom end marker. */ 2239 2240 iname = cp; 2241 namesz = roff_getname(r, &cp, ln, ppos); 2242 2243 /* Resolve the end marker if it is indirect. */ 2244 2245 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2246 deftype = ROFFDEF_USER; 2247 name = roff_getstrn(r, iname, namesz, &deftype); 2248 if (name == NULL) { 2249 mandoc_msg(MANDOCERR_STR_UNDEF, 2250 ln, (int)(iname - buf->buf), 2251 "%.*s", (int)namesz, iname); 2252 namesz = 0; 2253 } else 2254 namesz = strlen(name); 2255 } else 2256 name = iname; 2257 2258 if (namesz) 2259 r->last->end = mandoc_strndup(name, namesz); 2260 2261 if (*cp != '\0') 2262 mandoc_msg(MANDOCERR_ARG_EXCESS, 2263 ln, pos, ".%s ... %s", roff_name[tok], cp); 2264 2265 return ROFF_IGN; 2266 } 2267 2268 static int 2269 roff_block_sub(ROFF_ARGS) 2270 { 2271 enum roff_tok t; 2272 int i, j; 2273 2274 /* 2275 * First check whether a custom macro exists at this level. If 2276 * it does, then check against it. This is some of groff's 2277 * stranger behaviours. If we encountered a custom end-scope 2278 * tag and that tag also happens to be a "real" macro, then we 2279 * need to try interpreting it again as a real macro. If it's 2280 * not, then return ignore. Else continue. 
2281 */ 2282 2283 if (r->last->end) { 2284 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2285 if (buf->buf[i] != r->last->end[j]) 2286 break; 2287 2288 if (r->last->end[j] == '\0' && 2289 (buf->buf[i] == '\0' || 2290 buf->buf[i] == ' ' || 2291 buf->buf[i] == '\t')) { 2292 roffnode_pop(r); 2293 roffnode_cleanscope(r); 2294 2295 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2296 i++; 2297 2298 pos = i; 2299 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2300 TOKEN_NONE) 2301 return ROFF_RERUN; 2302 return ROFF_IGN; 2303 } 2304 } 2305 2306 /* 2307 * If we have no custom end-query or lookup failed, then try 2308 * pulling it out of the hashtable. 2309 */ 2310 2311 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2312 2313 if (t != ROFF_cblock) { 2314 if (tok != ROFF_ig) 2315 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2316 return ROFF_IGN; 2317 } 2318 2319 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2320 } 2321 2322 static int 2323 roff_block_text(ROFF_ARGS) 2324 { 2325 2326 if (tok != ROFF_ig) 2327 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2328 2329 return ROFF_IGN; 2330 } 2331 2332 /* 2333 * Check for a closing "\}" and handle it. 2334 * In this function, the final "int *offs" argument is used for 2335 * different purposes than elsewhere: 2336 * Input: *offs == 0: caller wants to discard arguments following \} 2337 * *offs == 1: caller wants to preserve text following \} 2338 * Output: *offs = 0: tell caller to discard input line 2339 * *offs = 1: tell caller to use input line 2340 */ 2341 static int 2342 roff_cond_checkend(ROFF_ARGS) 2343 { 2344 char *ep; 2345 int endloop, irc, rr; 2346 2347 irc = ROFF_IGN; 2348 rr = r->last->rule; 2349 endloop = tok != ROFF_while ? ROFF_IGN : 2350 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2351 if (roffnode_cleanscope(r)) 2352 irc |= endloop; 2353 2354 /* 2355 * If "\}" occurs on a macro line without a preceding macro or 2356 * a text line contains nothing else, drop the line completely. 
2357 */ 2358 2359 ep = buf->buf + pos; 2360 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0)) 2361 rr = 0; 2362 2363 /* 2364 * The closing delimiter "\}" rewinds the conditional scope 2365 * but is otherwise ignored when interpreting the line. 2366 */ 2367 2368 while ((ep = strchr(ep, '\\')) != NULL) { 2369 switch (ep[1]) { 2370 case '}': 2371 if (ep[2] == '\0') 2372 ep[0] = '\0'; 2373 else if (rr) 2374 ep[1] = '&'; 2375 else 2376 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2377 if (roff_ccond(r, ln, ep - buf->buf)) 2378 irc |= endloop; 2379 break; 2380 case '\0': 2381 ++ep; 2382 break; 2383 default: 2384 ep += 2; 2385 break; 2386 } 2387 } 2388 *offs = rr; 2389 return irc; 2390 } 2391 2392 /* 2393 * Parse and process a request or macro line in conditional scope. 2394 */ 2395 static int 2396 roff_cond_sub(ROFF_ARGS) 2397 { 2398 struct roffnode *bl; 2399 int irc, rr; 2400 enum roff_tok t; 2401 2402 rr = 0; /* If arguments follow "\}", skip them. */ 2403 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2404 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2405 2406 /* For now, let high level macros abort .ce mode. */ 2407 2408 if (roffce_node != NULL && 2409 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 2410 t == ROFF_TH || t == ROFF_TS)) { 2411 r->man->last = roffce_node; 2412 r->man->next = ROFF_NEXT_SIBLING; 2413 roffce_lines = 0; 2414 roffce_node = NULL; 2415 } 2416 2417 /* 2418 * Fully handle known macros when they are structurally 2419 * required or when the conditional evaluated to true. 2420 */ 2421 2422 if (t == ROFF_break) { 2423 if (irc & ROFF_LOOPMASK) 2424 irc = ROFF_IGN | ROFF_LOOPEXIT; 2425 else if (rr) { 2426 for (bl = r->last; bl != NULL; bl = bl->parent) { 2427 bl->rule = 0; 2428 if (bl->tok == ROFF_while) 2429 break; 2430 } 2431 } 2432 } else if (t != TOKEN_NONE && 2433 (rr || roffs[t].flags & ROFFMAC_STRUCT)) 2434 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2435 else 2436 irc |= rr ? 
ROFF_CONT : ROFF_IGN; 2437 return irc; 2438 } 2439 2440 /* 2441 * Parse and process a text line in conditional scope. 2442 */ 2443 static int 2444 roff_cond_text(ROFF_ARGS) 2445 { 2446 int irc, rr; 2447 2448 rr = 1; /* If arguments follow "\}", preserve them. */ 2449 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2450 if (rr) 2451 irc |= ROFF_CONT; 2452 return irc; 2453 } 2454 2455 /* --- handling of numeric and conditional expressions -------------------- */ 2456 2457 /* 2458 * Parse a single signed integer number. Stop at the first non-digit. 2459 * If there is at least one digit, return success and advance the 2460 * parse point, else return failure and let the parse point unchanged. 2461 * Ignore overflows, treat them just like the C language. 2462 */ 2463 static int 2464 roff_getnum(const char *v, int *pos, int *res, int flags) 2465 { 2466 int myres, scaled, n, p; 2467 2468 if (NULL == res) 2469 res = &myres; 2470 2471 p = *pos; 2472 n = v[p] == '-'; 2473 if (n || v[p] == '+') 2474 p++; 2475 2476 if (flags & ROFFNUM_WHITE) 2477 while (isspace((unsigned char)v[p])) 2478 p++; 2479 2480 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2481 *res = 10 * *res + v[p] - '0'; 2482 if (p == *pos + n) 2483 return 0; 2484 2485 if (n) 2486 *res = -*res; 2487 2488 /* Each number may be followed by one optional scaling unit. 
*/ 2489 2490 switch (v[p]) { 2491 case 'f': 2492 scaled = *res * 65536; 2493 break; 2494 case 'i': 2495 scaled = *res * 240; 2496 break; 2497 case 'c': 2498 scaled = *res * 240 / 2.54; 2499 break; 2500 case 'v': 2501 case 'P': 2502 scaled = *res * 40; 2503 break; 2504 case 'm': 2505 case 'n': 2506 scaled = *res * 24; 2507 break; 2508 case 'p': 2509 scaled = *res * 10 / 3; 2510 break; 2511 case 'u': 2512 scaled = *res; 2513 break; 2514 case 'M': 2515 scaled = *res * 6 / 25; 2516 break; 2517 default: 2518 scaled = *res; 2519 p--; 2520 break; 2521 } 2522 if (flags & ROFFNUM_SCALE) 2523 *res = scaled; 2524 2525 *pos = p + 1; 2526 return 1; 2527 } 2528 2529 /* 2530 * Evaluate a string comparison condition. 2531 * The first character is the delimiter. 2532 * Succeed if the string up to its second occurrence 2533 * matches the string up to its third occurence. 2534 * Advance the cursor after the third occurrence 2535 * or lacking that, to the end of the line. 2536 */ 2537 static int 2538 roff_evalstrcond(const char *v, int *pos) 2539 { 2540 const char *s1, *s2, *s3; 2541 int match; 2542 2543 match = 0; 2544 s1 = v + *pos; /* initial delimiter */ 2545 s2 = s1 + 1; /* for scanning the first string */ 2546 s3 = strchr(s2, *s1); /* for scanning the second string */ 2547 2548 if (NULL == s3) /* found no middle delimiter */ 2549 goto out; 2550 2551 while ('\0' != *++s3) { 2552 if (*s2 != *s3) { /* mismatch */ 2553 s3 = strchr(s3, *s1); 2554 break; 2555 } 2556 if (*s3 == *s1) { /* found the final delimiter */ 2557 match = 1; 2558 break; 2559 } 2560 s2++; 2561 } 2562 2563 out: 2564 if (NULL == s3) 2565 s3 = strchr(s2, '\0'); 2566 else if (*s3 != '\0') 2567 s3++; 2568 *pos = s3 - v; 2569 return match; 2570 } 2571 2572 /* 2573 * Evaluate an optionally negated single character, numerical, 2574 * or string condition. 
2575 */ 2576 static int 2577 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2578 { 2579 const char *start, *end; 2580 char *cp, *name; 2581 size_t sz; 2582 int deftype, len, number, savepos, istrue, wanttrue; 2583 2584 if ('!' == v[*pos]) { 2585 wanttrue = 0; 2586 (*pos)++; 2587 } else 2588 wanttrue = 1; 2589 2590 switch (v[*pos]) { 2591 case '\0': 2592 return 0; 2593 case 'n': 2594 case 'o': 2595 (*pos)++; 2596 return wanttrue; 2597 case 'e': 2598 case 't': 2599 case 'v': 2600 (*pos)++; 2601 return !wanttrue; 2602 case 'c': 2603 do { 2604 (*pos)++; 2605 } while (v[*pos] == ' '); 2606 2607 /* 2608 * Quirk for groff compatibility: 2609 * The horizontal tab is neither available nor unavailable. 2610 */ 2611 2612 if (v[*pos] == '\t') { 2613 (*pos)++; 2614 return 0; 2615 } 2616 2617 /* Printable ASCII characters are available. */ 2618 2619 if (v[*pos] != '\\') { 2620 (*pos)++; 2621 return wanttrue; 2622 } 2623 2624 end = v + ++*pos; 2625 switch (mandoc_escape(&end, &start, &len)) { 2626 case ESCAPE_SPECIAL: 2627 istrue = mchars_spec2cp(start, len) != -1; 2628 break; 2629 case ESCAPE_UNICODE: 2630 istrue = 1; 2631 break; 2632 case ESCAPE_NUMBERED: 2633 istrue = mchars_num2char(start, len) != -1; 2634 break; 2635 default: 2636 istrue = !wanttrue; 2637 break; 2638 } 2639 *pos = end - v; 2640 return istrue == wanttrue; 2641 case 'd': 2642 case 'r': 2643 cp = v + *pos + 1; 2644 while (*cp == ' ') 2645 cp++; 2646 name = cp; 2647 sz = roff_getname(r, &cp, ln, cp - v); 2648 if (sz == 0) 2649 istrue = 0; 2650 else if (v[*pos] == 'r') 2651 istrue = roff_hasregn(r, name, sz); 2652 else { 2653 deftype = ROFFDEF_ANY; 2654 roff_getstrn(r, name, sz, &deftype); 2655 istrue = !!deftype; 2656 } 2657 *pos = (name + sz) - v; 2658 return istrue == wanttrue; 2659 default: 2660 break; 2661 } 2662 2663 savepos = *pos; 2664 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2665 return (number > 0) == wanttrue; 2666 else if (*pos == savepos) 2667 return roff_evalstrcond(v, pos) 
== wanttrue; 2668 else 2669 return 0; 2670 } 2671 2672 static int 2673 roff_line_ignore(ROFF_ARGS) 2674 { 2675 2676 return ROFF_IGN; 2677 } 2678 2679 static int 2680 roff_insec(ROFF_ARGS) 2681 { 2682 2683 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2684 return ROFF_IGN; 2685 } 2686 2687 static int 2688 roff_unsupp(ROFF_ARGS) 2689 { 2690 2691 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2692 return ROFF_IGN; 2693 } 2694 2695 static int 2696 roff_cond(ROFF_ARGS) 2697 { 2698 int irc; 2699 2700 roffnode_push(r, tok, NULL, ln, ppos); 2701 2702 /* 2703 * An `.el' has no conditional body: it will consume the value 2704 * of the current rstack entry set in prior `ie' calls or 2705 * defaults to DENY. 2706 * 2707 * If we're not an `el', however, then evaluate the conditional. 2708 */ 2709 2710 r->last->rule = tok == ROFF_el ? 2711 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2712 roff_evalcond(r, ln, buf->buf, &pos); 2713 2714 /* 2715 * An if-else will put the NEGATION of the current evaluated 2716 * conditional into the stack of rules. 2717 */ 2718 2719 if (tok == ROFF_ie) { 2720 if (r->rstackpos + 1 == r->rstacksz) { 2721 r->rstacksz += 16; 2722 r->rstack = mandoc_reallocarray(r->rstack, 2723 r->rstacksz, sizeof(int)); 2724 } 2725 r->rstack[++r->rstackpos] = !r->last->rule; 2726 } 2727 2728 /* If the parent has false as its rule, then so do we. */ 2729 2730 if (r->last->parent && !r->last->parent->rule) 2731 r->last->rule = 0; 2732 2733 /* 2734 * Determine scope. 2735 * If there is nothing on the line after the conditional, 2736 * not even whitespace, use next-line scope. 2737 * Except that .while does not support next-line scope. 2738 */ 2739 2740 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2741 r->last->endspan = 2; 2742 goto out; 2743 } 2744 2745 while (buf->buf[pos] == ' ') 2746 pos++; 2747 2748 /* An opening brace requests multiline scope. 
*/ 2749 2750 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2751 r->last->endspan = -1; 2752 pos += 2; 2753 while (buf->buf[pos] == ' ') 2754 pos++; 2755 goto out; 2756 } 2757 2758 /* 2759 * Anything else following the conditional causes 2760 * single-line scope. Warn if the scope contains 2761 * nothing but trailing whitespace. 2762 */ 2763 2764 if (buf->buf[pos] == '\0') 2765 mandoc_msg(MANDOCERR_COND_EMPTY, 2766 ln, ppos, "%s", roff_name[tok]); 2767 2768 r->last->endspan = 1; 2769 2770 out: 2771 *offs = pos; 2772 irc = ROFF_RERUN; 2773 if (tok == ROFF_while) 2774 irc |= ROFF_WHILE; 2775 return irc; 2776 } 2777 2778 static int 2779 roff_ds(ROFF_ARGS) 2780 { 2781 char *string; 2782 const char *name; 2783 size_t namesz; 2784 2785 /* Ignore groff compatibility mode for now. */ 2786 2787 if (tok == ROFF_ds1) 2788 tok = ROFF_ds; 2789 else if (tok == ROFF_as1) 2790 tok = ROFF_as; 2791 2792 /* 2793 * The first word is the name of the string. 2794 * If it is empty or terminated by an escape sequence, 2795 * abort the `ds' request without defining anything. 2796 */ 2797 2798 name = string = buf->buf + pos; 2799 if (*name == '\0') 2800 return ROFF_IGN; 2801 2802 namesz = roff_getname(r, &string, ln, pos); 2803 switch (name[namesz]) { 2804 case '\\': 2805 return ROFF_IGN; 2806 case '\t': 2807 string = buf->buf + pos + namesz; 2808 break; 2809 default: 2810 break; 2811 } 2812 2813 /* Read past the initial double-quote, if any. */ 2814 if (*string == '"') 2815 string++; 2816 2817 /* The rest is the value. */ 2818 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2819 ROFF_as == tok); 2820 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2821 return ROFF_IGN; 2822 } 2823 2824 /* 2825 * Parse a single operator, one or two characters long. 2826 * If the operator is recognized, return success and advance the 2827 * parse point, else return failure and let the parse point unchanged. 
2828 */ 2829 static int 2830 roff_getop(const char *v, int *pos, char *res) 2831 { 2832 2833 *res = v[*pos]; 2834 2835 switch (*res) { 2836 case '+': 2837 case '-': 2838 case '*': 2839 case '/': 2840 case '%': 2841 case '&': 2842 case ':': 2843 break; 2844 case '<': 2845 switch (v[*pos + 1]) { 2846 case '=': 2847 *res = 'l'; 2848 (*pos)++; 2849 break; 2850 case '>': 2851 *res = '!'; 2852 (*pos)++; 2853 break; 2854 case '?': 2855 *res = 'i'; 2856 (*pos)++; 2857 break; 2858 default: 2859 break; 2860 } 2861 break; 2862 case '>': 2863 switch (v[*pos + 1]) { 2864 case '=': 2865 *res = 'g'; 2866 (*pos)++; 2867 break; 2868 case '?': 2869 *res = 'a'; 2870 (*pos)++; 2871 break; 2872 default: 2873 break; 2874 } 2875 break; 2876 case '=': 2877 if ('=' == v[*pos + 1]) 2878 (*pos)++; 2879 break; 2880 default: 2881 return 0; 2882 } 2883 (*pos)++; 2884 2885 return *res; 2886 } 2887 2888 /* 2889 * Evaluate either a parenthesized numeric expression 2890 * or a single signed integer number. 2891 */ 2892 static int 2893 roff_evalpar(struct roff *r, int ln, 2894 const char *v, int *pos, int *res, int flags) 2895 { 2896 2897 if ('(' != v[*pos]) 2898 return roff_getnum(v, pos, res, flags); 2899 2900 (*pos)++; 2901 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2902 return 0; 2903 2904 /* 2905 * Omission of the closing parenthesis 2906 * is an error in validation mode, 2907 * but ignored in evaluation mode. 2908 */ 2909 2910 if (')' == v[*pos]) 2911 (*pos)++; 2912 else if (NULL == res) 2913 return 0; 2914 2915 return 1; 2916 } 2917 2918 /* 2919 * Evaluate a complete numeric expression. 2920 * Proceed left to right, there is no concept of precedence. 
2921 */ 2922 static int 2923 roff_evalnum(struct roff *r, int ln, const char *v, 2924 int *pos, int *res, int flags) 2925 { 2926 int mypos, operand2; 2927 char operator; 2928 2929 if (NULL == pos) { 2930 mypos = 0; 2931 pos = &mypos; 2932 } 2933 2934 if (flags & ROFFNUM_WHITE) 2935 while (isspace((unsigned char)v[*pos])) 2936 (*pos)++; 2937 2938 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2939 return 0; 2940 2941 while (1) { 2942 if (flags & ROFFNUM_WHITE) 2943 while (isspace((unsigned char)v[*pos])) 2944 (*pos)++; 2945 2946 if ( ! roff_getop(v, pos, &operator)) 2947 break; 2948 2949 if (flags & ROFFNUM_WHITE) 2950 while (isspace((unsigned char)v[*pos])) 2951 (*pos)++; 2952 2953 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2954 return 0; 2955 2956 if (flags & ROFFNUM_WHITE) 2957 while (isspace((unsigned char)v[*pos])) 2958 (*pos)++; 2959 2960 if (NULL == res) 2961 continue; 2962 2963 switch (operator) { 2964 case '+': 2965 *res += operand2; 2966 break; 2967 case '-': 2968 *res -= operand2; 2969 break; 2970 case '*': 2971 *res *= operand2; 2972 break; 2973 case '/': 2974 if (operand2 == 0) { 2975 mandoc_msg(MANDOCERR_DIVZERO, 2976 ln, *pos, "%s", v); 2977 *res = 0; 2978 break; 2979 } 2980 *res /= operand2; 2981 break; 2982 case '%': 2983 if (operand2 == 0) { 2984 mandoc_msg(MANDOCERR_DIVZERO, 2985 ln, *pos, "%s", v); 2986 *res = 0; 2987 break; 2988 } 2989 *res %= operand2; 2990 break; 2991 case '<': 2992 *res = *res < operand2; 2993 break; 2994 case '>': 2995 *res = *res > operand2; 2996 break; 2997 case 'l': 2998 *res = *res <= operand2; 2999 break; 3000 case 'g': 3001 *res = *res >= operand2; 3002 break; 3003 case '=': 3004 *res = *res == operand2; 3005 break; 3006 case '!': 3007 *res = *res != operand2; 3008 break; 3009 case '&': 3010 *res = *res && operand2; 3011 break; 3012 case ':': 3013 *res = *res || operand2; 3014 break; 3015 case 'i': 3016 if (operand2 < *res) 3017 *res = operand2; 3018 break; 3019 case 'a': 3020 if (operand2 > *res) 3021 *res 
= operand2; 3022 break; 3023 default: 3024 abort(); 3025 } 3026 } 3027 return 1; 3028 } 3029 3030 /* --- register management ------------------------------------------------ */ 3031 3032 void 3033 roff_setreg(struct roff *r, const char *name, int val, char sign) 3034 { 3035 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 3036 } 3037 3038 static void 3039 roff_setregn(struct roff *r, const char *name, size_t len, 3040 int val, char sign, int step) 3041 { 3042 struct roffreg *reg; 3043 3044 /* Search for an existing register with the same name. */ 3045 reg = r->regtab; 3046 3047 while (reg != NULL && (reg->key.sz != len || 3048 strncmp(reg->key.p, name, len) != 0)) 3049 reg = reg->next; 3050 3051 if (NULL == reg) { 3052 /* Create a new register. */ 3053 reg = mandoc_malloc(sizeof(struct roffreg)); 3054 reg->key.p = mandoc_strndup(name, len); 3055 reg->key.sz = len; 3056 reg->val = 0; 3057 reg->step = 0; 3058 reg->next = r->regtab; 3059 r->regtab = reg; 3060 } 3061 3062 if ('+' == sign) 3063 reg->val += val; 3064 else if ('-' == sign) 3065 reg->val -= val; 3066 else 3067 reg->val = val; 3068 if (step != INT_MIN) 3069 reg->step = step; 3070 } 3071 3072 /* 3073 * Handle some predefined read-only number registers. 3074 * For now, return -1 if the requested register is not predefined; 3075 * in case a predefined read-only register having the value -1 3076 * were to turn up, another special value would have to be chosen. 3077 */ 3078 static int 3079 roff_getregro(const struct roff *r, const char *name) 3080 { 3081 3082 switch (*name) { 3083 case '$': /* Number of arguments of the last macro evaluated. */ 3084 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 3085 case 'A': /* ASCII approximation mode is always off. */ 3086 return 0; 3087 case 'g': /* Groff compatibility mode is always on. */ 3088 return 1; 3089 case 'H': /* Fixed horizontal resolution. */ 3090 return 24; 3091 case 'j': /* Always adjust left margin only. 
*/ 3092 return 0; 3093 case 'T': /* Some output device is always defined. */ 3094 return 1; 3095 case 'V': /* Fixed vertical resolution. */ 3096 return 40; 3097 default: 3098 return -1; 3099 } 3100 } 3101 3102 int 3103 roff_getreg(struct roff *r, const char *name) 3104 { 3105 return roff_getregn(r, name, strlen(name), '\0'); 3106 } 3107 3108 static int 3109 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3110 { 3111 struct roffreg *reg; 3112 int val; 3113 3114 if ('.' == name[0] && 2 == len) { 3115 val = roff_getregro(r, name + 1); 3116 if (-1 != val) 3117 return val; 3118 } 3119 3120 for (reg = r->regtab; reg; reg = reg->next) { 3121 if (len == reg->key.sz && 3122 0 == strncmp(name, reg->key.p, len)) { 3123 switch (sign) { 3124 case '+': 3125 reg->val += reg->step; 3126 break; 3127 case '-': 3128 reg->val -= reg->step; 3129 break; 3130 default: 3131 break; 3132 } 3133 return reg->val; 3134 } 3135 } 3136 3137 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3138 return 0; 3139 } 3140 3141 static int 3142 roff_hasregn(const struct roff *r, const char *name, size_t len) 3143 { 3144 struct roffreg *reg; 3145 int val; 3146 3147 if ('.' 
== name[0] && 2 == len) { 3148 val = roff_getregro(r, name + 1); 3149 if (-1 != val) 3150 return 1; 3151 } 3152 3153 for (reg = r->regtab; reg; reg = reg->next) 3154 if (len == reg->key.sz && 3155 0 == strncmp(name, reg->key.p, len)) 3156 return 1; 3157 3158 return 0; 3159 } 3160 3161 static void 3162 roff_freereg(struct roffreg *reg) 3163 { 3164 struct roffreg *old_reg; 3165 3166 while (NULL != reg) { 3167 free(reg->key.p); 3168 old_reg = reg; 3169 reg = reg->next; 3170 free(old_reg); 3171 } 3172 } 3173 3174 static int 3175 roff_nr(ROFF_ARGS) 3176 { 3177 char *key, *val, *step; 3178 size_t keysz; 3179 int iv, is, len; 3180 char sign; 3181 3182 key = val = buf->buf + pos; 3183 if (*key == '\0') 3184 return ROFF_IGN; 3185 3186 keysz = roff_getname(r, &val, ln, pos); 3187 if (key[keysz] == '\\' || key[keysz] == '\t') 3188 return ROFF_IGN; 3189 3190 sign = *val; 3191 if (sign == '+' || sign == '-') 3192 val++; 3193 3194 len = 0; 3195 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3196 return ROFF_IGN; 3197 3198 step = val + len; 3199 while (isspace((unsigned char)*step)) 3200 step++; 3201 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3202 is = INT_MIN; 3203 3204 roff_setregn(r, key, keysz, iv, sign, is); 3205 return ROFF_IGN; 3206 } 3207 3208 static int 3209 roff_rr(ROFF_ARGS) 3210 { 3211 struct roffreg *reg, **prev; 3212 char *name, *cp; 3213 size_t namesz; 3214 3215 name = cp = buf->buf + pos; 3216 if (*name == '\0') 3217 return ROFF_IGN; 3218 namesz = roff_getname(r, &cp, ln, pos); 3219 name[namesz] = '\0'; 3220 3221 prev = &r->regtab; 3222 while (1) { 3223 reg = *prev; 3224 if (reg == NULL || !strcmp(name, reg->key.p)) 3225 break; 3226 prev = ®->next; 3227 } 3228 if (reg != NULL) { 3229 *prev = reg->next; 3230 free(reg->key.p); 3231 free(reg); 3232 } 3233 return ROFF_IGN; 3234 } 3235 3236 /* --- handler functions for roff requests -------------------------------- */ 3237 3238 static int 3239 roff_rm(ROFF_ARGS) 3240 { 3241 const char *name; 3242 
char *cp; 3243 size_t namesz; 3244 3245 cp = buf->buf + pos; 3246 while (*cp != '\0') { 3247 name = cp; 3248 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3249 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3250 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3251 if (name[namesz] == '\\' || name[namesz] == '\t') 3252 break; 3253 } 3254 return ROFF_IGN; 3255 } 3256 3257 static int 3258 roff_it(ROFF_ARGS) 3259 { 3260 int iv; 3261 3262 /* Parse the number of lines. */ 3263 3264 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3265 mandoc_msg(MANDOCERR_IT_NONUM, 3266 ln, ppos, "%s", buf->buf + 1); 3267 return ROFF_IGN; 3268 } 3269 3270 while (isspace((unsigned char)buf->buf[pos])) 3271 pos++; 3272 3273 /* 3274 * Arm the input line trap. 3275 * Special-casing "an-trap" is an ugly workaround to cope 3276 * with DocBook stupidly fiddling with man(7) internals. 3277 */ 3278 3279 roffit_lines = iv; 3280 roffit_macro = mandoc_strdup(iv != 1 || 3281 strcmp(buf->buf + pos, "an-trap") ? 
3282 buf->buf + pos : "br"); 3283 return ROFF_IGN; 3284 } 3285 3286 static int 3287 roff_Dd(ROFF_ARGS) 3288 { 3289 int mask; 3290 enum roff_tok t, te; 3291 3292 switch (tok) { 3293 case ROFF_Dd: 3294 tok = MDOC_Dd; 3295 te = MDOC_MAX; 3296 if (r->format == 0) 3297 r->format = MPARSE_MDOC; 3298 mask = MPARSE_MDOC | MPARSE_QUICK; 3299 break; 3300 case ROFF_TH: 3301 tok = MAN_TH; 3302 te = MAN_MAX; 3303 if (r->format == 0) 3304 r->format = MPARSE_MAN; 3305 mask = MPARSE_QUICK; 3306 break; 3307 default: 3308 abort(); 3309 } 3310 if ((r->options & mask) == 0) 3311 for (t = tok; t < te; t++) 3312 roff_setstr(r, roff_name[t], NULL, 0); 3313 return ROFF_CONT; 3314 } 3315 3316 static int 3317 roff_TE(ROFF_ARGS) 3318 { 3319 r->man->flags &= ~ROFF_NONOFILL; 3320 if (r->tbl == NULL) { 3321 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3322 return ROFF_IGN; 3323 } 3324 if (tbl_end(r->tbl, 0) == 0) { 3325 r->tbl = NULL; 3326 free(buf->buf); 3327 buf->buf = mandoc_strdup(".sp"); 3328 buf->sz = 4; 3329 *offs = 0; 3330 return ROFF_REPARSE; 3331 } 3332 r->tbl = NULL; 3333 return ROFF_IGN; 3334 } 3335 3336 static int 3337 roff_T_(ROFF_ARGS) 3338 { 3339 3340 if (NULL == r->tbl) 3341 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3342 else 3343 tbl_restart(ln, ppos, r->tbl); 3344 3345 return ROFF_IGN; 3346 } 3347 3348 /* 3349 * Handle in-line equation delimiters. 3350 */ 3351 static int 3352 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3353 { 3354 char *cp1, *cp2; 3355 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3356 3357 /* 3358 * Outside equations, look for an opening delimiter. 3359 * If we are inside an equation, we already know it is 3360 * in-line, or this function wouldn't have been called; 3361 * so look for a closing delimiter. 3362 */ 3363 3364 cp1 = buf->buf + pos; 3365 cp2 = strchr(cp1, r->eqn == NULL ? 
3366 r->last_eqn->odelim : r->last_eqn->cdelim); 3367 if (cp2 == NULL) 3368 return ROFF_CONT; 3369 3370 *cp2++ = '\0'; 3371 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3372 3373 /* Handle preceding text, protecting whitespace. */ 3374 3375 if (*buf->buf != '\0') { 3376 if (r->eqn == NULL) 3377 bef_pr = "\\&"; 3378 bef_nl = "\n"; 3379 } 3380 3381 /* 3382 * Prepare replacing the delimiter with an equation macro 3383 * and drop leading white space from the equation. 3384 */ 3385 3386 if (r->eqn == NULL) { 3387 while (*cp2 == ' ') 3388 cp2++; 3389 mac = ".EQ"; 3390 } else 3391 mac = ".EN"; 3392 3393 /* Handle following text, protecting whitespace. */ 3394 3395 if (*cp2 != '\0') { 3396 aft_nl = "\n"; 3397 if (r->eqn != NULL) 3398 aft_pr = "\\&"; 3399 } 3400 3401 /* Do the actual replacement. */ 3402 3403 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3404 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3405 free(buf->buf); 3406 buf->buf = cp1; 3407 3408 /* Toggle the in-line state of the eqn subsystem. 
*/ 3409 3410 r->eqn_inline = r->eqn == NULL; 3411 return ROFF_REPARSE; 3412 } 3413 3414 static int 3415 roff_EQ(ROFF_ARGS) 3416 { 3417 struct roff_node *n; 3418 3419 if (r->man->meta.macroset == MACROSET_MAN) 3420 man_breakscope(r->man, ROFF_EQ); 3421 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3422 if (ln > r->man->last->line) 3423 n->flags |= NODE_LINE; 3424 n->eqn = eqn_box_new(); 3425 roff_node_append(r->man, n); 3426 r->man->next = ROFF_NEXT_SIBLING; 3427 3428 assert(r->eqn == NULL); 3429 if (r->last_eqn == NULL) 3430 r->last_eqn = eqn_alloc(); 3431 else 3432 eqn_reset(r->last_eqn); 3433 r->eqn = r->last_eqn; 3434 r->eqn->node = n; 3435 3436 if (buf->buf[pos] != '\0') 3437 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3438 ".EQ %s", buf->buf + pos); 3439 3440 return ROFF_IGN; 3441 } 3442 3443 static int 3444 roff_EN(ROFF_ARGS) 3445 { 3446 if (r->eqn != NULL) { 3447 eqn_parse(r->eqn); 3448 r->eqn = NULL; 3449 } else 3450 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3451 if (buf->buf[pos] != '\0') 3452 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3453 "EN %s", buf->buf + pos); 3454 return ROFF_IGN; 3455 } 3456 3457 static int 3458 roff_TS(ROFF_ARGS) 3459 { 3460 if (r->tbl != NULL) { 3461 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3462 tbl_end(r->tbl, 0); 3463 } 3464 r->man->flags |= ROFF_NONOFILL; 3465 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3466 if (r->last_tbl == NULL) 3467 r->first_tbl = r->tbl; 3468 r->last_tbl = r->tbl; 3469 return ROFF_IGN; 3470 } 3471 3472 static int 3473 roff_noarg(ROFF_ARGS) 3474 { 3475 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3476 man_breakscope(r->man, tok); 3477 if (tok == ROFF_brp) 3478 tok = ROFF_br; 3479 roff_elem_alloc(r->man, ln, ppos, tok); 3480 if (buf->buf[pos] != '\0') 3481 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3482 "%s %s", roff_name[tok], buf->buf + pos); 3483 if (tok == ROFF_nf) 3484 r->man->flags |= ROFF_NOFILL; 3485 else if (tok == ROFF_fi) 3486 r->man->flags &= ~ROFF_NOFILL; 3487 
r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3488 r->man->next = ROFF_NEXT_SIBLING; 3489 return ROFF_IGN; 3490 } 3491 3492 static int 3493 roff_onearg(ROFF_ARGS) 3494 { 3495 struct roff_node *n; 3496 char *cp; 3497 int npos; 3498 3499 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3500 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3501 tok == ROFF_ti)) 3502 man_breakscope(r->man, tok); 3503 3504 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3505 r->man->last = roffce_node; 3506 r->man->next = ROFF_NEXT_SIBLING; 3507 } 3508 3509 roff_elem_alloc(r->man, ln, ppos, tok); 3510 n = r->man->last; 3511 3512 cp = buf->buf + pos; 3513 if (*cp != '\0') { 3514 while (*cp != '\0' && *cp != ' ') 3515 cp++; 3516 while (*cp == ' ') 3517 *cp++ = '\0'; 3518 if (*cp != '\0') 3519 mandoc_msg(MANDOCERR_ARG_EXCESS, 3520 ln, (int)(cp - buf->buf), 3521 "%s ... %s", roff_name[tok], cp); 3522 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3523 } 3524 3525 if (tok == ROFF_ce || tok == ROFF_rj) { 3526 if (r->man->last->type == ROFFT_ELEM) { 3527 roff_word_alloc(r->man, ln, pos, "1"); 3528 r->man->last->flags |= NODE_NOSRC; 3529 } 3530 npos = 0; 3531 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3532 &roffce_lines, 0) == 0) { 3533 mandoc_msg(MANDOCERR_CE_NONUM, 3534 ln, pos, "ce %s", buf->buf + pos); 3535 roffce_lines = 1; 3536 } 3537 if (roffce_lines < 1) { 3538 r->man->last = r->man->last->parent; 3539 roffce_node = NULL; 3540 roffce_lines = 0; 3541 } else 3542 roffce_node = r->man->last->parent; 3543 } else { 3544 n->flags |= NODE_VALID | NODE_ENDED; 3545 r->man->last = n; 3546 } 3547 n->flags |= NODE_LINE; 3548 r->man->next = ROFF_NEXT_SIBLING; 3549 return ROFF_IGN; 3550 } 3551 3552 static int 3553 roff_manyarg(ROFF_ARGS) 3554 { 3555 struct roff_node *n; 3556 char *sp, *ep; 3557 3558 roff_elem_alloc(r->man, ln, ppos, tok); 3559 n = r->man->last; 3560 3561 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3562 while (*ep != '\0' && 
*ep != ' ') 3563 ep++; 3564 while (*ep == ' ') 3565 *ep++ = '\0'; 3566 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3567 } 3568 3569 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3570 r->man->last = n; 3571 r->man->next = ROFF_NEXT_SIBLING; 3572 return ROFF_IGN; 3573 } 3574 3575 static int 3576 roff_als(ROFF_ARGS) 3577 { 3578 char *oldn, *newn, *end, *value; 3579 size_t oldsz, newsz, valsz; 3580 3581 newn = oldn = buf->buf + pos; 3582 if (*newn == '\0') 3583 return ROFF_IGN; 3584 3585 newsz = roff_getname(r, &oldn, ln, pos); 3586 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3587 return ROFF_IGN; 3588 3589 end = oldn; 3590 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3591 if (oldsz == 0) 3592 return ROFF_IGN; 3593 3594 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3595 (int)oldsz, oldn); 3596 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3597 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3598 free(value); 3599 return ROFF_IGN; 3600 } 3601 3602 /* 3603 * The .break request only makes sense inside conditionals, 3604 * and that case is already handled in roff_cond_sub(). 3605 */ 3606 static int 3607 roff_break(ROFF_ARGS) 3608 { 3609 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3610 return ROFF_IGN; 3611 } 3612 3613 static int 3614 roff_cc(ROFF_ARGS) 3615 { 3616 const char *p; 3617 3618 p = buf->buf + pos; 3619 3620 if (*p == '\0' || (r->control = *p++) == '.') 3621 r->control = '\0'; 3622 3623 if (*p != '\0') 3624 mandoc_msg(MANDOCERR_ARG_EXCESS, 3625 ln, p - buf->buf, "cc ... %s", p); 3626 3627 return ROFF_IGN; 3628 } 3629 3630 static int 3631 roff_char(ROFF_ARGS) 3632 { 3633 const char *p, *kp, *vp; 3634 size_t ksz, vsz; 3635 int font; 3636 3637 /* Parse the character to be replaced. 
*/ 3638 3639 kp = buf->buf + pos; 3640 p = kp + 1; 3641 if (*kp == '\0' || (*kp == '\\' && 3642 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3643 (*p != ' ' && *p != '\0')) { 3644 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3645 return ROFF_IGN; 3646 } 3647 ksz = p - kp; 3648 while (*p == ' ') 3649 p++; 3650 3651 /* 3652 * If the replacement string contains a font escape sequence, 3653 * we have to restore the font at the end. 3654 */ 3655 3656 vp = p; 3657 vsz = strlen(p); 3658 font = 0; 3659 while (*p != '\0') { 3660 if (*p++ != '\\') 3661 continue; 3662 switch (mandoc_escape(&p, NULL, NULL)) { 3663 case ESCAPE_FONT: 3664 case ESCAPE_FONTROMAN: 3665 case ESCAPE_FONTITALIC: 3666 case ESCAPE_FONTBOLD: 3667 case ESCAPE_FONTBI: 3668 case ESCAPE_FONTCR: 3669 case ESCAPE_FONTCB: 3670 case ESCAPE_FONTCI: 3671 case ESCAPE_FONTPREV: 3672 font++; 3673 break; 3674 default: 3675 break; 3676 } 3677 } 3678 if (font > 1) 3679 mandoc_msg(MANDOCERR_CHAR_FONT, 3680 ln, (int)(vp - buf->buf), "%s", vp); 3681 3682 /* 3683 * Approximate the effect of .char using the .tr tables. 3684 * XXX In groff, .char and .tr interact differently. 3685 */ 3686 3687 if (ksz == 1) { 3688 if (r->xtab == NULL) 3689 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3690 assert((unsigned int)*kp < 128); 3691 free(r->xtab[(int)*kp].p); 3692 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3693 "%s%s", vp, font ? "\fP" : ""); 3694 } else { 3695 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3696 if (font) 3697 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3698 } 3699 return ROFF_IGN; 3700 } 3701 3702 static int 3703 roff_ec(ROFF_ARGS) 3704 { 3705 const char *p; 3706 3707 p = buf->buf + pos; 3708 if (*p == '\0') 3709 r->escape = '\\'; 3710 else { 3711 r->escape = *p; 3712 if (*++p != '\0') 3713 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3714 (int)(p - buf->buf), "ec ... 
%s", p); 3715 } 3716 return ROFF_IGN; 3717 } 3718 3719 static int 3720 roff_eo(ROFF_ARGS) 3721 { 3722 r->escape = '\0'; 3723 if (buf->buf[pos] != '\0') 3724 mandoc_msg(MANDOCERR_ARG_SKIP, 3725 ln, pos, "eo %s", buf->buf + pos); 3726 return ROFF_IGN; 3727 } 3728 3729 static int 3730 roff_nop(ROFF_ARGS) 3731 { 3732 while (buf->buf[pos] == ' ') 3733 pos++; 3734 *offs = pos; 3735 return ROFF_RERUN; 3736 } 3737 3738 static int 3739 roff_tr(ROFF_ARGS) 3740 { 3741 const char *p, *first, *second; 3742 size_t fsz, ssz; 3743 enum mandoc_esc esc; 3744 3745 p = buf->buf + pos; 3746 3747 if (*p == '\0') { 3748 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3749 return ROFF_IGN; 3750 } 3751 3752 while (*p != '\0') { 3753 fsz = ssz = 1; 3754 3755 first = p++; 3756 if (*first == '\\') { 3757 esc = mandoc_escape(&p, NULL, NULL); 3758 if (esc == ESCAPE_ERROR) { 3759 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3760 (int)(p - buf->buf), "%s", first); 3761 return ROFF_IGN; 3762 } 3763 fsz = (size_t)(p - first); 3764 } 3765 3766 second = p++; 3767 if (*second == '\\') { 3768 esc = mandoc_escape(&p, NULL, NULL); 3769 if (esc == ESCAPE_ERROR) { 3770 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3771 (int)(p - buf->buf), "%s", second); 3772 return ROFF_IGN; 3773 } 3774 ssz = (size_t)(p - second); 3775 } else if (*second == '\0') { 3776 mandoc_msg(MANDOCERR_TR_ODD, ln, 3777 (int)(first - buf->buf), "tr %s", first); 3778 second = " "; 3779 p--; 3780 } 3781 3782 if (fsz > 1) { 3783 roff_setstrn(&r->xmbtab, first, fsz, 3784 second, ssz, 0); 3785 continue; 3786 } 3787 3788 if (r->xtab == NULL) 3789 r->xtab = mandoc_calloc(128, 3790 sizeof(struct roffstr)); 3791 3792 free(r->xtab[(int)*first].p); 3793 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3794 r->xtab[(int)*first].sz = ssz; 3795 } 3796 3797 return ROFF_IGN; 3798 } 3799 3800 /* 3801 * Implementation of the .return request. 3802 * There is no need to call roff_userret() from here. 
3803 * The read module will call that after rewinding the reader stack 3804 * to the place from where the current macro was called. 3805 */ 3806 static int 3807 roff_return(ROFF_ARGS) 3808 { 3809 if (r->mstackpos >= 0) 3810 return ROFF_IGN | ROFF_USERRET; 3811 3812 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3813 return ROFF_IGN; 3814 } 3815 3816 static int 3817 roff_rn(ROFF_ARGS) 3818 { 3819 const char *value; 3820 char *oldn, *newn, *end; 3821 size_t oldsz, newsz; 3822 int deftype; 3823 3824 oldn = newn = buf->buf + pos; 3825 if (*oldn == '\0') 3826 return ROFF_IGN; 3827 3828 oldsz = roff_getname(r, &newn, ln, pos); 3829 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3830 return ROFF_IGN; 3831 3832 end = newn; 3833 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3834 if (newsz == 0) 3835 return ROFF_IGN; 3836 3837 deftype = ROFFDEF_ANY; 3838 value = roff_getstrn(r, oldn, oldsz, &deftype); 3839 switch (deftype) { 3840 case ROFFDEF_USER: 3841 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3842 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3843 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3844 break; 3845 case ROFFDEF_PRE: 3846 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3847 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3848 break; 3849 case ROFFDEF_REN: 3850 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3851 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3852 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3853 break; 3854 case ROFFDEF_STD: 3855 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3856 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3857 break; 3858 default: 3859 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3860 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3861 break; 3862 } 3863 return ROFF_IGN; 3864 } 3865 3866 static int 3867 roff_shift(ROFF_ARGS) 3868 { 3869 struct mctx *ctx; 3870 int levels, i; 3871 3872 levels = 1; 
3873 if (buf->buf[pos] != '\0' && 3874 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3875 mandoc_msg(MANDOCERR_CE_NONUM, 3876 ln, pos, "shift %s", buf->buf + pos); 3877 levels = 1; 3878 } 3879 if (r->mstackpos < 0) { 3880 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3881 return ROFF_IGN; 3882 } 3883 ctx = r->mstack + r->mstackpos; 3884 if (levels > ctx->argc) { 3885 mandoc_msg(MANDOCERR_SHIFT, 3886 ln, pos, "%d, but max is %d", levels, ctx->argc); 3887 levels = ctx->argc; 3888 } 3889 if (levels == 0) 3890 return ROFF_IGN; 3891 for (i = 0; i < levels; i++) 3892 free(ctx->argv[i]); 3893 ctx->argc -= levels; 3894 for (i = 0; i < ctx->argc; i++) 3895 ctx->argv[i] = ctx->argv[i + levels]; 3896 return ROFF_IGN; 3897 } 3898 3899 static int 3900 roff_so(ROFF_ARGS) 3901 { 3902 char *name, *cp; 3903 3904 name = buf->buf + pos; 3905 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3906 3907 /* 3908 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3909 * opening anything that's not in our cwd or anything beneath 3910 * it. Thus, explicitly disallow traversing up the file-system 3911 * or using absolute paths. 3912 */ 3913 3914 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3915 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3916 buf->sz = mandoc_asprintf(&cp, 3917 ".sp\nSee the file %s.\n.sp", name) + 1; 3918 free(buf->buf); 3919 buf->buf = cp; 3920 *offs = 0; 3921 return ROFF_REPARSE; 3922 } 3923 3924 *offs = pos; 3925 return ROFF_SO; 3926 } 3927 3928 /* --- user defined strings and macros ------------------------------------ */ 3929 3930 static int 3931 roff_userdef(ROFF_ARGS) 3932 { 3933 struct mctx *ctx; 3934 char *arg, *ap, *dst, *src; 3935 size_t sz; 3936 3937 /* If the macro is empty, ignore it altogether. */ 3938 3939 if (*r->current_string == '\0') 3940 return ROFF_IGN; 3941 3942 /* Initialize a new macro stack context. 
*/ 3943 3944 if (++r->mstackpos == r->mstacksz) { 3945 r->mstack = mandoc_recallocarray(r->mstack, 3946 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3947 r->mstacksz += 8; 3948 } 3949 ctx = r->mstack + r->mstackpos; 3950 ctx->argsz = 0; 3951 ctx->argc = 0; 3952 ctx->argv = NULL; 3953 3954 /* 3955 * Collect pointers to macro argument strings, 3956 * NUL-terminating them and escaping quotes. 3957 */ 3958 3959 src = buf->buf + pos; 3960 while (*src != '\0') { 3961 if (ctx->argc == ctx->argsz) { 3962 ctx->argsz += 8; 3963 ctx->argv = mandoc_reallocarray(ctx->argv, 3964 ctx->argsz, sizeof(*ctx->argv)); 3965 } 3966 arg = roff_getarg(r, &src, ln, &pos); 3967 sz = 1; /* For the terminating NUL. */ 3968 for (ap = arg; *ap != '\0'; ap++) 3969 sz += *ap == '"' ? 4 : 1; 3970 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3971 for (ap = arg; *ap != '\0'; ap++) { 3972 if (*ap == '"') { 3973 memcpy(dst, "\\(dq", 4); 3974 dst += 4; 3975 } else 3976 *dst++ = *ap; 3977 } 3978 *dst = '\0'; 3979 free(arg); 3980 } 3981 3982 /* Replace the macro invocation by the macro definition. */ 3983 3984 free(buf->buf); 3985 buf->buf = mandoc_strdup(r->current_string); 3986 buf->sz = strlen(buf->buf) + 1; 3987 *offs = 0; 3988 3989 return buf->buf[buf->sz - 2] == '\n' ? 3990 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3991 } 3992 3993 /* 3994 * Calling a high-level macro that was renamed with .rn. 3995 * r->current_string has already been set up by roff_parse(). 3996 */ 3997 static int 3998 roff_renamed(ROFF_ARGS) 3999 { 4000 char *nbuf; 4001 4002 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 4003 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 4004 free(buf->buf); 4005 buf->buf = nbuf; 4006 *offs = 0; 4007 return ROFF_CONT; 4008 } 4009 4010 /* 4011 * Measure the length in bytes of the roff identifier at *cpp 4012 * and advance the pointer to the next word. 
4013 */ 4014 static size_t 4015 roff_getname(struct roff *r, char **cpp, int ln, int pos) 4016 { 4017 char *name, *cp; 4018 size_t namesz; 4019 4020 name = *cpp; 4021 if (*name == '\0') 4022 return 0; 4023 4024 /* Advance cp to the byte after the end of the name. */ 4025 4026 for (cp = name; 1; cp++) { 4027 namesz = cp - name; 4028 if (*cp == '\0') 4029 break; 4030 if (*cp == ' ' || *cp == '\t') { 4031 cp++; 4032 break; 4033 } 4034 if (*cp != '\\') 4035 continue; 4036 if (cp[1] == '{' || cp[1] == '}') 4037 break; 4038 if (*++cp == '\\') 4039 continue; 4040 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 4041 "%.*s", (int)(cp - name + 1), name); 4042 mandoc_escape((const char **)&cp, NULL, NULL); 4043 break; 4044 } 4045 4046 /* Read past spaces. */ 4047 4048 while (*cp == ' ') 4049 cp++; 4050 4051 *cpp = cp; 4052 return namesz; 4053 } 4054 4055 /* 4056 * Store *string into the user-defined string called *name. 4057 * To clear an existing entry, call with (*r, *name, NULL, 0). 4058 * append == 0: replace mode 4059 * append == 1: single-line append mode 4060 * append == 2: multiline append mode, append '\n' after each call 4061 */ 4062 static void 4063 roff_setstr(struct roff *r, const char *name, const char *string, 4064 int append) 4065 { 4066 size_t namesz; 4067 4068 namesz = strlen(name); 4069 roff_setstrn(&r->strtab, name, namesz, string, 4070 string ? strlen(string) : 0, append); 4071 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 4072 } 4073 4074 static void 4075 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 4076 const char *string, size_t stringsz, int append) 4077 { 4078 struct roffkv *n; 4079 char *c; 4080 int i; 4081 size_t oldch, newch; 4082 4083 /* Search for an existing string with the same name. */ 4084 n = *r; 4085 4086 while (n && (namesz != n->key.sz || 4087 strncmp(n->key.p, name, namesz))) 4088 n = n->next; 4089 4090 if (NULL == n) { 4091 /* Create a new string table entry. 
*/ 4092 n = mandoc_malloc(sizeof(struct roffkv)); 4093 n->key.p = mandoc_strndup(name, namesz); 4094 n->key.sz = namesz; 4095 n->val.p = NULL; 4096 n->val.sz = 0; 4097 n->next = *r; 4098 *r = n; 4099 } else if (0 == append) { 4100 free(n->val.p); 4101 n->val.p = NULL; 4102 n->val.sz = 0; 4103 } 4104 4105 if (NULL == string) 4106 return; 4107 4108 /* 4109 * One additional byte for the '\n' in multiline mode, 4110 * and one for the terminating '\0'. 4111 */ 4112 newch = stringsz + (1 < append ? 2u : 1u); 4113 4114 if (NULL == n->val.p) { 4115 n->val.p = mandoc_malloc(newch); 4116 *n->val.p = '\0'; 4117 oldch = 0; 4118 } else { 4119 oldch = n->val.sz; 4120 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4121 } 4122 4123 /* Skip existing content in the destination buffer. */ 4124 c = n->val.p + (int)oldch; 4125 4126 /* Append new content to the destination buffer. */ 4127 i = 0; 4128 while (i < (int)stringsz) { 4129 /* 4130 * Rudimentary roff copy mode: 4131 * Handle escaped backslashes. 4132 */ 4133 if ('\\' == string[i] && '\\' == string[i + 1]) 4134 i++; 4135 *c++ = string[i++]; 4136 } 4137 4138 /* Append terminating bytes. 
*/ 4139 if (1 < append) 4140 *c++ = '\n'; 4141 4142 *c = '\0'; 4143 n->val.sz = (int)(c - n->val.p); 4144 } 4145 4146 static const char * 4147 roff_getstrn(struct roff *r, const char *name, size_t len, 4148 int *deftype) 4149 { 4150 const struct roffkv *n; 4151 int found, i; 4152 enum roff_tok tok; 4153 4154 found = 0; 4155 for (n = r->strtab; n != NULL; n = n->next) { 4156 if (strncmp(name, n->key.p, len) != 0 || 4157 n->key.p[len] != '\0' || n->val.p == NULL) 4158 continue; 4159 if (*deftype & ROFFDEF_USER) { 4160 *deftype = ROFFDEF_USER; 4161 return n->val.p; 4162 } else { 4163 found = 1; 4164 break; 4165 } 4166 } 4167 for (n = r->rentab; n != NULL; n = n->next) { 4168 if (strncmp(name, n->key.p, len) != 0 || 4169 n->key.p[len] != '\0' || n->val.p == NULL) 4170 continue; 4171 if (*deftype & ROFFDEF_REN) { 4172 *deftype = ROFFDEF_REN; 4173 return n->val.p; 4174 } else { 4175 found = 1; 4176 break; 4177 } 4178 } 4179 for (i = 0; i < PREDEFS_MAX; i++) { 4180 if (strncmp(name, predefs[i].name, len) != 0 || 4181 predefs[i].name[len] != '\0') 4182 continue; 4183 if (*deftype & ROFFDEF_PRE) { 4184 *deftype = ROFFDEF_PRE; 4185 return predefs[i].str; 4186 } else { 4187 found = 1; 4188 break; 4189 } 4190 } 4191 if (r->man->meta.macroset != MACROSET_MAN) { 4192 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4193 if (strncmp(name, roff_name[tok], len) != 0 || 4194 roff_name[tok][len] != '\0') 4195 continue; 4196 if (*deftype & ROFFDEF_STD) { 4197 *deftype = ROFFDEF_STD; 4198 return NULL; 4199 } else { 4200 found = 1; 4201 break; 4202 } 4203 } 4204 } 4205 if (r->man->meta.macroset != MACROSET_MDOC) { 4206 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4207 if (strncmp(name, roff_name[tok], len) != 0 || 4208 roff_name[tok][len] != '\0') 4209 continue; 4210 if (*deftype & ROFFDEF_STD) { 4211 *deftype = ROFFDEF_STD; 4212 return NULL; 4213 } else { 4214 found = 1; 4215 break; 4216 } 4217 } 4218 } 4219 4220 if (found == 0 && *deftype != ROFFDEF_ANY) { 4221 if (*deftype & ROFFDEF_REN) { 
4222 /* 4223 * This might still be a request, 4224 * so do not treat it as undefined yet. 4225 */ 4226 *deftype = ROFFDEF_UNDEF; 4227 return NULL; 4228 } 4229 4230 /* Using an undefined string defines it to be empty. */ 4231 4232 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4233 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4234 } 4235 4236 *deftype = 0; 4237 return NULL; 4238 } 4239 4240 static void 4241 roff_freestr(struct roffkv *r) 4242 { 4243 struct roffkv *n, *nn; 4244 4245 for (n = r; n; n = nn) { 4246 free(n->key.p); 4247 free(n->val.p); 4248 nn = n->next; 4249 free(n); 4250 } 4251 } 4252 4253 /* --- accessors and utility functions ------------------------------------ */ 4254 4255 /* 4256 * Duplicate an input string, making the appropriate character 4257 * conversations (as stipulated by `tr') along the way. 4258 * Returns a heap-allocated string with all the replacements made. 4259 */ 4260 char * 4261 roff_strdup(const struct roff *r, const char *p) 4262 { 4263 const struct roffkv *cp; 4264 char *res; 4265 const char *pp; 4266 size_t ssz, sz; 4267 enum mandoc_esc esc; 4268 4269 if (NULL == r->xmbtab && NULL == r->xtab) 4270 return mandoc_strdup(p); 4271 else if ('\0' == *p) 4272 return mandoc_strdup(""); 4273 4274 /* 4275 * Step through each character looking for term matches 4276 * (remember that a `tr' can be invoked with an escape, which is 4277 * a glyph but the escape is multi-character). 4278 * We only do this if the character hash has been initialised 4279 * and the string is >0 length. 
4280 */ 4281 4282 res = NULL; 4283 ssz = 0; 4284 4285 while ('\0' != *p) { 4286 assert((unsigned int)*p < 128); 4287 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4288 sz = r->xtab[(int)*p].sz; 4289 res = mandoc_realloc(res, ssz + sz + 1); 4290 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4291 ssz += sz; 4292 p++; 4293 continue; 4294 } else if ('\\' != *p) { 4295 res = mandoc_realloc(res, ssz + 2); 4296 res[ssz++] = *p++; 4297 continue; 4298 } 4299 4300 /* Search for term matches. */ 4301 for (cp = r->xmbtab; cp; cp = cp->next) 4302 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4303 break; 4304 4305 if (NULL != cp) { 4306 /* 4307 * A match has been found. 4308 * Append the match to the array and move 4309 * forward by its keysize. 4310 */ 4311 res = mandoc_realloc(res, 4312 ssz + cp->val.sz + 1); 4313 memcpy(res + ssz, cp->val.p, cp->val.sz); 4314 ssz += cp->val.sz; 4315 p += (int)cp->key.sz; 4316 continue; 4317 } 4318 4319 /* 4320 * Handle escapes carefully: we need to copy 4321 * over just the escape itself, or else we might 4322 * do replacements within the escape itself. 4323 * Make sure to pass along the bogus string. 4324 */ 4325 pp = p++; 4326 esc = mandoc_escape(&p, NULL, NULL); 4327 if (ESCAPE_ERROR == esc) { 4328 sz = strlen(pp); 4329 res = mandoc_realloc(res, ssz + sz + 1); 4330 memcpy(res + ssz, pp, sz); 4331 break; 4332 } 4333 /* 4334 * We bail out on bad escapes. 4335 * No need to warn: we already did so when 4336 * roff_expand() was called. 4337 */ 4338 sz = (int)(p - pp); 4339 res = mandoc_realloc(res, ssz + sz + 1); 4340 memcpy(res + ssz, pp, sz); 4341 ssz += sz; 4342 } 4343 4344 res[(int)ssz] = '\0'; 4345 return res; 4346 } 4347 4348 int 4349 roff_getformat(const struct roff *r) 4350 { 4351 4352 return r->format; 4353 } 4354 4355 /* 4356 * Find out whether a line is a macro line or not. 4357 * If it is, adjust the current position and return one; if it isn't, 4358 * return zero and don't change the current position. 
4359 * If the control character has been set with `.cc', then let that grain 4360 * precedence. 4361 * This is slighly contrary to groff, where using the non-breaking 4362 * control character when `cc' has been invoked will cause the 4363 * non-breaking macro contents to be printed verbatim. 4364 */ 4365 int 4366 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4367 { 4368 int pos; 4369 4370 pos = *ppos; 4371 4372 if (r->control != '\0' && cp[pos] == r->control) 4373 pos++; 4374 else if (r->control != '\0') 4375 return 0; 4376 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4377 pos += 2; 4378 else if ('.' == cp[pos] || '\'' == cp[pos]) 4379 pos++; 4380 else 4381 return 0; 4382 4383 while (' ' == cp[pos] || '\t' == cp[pos]) 4384 pos++; 4385 4386 *ppos = pos; 4387 return 1; 4388 } 4389