1 /* $OpenBSD: roff.c,v 1.246 2020/04/08 11:54:14 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the roff(7) parser for mandoc(1). 19 */ 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <limits.h> 25 #include <stddef.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc_ohash.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mandoc_parse.h" 36 #include "libmandoc.h" 37 #include "roff_int.h" 38 #include "tbl_parse.h" 39 #include "eqn_parse.h" 40 41 /* 42 * ASCII_ESC is used to signal from roff_getarg() to roff_expand() 43 * that an escape sequence resulted from copy-in processing and 44 * needs to be checked or interpolated. As it is used nowhere 45 * else, it is defined here rather than in a header file. 46 */ 47 #define ASCII_ESC 27 48 49 /* Maximum number of string expansions per line, to break infinite loops. */ 50 #define EXPAND_LIMIT 1000 51 52 /* Types of definitions of macros and strings. */ 53 #define ROFFDEF_USER (1 << 1) /* User-defined. 
*/ 54 #define ROFFDEF_PRE (1 << 2) /* Predefined. */ 55 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 56 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 57 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 58 ROFFDEF_REN | ROFFDEF_STD) 59 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 60 61 /* --- data types --------------------------------------------------------- */ 62 63 /* 64 * An incredibly-simple string buffer. 65 */ 66 struct roffstr { 67 char *p; /* nil-terminated buffer */ 68 size_t sz; /* saved strlen(p) */ 69 }; 70 71 /* 72 * A key-value roffstr pair as part of a singly-linked list. 73 */ 74 struct roffkv { 75 struct roffstr key; 76 struct roffstr val; 77 struct roffkv *next; /* next in list */ 78 }; 79 80 /* 81 * A single number register as part of a singly-linked list. 82 */ 83 struct roffreg { 84 struct roffstr key; 85 int val; 86 int step; 87 struct roffreg *next; 88 }; 89 90 /* 91 * Association of request and macro names with token IDs. 92 */ 93 struct roffreq { 94 enum roff_tok tok; 95 char name[]; 96 }; 97 98 /* 99 * A macro processing context. 100 * More than one is needed when macro calls are nested. 
101 */ 102 struct mctx { 103 char **argv; 104 int argc; 105 int argsz; 106 }; 107 108 struct roff { 109 struct roff_man *man; /* mdoc or man parser */ 110 struct roffnode *last; /* leaf of stack */ 111 struct mctx *mstack; /* stack of macro contexts */ 112 int *rstack; /* stack of inverted `ie' values */ 113 struct ohash *reqtab; /* request lookup table */ 114 struct roffreg *regtab; /* number registers */ 115 struct roffkv *strtab; /* user-defined strings & macros */ 116 struct roffkv *rentab; /* renamed strings & macros */ 117 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 118 struct roffstr *xtab; /* single-byte trans table (`tr') */ 119 const char *current_string; /* value of last called user macro */ 120 struct tbl_node *first_tbl; /* first table parsed */ 121 struct tbl_node *last_tbl; /* last table parsed */ 122 struct tbl_node *tbl; /* current table being parsed */ 123 struct eqn_node *last_eqn; /* equation parser */ 124 struct eqn_node *eqn; /* active equation parser */ 125 int eqn_inline; /* current equation is inline */ 126 int options; /* parse options */ 127 int mstacksz; /* current size of mstack */ 128 int mstackpos; /* position in mstack */ 129 int rstacksz; /* current size limit of rstack */ 130 int rstackpos; /* position in rstack */ 131 int format; /* current file in mdoc or man format */ 132 char control; /* control character */ 133 char escape; /* escape character */ 134 }; 135 136 /* 137 * A macro definition, condition, or ignored block. 138 */ 139 struct roffnode { 140 enum roff_tok tok; /* type of node */ 141 struct roffnode *parent; /* up one in stack */ 142 int line; /* parse line */ 143 int col; /* parse col */ 144 char *name; /* node name, e.g. 
macro name */ 145 char *end; /* custom end macro of the block */ 146 int endspan; /* scope to: 1=eol 2=next line -1=\} */ 147 int rule; /* content is: 1=evaluated 0=skipped */ 148 }; 149 150 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 151 enum roff_tok tok, /* tok of macro */ \ 152 struct buf *buf, /* input buffer */ \ 153 int ln, /* parse line */ \ 154 int ppos, /* original pos in buffer */ \ 155 int pos, /* current pos in buffer */ \ 156 int *offs /* reset offset of buffer data */ 157 158 typedef int (*roffproc)(ROFF_ARGS); 159 160 struct roffmac { 161 roffproc proc; /* process new macro */ 162 roffproc text; /* process as child text of macro */ 163 roffproc sub; /* process as child of macro */ 164 int flags; 165 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 166 }; 167 168 struct predef { 169 const char *name; /* predefined input name */ 170 const char *str; /* replacement symbol */ 171 }; 172 173 #define PREDEF(__name, __str) \ 174 { (__name), (__str) }, 175 176 /* --- function prototypes ------------------------------------------------ */ 177 178 static int roffnode_cleanscope(struct roff *); 179 static int roffnode_pop(struct roff *); 180 static void roffnode_push(struct roff *, enum roff_tok, 181 const char *, int, int); 182 static void roff_addtbl(struct roff_man *, int, struct tbl_node *); 183 static int roff_als(ROFF_ARGS); 184 static int roff_block(ROFF_ARGS); 185 static int roff_block_text(ROFF_ARGS); 186 static int roff_block_sub(ROFF_ARGS); 187 static int roff_break(ROFF_ARGS); 188 static int roff_cblock(ROFF_ARGS); 189 static int roff_cc(ROFF_ARGS); 190 static int roff_ccond(struct roff *, int, int); 191 static int roff_char(ROFF_ARGS); 192 static int roff_cond(ROFF_ARGS); 193 static int roff_cond_text(ROFF_ARGS); 194 static int roff_cond_sub(ROFF_ARGS); 195 static int roff_ds(ROFF_ARGS); 196 static int roff_ec(ROFF_ARGS); 197 static int roff_eo(ROFF_ARGS); 198 static int roff_eqndelim(struct roff *, struct buf *, int); 199 static int 
roff_evalcond(struct roff *, int, char *, int *); 200 static int roff_evalnum(struct roff *, int, 201 const char *, int *, int *, int); 202 static int roff_evalpar(struct roff *, int, 203 const char *, int *, int *, int); 204 static int roff_evalstrcond(const char *, int *); 205 static int roff_expand(struct roff *, struct buf *, 206 int, int, char); 207 static void roff_free1(struct roff *); 208 static void roff_freereg(struct roffreg *); 209 static void roff_freestr(struct roffkv *); 210 static size_t roff_getname(struct roff *, char **, int, int); 211 static int roff_getnum(const char *, int *, int *, int); 212 static int roff_getop(const char *, int *, char *); 213 static int roff_getregn(struct roff *, 214 const char *, size_t, char); 215 static int roff_getregro(const struct roff *, 216 const char *name); 217 static const char *roff_getstrn(struct roff *, 218 const char *, size_t, int *); 219 static int roff_hasregn(const struct roff *, 220 const char *, size_t); 221 static int roff_insec(ROFF_ARGS); 222 static int roff_it(ROFF_ARGS); 223 static int roff_line_ignore(ROFF_ARGS); 224 static void roff_man_alloc1(struct roff_man *); 225 static void roff_man_free1(struct roff_man *); 226 static int roff_manyarg(ROFF_ARGS); 227 static int roff_noarg(ROFF_ARGS); 228 static int roff_nop(ROFF_ARGS); 229 static int roff_nr(ROFF_ARGS); 230 static int roff_onearg(ROFF_ARGS); 231 static enum roff_tok roff_parse(struct roff *, char *, int *, 232 int, int); 233 static int roff_parsetext(struct roff *, struct buf *, 234 int, int *); 235 static int roff_renamed(ROFF_ARGS); 236 static int roff_return(ROFF_ARGS); 237 static int roff_rm(ROFF_ARGS); 238 static int roff_rn(ROFF_ARGS); 239 static int roff_rr(ROFF_ARGS); 240 static void roff_setregn(struct roff *, const char *, 241 size_t, int, char, int); 242 static void roff_setstr(struct roff *, 243 const char *, const char *, int); 244 static void roff_setstrn(struct roffkv **, const char *, 245 size_t, const char *, size_t, 
int); 246 static int roff_shift(ROFF_ARGS); 247 static int roff_so(ROFF_ARGS); 248 static int roff_tr(ROFF_ARGS); 249 static int roff_Dd(ROFF_ARGS); 250 static int roff_TE(ROFF_ARGS); 251 static int roff_TS(ROFF_ARGS); 252 static int roff_EQ(ROFF_ARGS); 253 static int roff_EN(ROFF_ARGS); 254 static int roff_T_(ROFF_ARGS); 255 static int roff_unsupp(ROFF_ARGS); 256 static int roff_userdef(ROFF_ARGS); 257 258 /* --- constant data ------------------------------------------------------ */ 259 260 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 261 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 262 263 const char *__roff_name[MAN_MAX + 1] = { 264 "br", "ce", "fi", "ft", 265 "ll", "mc", "nf", 266 "po", "rj", "sp", 267 "ta", "ti", NULL, 268 "ab", "ad", "af", "aln", 269 "als", "am", "am1", "ami", 270 "ami1", "as", "as1", "asciify", 271 "backtrace", "bd", "bleedat", "blm", 272 "box", "boxa", "bp", "BP", 273 "break", "breakchar", "brnl", "brp", 274 "brpnl", "c2", "cc", 275 "cf", "cflags", "ch", "char", 276 "chop", "class", "close", "CL", 277 "color", "composite", "continue", "cp", 278 "cropat", "cs", "cu", "da", 279 "dch", "Dd", "de", "de1", 280 "defcolor", "dei", "dei1", "device", 281 "devicem", "di", "do", "ds", 282 "ds1", "dwh", "dt", "ec", 283 "ecr", "ecs", "el", "em", 284 "EN", "eo", "EP", "EQ", 285 "errprint", "ev", "evc", "ex", 286 "fallback", "fam", "fc", "fchar", 287 "fcolor", "fdeferlig", "feature", "fkern", 288 "fl", "flig", "fp", "fps", 289 "fschar", "fspacewidth", "fspecial", "ftr", 290 "fzoom", "gcolor", "hc", "hcode", 291 "hidechar", "hla", "hlm", "hpf", 292 "hpfa", "hpfcode", "hw", "hy", 293 "hylang", "hylen", "hym", "hypp", 294 "hys", "ie", "if", "ig", 295 "index", "it", "itc", "IX", 296 "kern", "kernafter", "kernbefore", "kernpair", 297 "lc", "lc_ctype", "lds", "length", 298 "letadj", "lf", "lg", "lhang", 299 "linetabs", "lnr", "lnrf", "lpfx", 300 "ls", "lsm", "lt", 301 "mediasize", "minss", "mk", "mso", 302 
"na", "ne", "nh", "nhychar", 303 "nm", "nn", "nop", "nr", 304 "nrf", "nroff", "ns", "nx", 305 "open", "opena", "os", "output", 306 "padj", "papersize", "pc", "pev", 307 "pi", "PI", "pl", "pm", 308 "pn", "pnr", "ps", 309 "psbb", "pshape", "pso", "ptr", 310 "pvs", "rchar", "rd", "recursionlimit", 311 "return", "rfschar", "rhang", 312 "rm", "rn", "rnn", "rr", 313 "rs", "rt", "schar", "sentchar", 314 "shc", "shift", "sizes", "so", 315 "spacewidth", "special", "spreadwarn", "ss", 316 "sty", "substring", "sv", "sy", 317 "T&", "tc", "TE", 318 "TH", "tkf", "tl", 319 "tm", "tm1", "tmc", "tr", 320 "track", "transchar", "trf", "trimat", 321 "trin", "trnt", "troff", "TS", 322 "uf", "ul", "unformat", "unwatch", 323 "unwatchn", "vpt", "vs", "warn", 324 "warnscale", "watch", "watchlength", "watchn", 325 "wh", "while", "write", "writec", 326 "writem", "xflag", ".", NULL, 327 NULL, "text", 328 "Dd", "Dt", "Os", "Sh", 329 "Ss", "Pp", "D1", "Dl", 330 "Bd", "Ed", "Bl", "El", 331 "It", "Ad", "An", "Ap", 332 "Ar", "Cd", "Cm", "Dv", 333 "Er", "Ev", "Ex", "Fa", 334 "Fd", "Fl", "Fn", "Ft", 335 "Ic", "In", "Li", "Nd", 336 "Nm", "Op", "Ot", "Pa", 337 "Rv", "St", "Va", "Vt", 338 "Xr", "%A", "%B", "%D", 339 "%I", "%J", "%N", "%O", 340 "%P", "%R", "%T", "%V", 341 "Ac", "Ao", "Aq", "At", 342 "Bc", "Bf", "Bo", "Bq", 343 "Bsx", "Bx", "Db", "Dc", 344 "Do", "Dq", "Ec", "Ef", 345 "Em", "Eo", "Fx", "Ms", 346 "No", "Ns", "Nx", "Ox", 347 "Pc", "Pf", "Po", "Pq", 348 "Qc", "Ql", "Qo", "Qq", 349 "Re", "Rs", "Sc", "So", 350 "Sq", "Sm", "Sx", "Sy", 351 "Tn", "Ux", "Xc", "Xo", 352 "Fo", "Fc", "Oo", "Oc", 353 "Bk", "Ek", "Bt", "Hf", 354 "Fr", "Ud", "Lb", "Lp", 355 "Lk", "Mt", "Brq", "Bro", 356 "Brc", "%C", "Es", "En", 357 "Dx", "%Q", "%U", "Ta", 358 "Tg", NULL, 359 "TH", "SH", "SS", "TP", 360 "TQ", 361 "LP", "PP", "P", "IP", 362 "HP", "SM", "SB", "BI", 363 "IB", "BR", "RB", "R", 364 "B", "I", "IR", "RI", 365 "RE", "RS", "DT", "UC", 366 "PD", "AT", "in", 367 "SY", "YS", "OP", 368 "EX", "EE", "UR", 369 "UE", 
"MT", "ME", NULL 370 }; 371 const char *const *roff_name = __roff_name; 372 373 static struct roffmac roffs[TOKEN_NONE] = { 374 { roff_noarg, NULL, NULL, 0 }, /* br */ 375 { roff_onearg, NULL, NULL, 0 }, /* ce */ 376 { roff_noarg, NULL, NULL, 0 }, /* fi */ 377 { roff_onearg, NULL, NULL, 0 }, /* ft */ 378 { roff_onearg, NULL, NULL, 0 }, /* ll */ 379 { roff_onearg, NULL, NULL, 0 }, /* mc */ 380 { roff_noarg, NULL, NULL, 0 }, /* nf */ 381 { roff_onearg, NULL, NULL, 0 }, /* po */ 382 { roff_onearg, NULL, NULL, 0 }, /* rj */ 383 { roff_onearg, NULL, NULL, 0 }, /* sp */ 384 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 385 { roff_onearg, NULL, NULL, 0 }, /* ti */ 386 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 387 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 388 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 389 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 390 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 391 { roff_als, NULL, NULL, 0 }, /* als */ 392 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 393 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 394 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 395 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 396 { roff_ds, NULL, NULL, 0 }, /* as */ 397 { roff_ds, NULL, NULL, 0 }, /* as1 */ 398 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 399 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 400 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 401 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 402 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 403 { roff_unsupp, NULL, NULL, 0 }, /* box */ 404 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 405 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 406 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 407 { roff_break, NULL, NULL, 0 }, /* break */ 408 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 409 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 410 { roff_noarg, NULL, NULL, 0 }, /* brp */ 411 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 412 { 
roff_unsupp, NULL, NULL, 0 }, /* c2 */ 413 { roff_cc, NULL, NULL, 0 }, /* cc */ 414 { roff_insec, NULL, NULL, 0 }, /* cf */ 415 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 416 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 417 { roff_char, NULL, NULL, 0 }, /* char */ 418 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 419 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 420 { roff_insec, NULL, NULL, 0 }, /* close */ 421 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 422 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 423 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 424 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 425 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 426 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 427 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 428 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 429 { roff_unsupp, NULL, NULL, 0 }, /* da */ 430 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 431 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 432 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 433 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 434 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 435 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 436 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 437 { roff_unsupp, NULL, NULL, 0 }, /* device */ 438 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 439 { roff_unsupp, NULL, NULL, 0 }, /* di */ 440 { roff_unsupp, NULL, NULL, 0 }, /* do */ 441 { roff_ds, NULL, NULL, 0 }, /* ds */ 442 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 443 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 444 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 445 { roff_ec, NULL, NULL, 0 }, /* ec */ 446 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 447 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 448 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 449 { roff_unsupp, NULL, NULL, 0 }, /* em */ 450 { roff_EN, NULL, NULL, 0 }, /* EN */ 451 { roff_eo, NULL, NULL, 0 }, /* eo */ 452 { roff_unsupp, NULL, 
NULL, 0 }, /* EP */ 453 { roff_EQ, NULL, NULL, 0 }, /* EQ */ 454 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 455 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 456 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 457 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 458 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 459 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 460 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 461 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 462 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 463 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 464 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 465 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 466 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 467 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 468 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 469 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 470 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 471 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 472 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 473 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 474 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 475 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 476 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 477 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 479 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 484 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 485 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 486 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 487 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 488 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 489 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ 490 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 491 { roff_cond, 
roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 492 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 493 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 494 { roff_unsupp, NULL, NULL, 0 }, /* index */ 495 { roff_it, NULL, NULL, 0 }, /* it */ 496 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 497 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 498 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 499 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 500 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 501 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 502 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 503 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 504 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 505 { roff_unsupp, NULL, NULL, 0 }, /* length */ 506 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 507 { roff_insec, NULL, NULL, 0 }, /* lf */ 508 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 509 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 510 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 511 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 512 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 513 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 514 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 515 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 516 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 517 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 518 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 519 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 520 { roff_insec, NULL, NULL, 0 }, /* mso */ 521 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 522 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 523 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 524 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 525 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 526 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 527 { roff_nop, NULL, NULL, 0 }, /* nop */ 528 { roff_nr, NULL, NULL, 0 }, /* nr */ 529 { roff_unsupp, NULL, NULL, 0 }, /* nrf */ 530 { roff_line_ignore, NULL, 
NULL, 0 }, /* nroff */ 531 { roff_line_ignore, NULL, NULL, 0 }, /* ns */ 532 { roff_insec, NULL, NULL, 0 }, /* nx */ 533 { roff_insec, NULL, NULL, 0 }, /* open */ 534 { roff_insec, NULL, NULL, 0 }, /* opena */ 535 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 536 { roff_unsupp, NULL, NULL, 0 }, /* output */ 537 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 538 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 539 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 540 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 541 { roff_insec, NULL, NULL, 0 }, /* pi */ 542 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 543 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 544 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 545 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 546 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 547 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 548 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 549 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 550 { roff_insec, NULL, NULL, 0 }, /* pso */ 551 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 552 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 553 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 554 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 555 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 556 { roff_return, NULL, NULL, 0 }, /* return */ 557 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 558 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 559 { roff_rm, NULL, NULL, 0 }, /* rm */ 560 { roff_rn, NULL, NULL, 0 }, /* rn */ 561 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 562 { roff_rr, NULL, NULL, 0 }, /* rr */ 563 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 564 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 565 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 566 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 567 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 568 { roff_shift, NULL, NULL, 0 }, /* shift */ 569 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ 570 { roff_so, NULL, NULL, 0 }, /* so */ 571 { 
roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ 572 { roff_line_ignore, NULL, NULL, 0 }, /* special */ 573 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 574 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 575 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 576 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 577 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 578 { roff_insec, NULL, NULL, 0 }, /* sy */ 579 { roff_T_, NULL, NULL, 0 }, /* T& */ 580 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 581 { roff_TE, NULL, NULL, 0 }, /* TE */ 582 { roff_Dd, NULL, NULL, 0 }, /* TH */ 583 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 584 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 585 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 586 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 587 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 588 { roff_tr, NULL, NULL, 0 }, /* tr */ 589 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 590 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 591 { roff_insec, NULL, NULL, 0 }, /* trf */ 592 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 593 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 594 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 595 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 596 { roff_TS, NULL, NULL, 0 }, /* TS */ 597 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 598 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 599 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 600 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 601 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 602 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 603 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 604 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 605 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 606 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 607 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 608 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 609 { roff_unsupp, NULL, NULL, 0 }, /* wh */ 610 { roff_cond, roff_cond_text, roff_cond_sub, 
ROFFMAC_STRUCT }, /*while*/ 611 { roff_insec, NULL, NULL, 0 }, /* write */ 612 { roff_insec, NULL, NULL, 0 }, /* writec */ 613 { roff_insec, NULL, NULL, 0 }, /* writem */ 614 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 615 { roff_cblock, NULL, NULL, 0 }, /* . */ 616 { roff_renamed, NULL, NULL, 0 }, 617 { roff_userdef, NULL, NULL, 0 } 618 }; 619 620 /* Array of injected predefined strings. */ 621 #define PREDEFS_MAX 38 622 static const struct predef predefs[PREDEFS_MAX] = { 623 #include "predefs.in" 624 }; 625 626 static int roffce_lines; /* number of input lines to center */ 627 static struct roff_node *roffce_node; /* active request */ 628 static int roffit_lines; /* number of lines to delay */ 629 static char *roffit_macro; /* nil-terminated macro line */ 630 631 632 /* --- request table ------------------------------------------------------ */ 633 634 struct ohash * 635 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 636 { 637 struct ohash *htab; 638 struct roffreq *req; 639 enum roff_tok tok; 640 size_t sz; 641 unsigned int slot; 642 643 htab = mandoc_malloc(sizeof(*htab)); 644 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 645 646 for (tok = mintok; tok < maxtok; tok++) { 647 if (roff_name[tok] == NULL) 648 continue; 649 sz = strlen(roff_name[tok]); 650 req = mandoc_malloc(sizeof(*req) + sz + 1); 651 req->tok = tok; 652 memcpy(req->name, roff_name[tok], sz + 1); 653 slot = ohash_qlookup(htab, req->name); 654 ohash_insert(htab, slot, req); 655 } 656 return htab; 657 } 658 659 void 660 roffhash_free(struct ohash *htab) 661 { 662 struct roffreq *req; 663 unsigned int slot; 664 665 if (htab == NULL) 666 return; 667 for (req = ohash_first(htab, &slot); req != NULL; 668 req = ohash_next(htab, &slot)) 669 free(req); 670 ohash_delete(htab); 671 free(htab); 672 } 673 674 enum roff_tok 675 roffhash_find(struct ohash *htab, const char *name, size_t sz) 676 { 677 struct roffreq *req; 678 const char *end; 679 680 if (sz) { 681 end = name + sz; 
682 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 683 } else 684 req = ohash_find(htab, ohash_qlookup(htab, name)); 685 return req == NULL ? TOKEN_NONE : req->tok; 686 } 687 688 /* --- stack of request blocks -------------------------------------------- */ 689 690 /* 691 * Pop the current node off of the stack of roff instructions currently 692 * pending. Return 1 if it is a loop or 0 otherwise. 693 */ 694 static int 695 roffnode_pop(struct roff *r) 696 { 697 struct roffnode *p; 698 int inloop; 699 700 p = r->last; 701 inloop = p->tok == ROFF_while; 702 r->last = p->parent; 703 free(p->name); 704 free(p->end); 705 free(p); 706 return inloop; 707 } 708 709 /* 710 * Push a roff node onto the instruction stack. This must later be 711 * removed with roffnode_pop(). 712 */ 713 static void 714 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 715 int line, int col) 716 { 717 struct roffnode *p; 718 719 p = mandoc_calloc(1, sizeof(struct roffnode)); 720 p->tok = tok; 721 if (name) 722 p->name = mandoc_strdup(name); 723 p->parent = r->last; 724 p->line = line; 725 p->col = col; 726 p->rule = p->parent ? 
p->parent->rule : 0; 727 728 r->last = p; 729 } 730 731 /* --- roff parser state data management ---------------------------------- */ 732 733 static void 734 roff_free1(struct roff *r) 735 { 736 int i; 737 738 tbl_free(r->first_tbl); 739 r->first_tbl = r->last_tbl = r->tbl = NULL; 740 741 eqn_free(r->last_eqn); 742 r->last_eqn = r->eqn = NULL; 743 744 while (r->mstackpos >= 0) 745 roff_userret(r); 746 747 while (r->last) 748 roffnode_pop(r); 749 750 free (r->rstack); 751 r->rstack = NULL; 752 r->rstacksz = 0; 753 r->rstackpos = -1; 754 755 roff_freereg(r->regtab); 756 r->regtab = NULL; 757 758 roff_freestr(r->strtab); 759 roff_freestr(r->rentab); 760 roff_freestr(r->xmbtab); 761 r->strtab = r->rentab = r->xmbtab = NULL; 762 763 if (r->xtab) 764 for (i = 0; i < 128; i++) 765 free(r->xtab[i].p); 766 free(r->xtab); 767 r->xtab = NULL; 768 } 769 770 void 771 roff_reset(struct roff *r) 772 { 773 roff_free1(r); 774 r->options |= MPARSE_COMMENT; 775 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 776 r->control = '\0'; 777 r->escape = '\\'; 778 roffce_lines = 0; 779 roffce_node = NULL; 780 roffit_lines = 0; 781 roffit_macro = NULL; 782 } 783 784 void 785 roff_free(struct roff *r) 786 { 787 int i; 788 789 roff_free1(r); 790 for (i = 0; i < r->mstacksz; i++) 791 free(r->mstack[i].argv); 792 free(r->mstack); 793 roffhash_free(r->reqtab); 794 free(r); 795 } 796 797 struct roff * 798 roff_alloc(int options) 799 { 800 struct roff *r; 801 802 r = mandoc_calloc(1, sizeof(struct roff)); 803 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 804 r->options = options | MPARSE_COMMENT; 805 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 806 r->mstackpos = -1; 807 r->rstackpos = -1; 808 r->escape = '\\'; 809 return r; 810 } 811 812 /* --- syntax tree state data management ---------------------------------- */ 813 814 static void 815 roff_man_free1(struct roff_man *man) 816 { 817 if (man->meta.first != NULL) 818 roff_node_delete(man, man->meta.first); 819 free(man->meta.msec); 820 
free(man->meta.vol); 821 free(man->meta.os); 822 free(man->meta.arch); 823 free(man->meta.title); 824 free(man->meta.name); 825 free(man->meta.date); 826 free(man->meta.sodest); 827 } 828 829 void 830 roff_state_reset(struct roff_man *man) 831 { 832 man->last = man->meta.first; 833 man->last_es = NULL; 834 man->flags = 0; 835 man->lastsec = man->lastnamed = SEC_NONE; 836 man->next = ROFF_NEXT_CHILD; 837 roff_setreg(man->roff, "nS", 0, '='); 838 } 839 840 static void 841 roff_man_alloc1(struct roff_man *man) 842 { 843 memset(&man->meta, 0, sizeof(man->meta)); 844 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 845 man->meta.first->type = ROFFT_ROOT; 846 man->meta.macroset = MACROSET_NONE; 847 roff_state_reset(man); 848 } 849 850 void 851 roff_man_reset(struct roff_man *man) 852 { 853 roff_man_free1(man); 854 roff_man_alloc1(man); 855 } 856 857 void 858 roff_man_free(struct roff_man *man) 859 { 860 roff_man_free1(man); 861 free(man); 862 } 863 864 struct roff_man * 865 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 866 { 867 struct roff_man *man; 868 869 man = mandoc_calloc(1, sizeof(*man)); 870 man->roff = roff; 871 man->os_s = os_s; 872 man->quick = quick; 873 roff_man_alloc1(man); 874 roff->man = man; 875 return man; 876 } 877 878 /* --- syntax tree handling ----------------------------------------------- */ 879 880 struct roff_node * 881 roff_node_alloc(struct roff_man *man, int line, int pos, 882 enum roff_type type, int tok) 883 { 884 struct roff_node *n; 885 886 n = mandoc_calloc(1, sizeof(*n)); 887 n->line = line; 888 n->pos = pos; 889 n->tok = tok; 890 n->type = type; 891 n->sec = man->lastsec; 892 893 if (man->flags & MDOC_SYNOPSIS) 894 n->flags |= NODE_SYNPRETTY; 895 else 896 n->flags &= ~NODE_SYNPRETTY; 897 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 898 n->flags |= NODE_NOFILL; 899 else 900 n->flags &= ~NODE_NOFILL; 901 if (man->flags & MDOC_NEWLINE) 902 n->flags |= NODE_LINE; 903 man->flags &= 
~MDOC_NEWLINE; 904 905 return n; 906 } 907 908 void 909 roff_node_append(struct roff_man *man, struct roff_node *n) 910 { 911 912 switch (man->next) { 913 case ROFF_NEXT_SIBLING: 914 if (man->last->next != NULL) { 915 n->next = man->last->next; 916 man->last->next->prev = n; 917 } else 918 man->last->parent->last = n; 919 man->last->next = n; 920 n->prev = man->last; 921 n->parent = man->last->parent; 922 break; 923 case ROFF_NEXT_CHILD: 924 if (man->last->child != NULL) { 925 n->next = man->last->child; 926 man->last->child->prev = n; 927 } else 928 man->last->last = n; 929 man->last->child = n; 930 n->parent = man->last; 931 break; 932 default: 933 abort(); 934 } 935 man->last = n; 936 937 switch (n->type) { 938 case ROFFT_HEAD: 939 n->parent->head = n; 940 break; 941 case ROFFT_BODY: 942 if (n->end != ENDBODY_NOT) 943 return; 944 n->parent->body = n; 945 break; 946 case ROFFT_TAIL: 947 n->parent->tail = n; 948 break; 949 default: 950 return; 951 } 952 953 /* 954 * Copy over the normalised-data pointer of our parent. Not 955 * everybody has one, but copying a null pointer is fine. 
*/

	n->norm = n->parent->norm;
	assert(n->parent->type == ROFFT_BLOCK);
}

/*
 * Append a new text node at the parse point.
 * The node string is a copy of word made by roff_strdup().
 * The node is marked valid and ended, and the next node
 * will be added as its sibling.
 */
void
roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
{
	struct roff_node	*n;

	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
	n->string = roff_strdup(man->roff, word);
	roff_node_append(man, n);
	n->flags |= NODE_VALID | NODE_ENDED;
	man->next = ROFF_NEXT_SIBLING;
}

/*
 * Extend the string of the node at the parse point
 * by a blank followed by a roff_strdup() copy of word.
 * The old string is freed and replaced.
 */
void
roff_word_append(struct roff_man *man, const char *word)
{
	struct roff_node	*n;
	char			*addstr, *newstr;

	n = man->last;
	addstr = roff_strdup(man->roff, word);
	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
	free(addstr);
	free(n->string);
	n->string = newstr;
	man->next = ROFF_NEXT_SIBLING;
}

/*
 * Append a new element node for tok at the parse point;
 * the next node will be added as its child.
 */
void
roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
{
	struct roff_node	*n;

	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
	roff_node_append(man, n);
	man->next = ROFF_NEXT_CHILD;
}

/*
 * Append a new block node for tok at the parse point and return it;
 * the next node will be added as its child.
 */
struct roff_node *
roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
{
	struct roff_node	*n;

	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
	roff_node_append(man, n);
	man->next = ROFF_NEXT_CHILD;
	return n;
}

/*
 * Append a new head node for tok at the parse point and return it;
 * the next node will be added as its child.
 */
struct roff_node *
roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
{
	struct roff_node	*n;

	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
	roff_node_append(man, n);
	man->next = ROFF_NEXT_CHILD;
	return n;
}

/*
 * Append a new body node for tok at the parse point and return it;
 * the next node will be added as its child.
 */
struct roff_node *
roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
{
	struct roff_node	*n;

	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
	roff_node_append(man, n);
	man->next = ROFF_NEXT_CHILD;
	return n;
}

/*
 * Append one ROFFT_TBL node for each span that tbl_span()
 * currently hands out for tbl.  For the man(7) macro set,
 * man_breakscope() is called for ROFF_TS first.
 */
static void
roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
{
	struct roff_node	*n;
	struct tbl_span		*span;

	if (man->meta.macroset == MACROSET_MAN)
		man_breakscope(man, ROFF_TS);
	while ((span = tbl_span(tbl)) != NULL) {
		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
		n->span = span;
		roff_node_append(man, n);
		n->flags |= NODE_VALID | NODE_ENDED;
		man->next = ROFF_NEXT_SIBLING;
	}
}

/*
 * Take the node n out of the tree without freeing it.
 * The siblings and the parent are adjusted; the prev, next, and
 * parent pointers of n itself are left untouched.
 * If man is not NULL, the parse point is moved off n, and
 * man->meta.first is cleared if n was the root node.
 */
void
roff_node_unlink(struct roff_man *man, struct roff_node *n)
{

	/* Adjust siblings. */

	if (n->prev)
		n->prev->next = n->next;
	if (n->next)
		n->next->prev = n->prev;

	/* Adjust parent. */

	if (n->parent != NULL) {
		if (n->parent->child == n)
			n->parent->child = n->next;
		if (n->parent->last == n)
			n->parent->last = n->prev;
	}

	/* Adjust parse point. */

	if (man == NULL)
		return;
	if (man->last == n) {
		if (n->prev == NULL) {
			man->last = n->parent;
			man->next = ROFF_NEXT_CHILD;
		} else {
			man->last = n->prev;
			man->next = ROFF_NEXT_SIBLING;
		}
	}
	if (man->meta.first == n)
		man->meta.first = NULL;
}

/*
 * Move the node n from its present place in the tree
 * to the current parse point.
 */
void
roff_node_relink(struct roff_man *man, struct roff_node *n)
{
	roff_node_unlink(man, n);
	n->prev = n->next = NULL;
	roff_node_append(man, n);
}

/*
 * Free a single node and the data hanging off it.
 * The node is neither unlinked nor are its children freed;
 * see roff_node_delete() for that.
 */
void
roff_node_free(struct roff_node *n)
{

	if (n->args != NULL)
		mdoc_argv_free(n->args);
	/* Only block and element nodes free norm here. */
	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
		free(n->norm);
	eqn_box_free(n->eqn);
	free(n->string);
	free(n->tag);
	free(n);
}

/*
 * Delete the node n together with all its descendants:
 * children are deleted first, then n is unlinked and freed.
 */
void
roff_node_delete(struct roff_man *man, struct roff_node *n)
{

	while (n->child != NULL)
		roff_node_delete(man, n->child);
	roff_node_unlink(man, n);
	roff_node_free(n);
}

/*
 * Return 1 if n is a comment node, has NODE_NOPRT set,
 * or has a token for which roff_tok_transparent() returns 1.
 * Return 0 otherwise, in particular for a NULL pointer.
 */
int
roff_node_transparent(struct roff_node *n)
{
	if (n == NULL)
		return 0;
	if (n->type ==
ROFFT_COMMENT || n->flags & NODE_NOPRT) 1124 return 1; 1125 return roff_tok_transparent(n->tok); 1126 } 1127 1128 int 1129 roff_tok_transparent(enum roff_tok tok) 1130 { 1131 switch (tok) { 1132 case ROFF_ft: 1133 case ROFF_ll: 1134 case ROFF_mc: 1135 case ROFF_po: 1136 case ROFF_ta: 1137 case MDOC_Db: 1138 case MDOC_Es: 1139 case MDOC_Sm: 1140 case MDOC_Tg: 1141 case MAN_DT: 1142 case MAN_UC: 1143 case MAN_PD: 1144 case MAN_AT: 1145 return 1; 1146 default: 1147 return 0; 1148 } 1149 } 1150 1151 struct roff_node * 1152 roff_node_child(struct roff_node *n) 1153 { 1154 for (n = n->child; roff_node_transparent(n); n = n->next) 1155 continue; 1156 return n; 1157 } 1158 1159 struct roff_node * 1160 roff_node_prev(struct roff_node *n) 1161 { 1162 do { 1163 n = n->prev; 1164 } while (roff_node_transparent(n)); 1165 return n; 1166 } 1167 1168 struct roff_node * 1169 roff_node_next(struct roff_node *n) 1170 { 1171 do { 1172 n = n->next; 1173 } while (roff_node_transparent(n)); 1174 return n; 1175 } 1176 1177 void 1178 deroff(char **dest, const struct roff_node *n) 1179 { 1180 char *cp; 1181 size_t sz; 1182 1183 if (n->string == NULL) { 1184 for (n = n->child; n != NULL; n = n->next) 1185 deroff(dest, n); 1186 return; 1187 } 1188 1189 /* Skip leading whitespace. */ 1190 1191 for (cp = n->string; *cp != '\0'; cp++) { 1192 if (cp[0] == '\\' && cp[1] != '\0' && 1193 strchr(" %&0^|~", cp[1]) != NULL) 1194 cp++; 1195 else if ( ! isspace((unsigned char)*cp)) 1196 break; 1197 } 1198 1199 /* Skip trailing backslash. */ 1200 1201 sz = strlen(cp); 1202 if (sz > 0 && cp[sz - 1] == '\\') 1203 sz--; 1204 1205 /* Skip trailing whitespace. */ 1206 1207 for (; sz; sz--) 1208 if ( ! isspace((unsigned char)cp[sz-1])) 1209 break; 1210 1211 /* Skip empty strings. 
*/ 1212 1213 if (sz == 0) 1214 return; 1215 1216 if (*dest == NULL) { 1217 *dest = mandoc_strndup(cp, sz); 1218 return; 1219 } 1220 1221 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1222 free(*dest); 1223 *dest = cp; 1224 } 1225 1226 /* --- main functions of the roff parser ---------------------------------- */ 1227 1228 /* 1229 * In the current line, expand escape sequences that produce parsable 1230 * input text. Also check the syntax of the remaining escape sequences, 1231 * which typically produce output glyphs or change formatter state. 1232 */ 1233 static int 1234 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1235 { 1236 struct mctx *ctx; /* current macro call context */ 1237 char ubuf[24]; /* buffer to print the number */ 1238 struct roff_node *n; /* used for header comments */ 1239 const char *start; /* start of the string to process */ 1240 char *stesc; /* start of an escape sequence ('\\') */ 1241 const char *esct; /* type of esccape sequence */ 1242 char *ep; /* end of comment string */ 1243 const char *stnam; /* start of the name, after "[(*" */ 1244 const char *cp; /* end of the name, e.g. 
before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		/*
		 * Only accept the keyword if it is not followed
		 * by an alphanumeric character and if another
		 * dollar sign follows later on the line.
		 */
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/* Handle trailing whitespace. */

		/* From here on, stesc points to the escape character. */
		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Discard normal comments. */

		/* Also drop blanks before the comment, unless escaped. */
		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* The remaining work proceeds backwards from the end of the line. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		/*
		 * Now stesc - cp is the length of the run of escape
		 * characters ending at stesc.  If it is even, they
		 * pair up, and all of them become plain backslashes.
		 */

		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Escape character at the very end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: only check the syntax. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* \B and \w: skip the letter and the delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			/*
			 * Inside \w only, an escape sequence is skipped
			 * as a whole and counted at most once.
			 */
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* The digits 1 to 9 select a single argument. */
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			/* Compute the total length of all arguments. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}
			/*
			 * Make room for asz bytes in place of the
			 * three bytes at stesc.  When shrinking, move
			 * the rest of the line before reallocating;
			 * when growing, move it afterwards.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}
			/* Copy the arguments into the gap. */
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/*
			 * Yield "1" only if the argument is complete
			 * and roff_evalnum() succeeds and consumes
			 * everything up to the closing delimiter.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			/* Each character counted above yields 24 units. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string.
*/

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Continue at the end of the text just inserted,
		 * so the backward scan also inspects the replacement.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}

/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * The argument is NUL-terminated in place in the input line, and
 * pairs of quotes inside quoted arguments are collapsed.
 * Return a copy of the parsed argument made with mandoc_strdup().
 * If the argument contained a double backslash, the copy is
 * additionally passed through roff_expand(), and an empty string
 * is returned if that asks to ignore the result.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance *pos accordingly.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/* The variable pairs counts the bytes removed so far. */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/* Mark it for roff_expand(), see below. */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/* Check or interpolate the escapes marked with ASCII_ESC. */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}


/*
 * Process text streams.
 */
static int
roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
{
	size_t		 sz;
	const char	*start;
	char		*p;
	int		 isz;
	enum mandoc_esc	 esc;

	/* Spring the input line trap. */

	/*
	 * When the countdown reaches one, append a call of the trap
	 * macro to this line and have the result parsed again.
	 */

	if (roffit_lines == 1) {
		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
		free(buf->buf);
		buf->buf = p;
		buf->sz = isz + 1;
		*offs = 0;
		free(roffit_macro);
		roffit_lines = 0;
		return ROFF_REPARSE;
	} else if (roffit_lines > 1)
		--roffit_lines;

	/*
	 * While roffce_node is set, count down roffce_lines for each
	 * non-empty text line; once used up, move the parse point
	 * back to roffce_node.
	 */

	if (roffce_node != NULL && buf->buf[pos] != '\0') {
		if (roffce_lines < 1) {
			r->man->last = roffce_node;
			r->man->next = ROFF_NEXT_SIBLING;
			roffce_lines = 0;
			roffce_node = NULL;
		} else
			roffce_lines--;
	}

	/* Convert all breakable hyphens into ASCII_HYPH.
*/

	start = p = buf->buf + pos;

	while (*p != '\0') {
		sz = strcspn(p, "-\\");
		p += sz;

		if (*p == '\0')
			break;

		if (*p == '\\') {
			/* Skip over escapes. */
			p++;
			esc = mandoc_escape((const char **)&p, NULL, NULL);
			if (esc == ESCAPE_ERROR)
				break;
			/* Hyphens right after an escape stay as they are. */
			while (*p == '-')
				p++;
			continue;
		} else if (p == start) {
			/* Neither does a hyphen starting the text change. */
			p++;
			continue;
		}

		/* Only a hyphen between two letters is breakable. */
		if (isalpha((unsigned char)p[-1]) &&
		    isalpha((unsigned char)p[1]))
			*p = ASCII_HYPH;
		p++;
	}
	return ROFF_CONT;
}

/*
 * Parse one input line, contained in buf and starting at *offs.
 * Equation delimiters and escape sequences are handled first.
 * After that, the line is passed to the text or sub handler of
 * an open roff block, to the equation or table parser, to
 * roff_parsetext(), or to the handler of the request or macro
 * found at the beginning of the line, whatever applies.
 * Return a ROFF_* code, possibly combined with other ROFF_* bits;
 * ROFF_CONT is returned for macro lines that are neither roff
 * requests nor user-defined macros.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the bits outside ROFF_MASK. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* Inside an equation, everything but ".EN" is equation input. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl) {
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/* Skip the macro name and pass the rest to the table. */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}

/*
 * Internal interface function to tell the roff parser that execution
 * of the current macro ended.  This is required because macro
 * definitions usually do not end with a .return request.
 * The arguments of the topmost macro call context are freed
 * and the context is popped off the stack.
 */
void
roff_userret(struct roff *r)
{
	struct mctx	*ctx;
	int		 i;

	assert(r->mstackpos >= 0);
	ctx = r->mstack + r->mstackpos;
	for (i = 0; i < ctx->argc; i++)
		free(ctx->argv[i]);
	ctx->argc = 0;
	r->mstackpos--;
}

/*
 * Finish parsing: report a roff block that is still open,
 * report and parse an equation that is still open,
 * and end a table that is still open.
 */
void
roff_endparse(struct roff *r)
{
	if (r->last != NULL)
		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
		    r->last->col, "%s", roff_name[r->last->tok]);

	if (r->eqn != NULL) {
		mandoc_msg(MANDOCERR_BLK_NOEND,
		    r->eqn->node->line, r->eqn->node->pos, "EQ");
		eqn_parse(r->eqn);
		r->eqn = NULL;
	}

	if (r->tbl != NULL) {
		tbl_end(r->tbl, 1);
		r->tbl = NULL;
	}
}

/*
 * Parse a roff node's type from the input buffer.  This must be in the
 * form of ".foo xxx" in the usual way.
*/
/*
 * The name found at buf + *pos is looked up among the user-defined
 * and renamed macros first and in the request table otherwise.
 * The result of the string lookup is saved in r->current_string.
 * Return the token found and advance *pos behind the name, or
 * return TOKEN_NONE and leave *pos alone if nothing is found.
 */
static enum roff_tok
roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
{
	char		*cp;
	const char	*mac;
	size_t		 maclen;
	int		 deftype;
	enum roff_tok	 t;

	cp = buf + *pos;

	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
		return TOKEN_NONE;

	mac = cp;
	maclen = roff_getname(r, &cp, ln, ppos);

	deftype = ROFFDEF_USER | ROFFDEF_REN;
	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
	switch (deftype) {
	case ROFFDEF_USER:
		t = ROFF_USERDEF;
		break;
	case ROFFDEF_REN:
		t = ROFF_RENAMED;
		break;
	default:
		t = roffhash_find(r->reqtab, mac, maclen);
		break;
	}
	if (t != TOKEN_NONE)
		*pos = cp - buf;
	else if (deftype == ROFFDEF_UNDEF) {
		/* Using an undefined macro defines it to be empty. */
		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
	}
	return t;
}

/* --- handling of request blocks ----------------------------------------- */

/*
 * Handle the block closing request `..'.
 * It is only valid while one of the blocks listed in the switch
 * statement below is open; otherwise, a message is issued.
 * Always return ROFF_IGN.
 */
static int
roff_cblock(ROFF_ARGS)
{

	/*
	 * A block-close `..' should only be invoked as a child of an
	 * ignore macro, otherwise raise a warning and just ignore it.
	 */

	if (r->last == NULL) {
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
		return ROFF_IGN;
	}

	switch (r->last->tok) {
	case ROFF_am:
		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
	case ROFF_ami:
	case ROFF_de:
		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
	case ROFF_dei:
	case ROFF_ig:
		break;
	default:
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
		return ROFF_IGN;
	}

	/* Arguments after `..' are reported and otherwise unused. */
	if (buf->buf[pos] != '\0')
		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
		    ".. %s", buf->buf + pos);

	roffnode_pop(r);
	roffnode_cleanscope(r);
	return ROFF_IGN;

}

/*
 * Pop all nodes ending at the end of the current input line.
 * Return the number of loops ended.
 */
static int
roffnode_cleanscope(struct roff *r)
{
	int inloop;

	inloop = 0;
	while (r->last != NULL) {
		/* Note that endspan is decremented even if not popping. */
		if (--r->last->endspan != 0)
			break;
		inloop += roffnode_pop(r);
	}
	return inloop;
}

/*
 * Handle the closing \} of a conditional block.
 * Apart from generating warnings, this only pops nodes.
 * Return the number of loops ended.
 */
static int
roff_ccond(struct roff *r, int ln, int ppos)
{
	if (NULL == r->last) {
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
		return 0;
	}

	switch (r->last->tok) {
	case ROFF_el:
	case ROFF_ie:
	case ROFF_if:
	case ROFF_while:
		break;
	default:
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
		return 0;
	}

	if (r->last->endspan > -1) {
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
		return 0;
	}

	return roffnode_pop(r) + roffnode_cleanscope(r);
}

/*
 * Open a block for one of the requests remapped or tested below:
 * the de and am families and ig.
 * The first argument is the name of the macro to define or to
 * append to, the optional second one a custom end marker; for the
 * dei and ami requests, both are names of strings containing the
 * actual names.  The ig request takes no name.
 */
static int
roff_block(ROFF_ARGS)
{
	const char	*name, *value;
	char		*call, *cp, *iname, *rname;
	size_t		 csz, namesz, rsz;
	int		 deftype;

	/* Ignore groff compatibility mode for now. */

	if (tok == ROFF_de1)
		tok = ROFF_de;
	else if (tok == ROFF_dei1)
		tok = ROFF_dei;
	else if (tok == ROFF_am1)
		tok = ROFF_am;
	else if (tok == ROFF_ami1)
		tok = ROFF_ami;

	/* Parse the macro name argument. */

	cp = buf->buf + pos;
	if (tok == ROFF_ig) {
		iname = NULL;
		namesz = 0;
	} else {
		iname = cp;
		namesz = roff_getname(r, &cp, ln, ppos);
		iname[namesz] = '\0';
	}

	/* Resolve the macro name argument if it is indirect.
*/ 2155 2156 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2157 deftype = ROFFDEF_USER; 2158 name = roff_getstrn(r, iname, namesz, &deftype); 2159 if (name == NULL) { 2160 mandoc_msg(MANDOCERR_STR_UNDEF, 2161 ln, (int)(iname - buf->buf), 2162 "%.*s", (int)namesz, iname); 2163 namesz = 0; 2164 } else 2165 namesz = strlen(name); 2166 } else 2167 name = iname; 2168 2169 if (namesz == 0 && tok != ROFF_ig) { 2170 mandoc_msg(MANDOCERR_REQ_EMPTY, 2171 ln, ppos, "%s", roff_name[tok]); 2172 return ROFF_IGN; 2173 } 2174 2175 roffnode_push(r, tok, name, ln, ppos); 2176 2177 /* 2178 * At the beginning of a `de' macro, clear the existing string 2179 * with the same name, if there is one. New content will be 2180 * appended from roff_block_text() in multiline mode. 2181 */ 2182 2183 if (tok == ROFF_de || tok == ROFF_dei) { 2184 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2185 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2186 } else if (tok == ROFF_am || tok == ROFF_ami) { 2187 deftype = ROFFDEF_ANY; 2188 value = roff_getstrn(r, iname, namesz, &deftype); 2189 switch (deftype) { /* Before appending, ... */ 2190 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2191 roff_setstrn(&r->strtab, name, namesz, 2192 value, strlen(value), 0); 2193 break; 2194 case ROFFDEF_REN: /* call original standard macro. */ 2195 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2196 (int)strlen(value), value); 2197 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2198 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2199 free(call); 2200 break; 2201 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2202 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2203 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2204 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2205 (int)rsz, rname); 2206 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2207 free(call); 2208 free(rname); 2209 break; 2210 default: 2211 break; 2212 } 2213 } 2214 2215 if (*cp == '\0') 2216 return ROFF_IGN; 2217 2218 /* Get the custom end marker. */ 2219 2220 iname = cp; 2221 namesz = roff_getname(r, &cp, ln, ppos); 2222 2223 /* Resolve the end marker if it is indirect. */ 2224 2225 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2226 deftype = ROFFDEF_USER; 2227 name = roff_getstrn(r, iname, namesz, &deftype); 2228 if (name == NULL) { 2229 mandoc_msg(MANDOCERR_STR_UNDEF, 2230 ln, (int)(iname - buf->buf), 2231 "%.*s", (int)namesz, iname); 2232 namesz = 0; 2233 } else 2234 namesz = strlen(name); 2235 } else 2236 name = iname; 2237 2238 if (namesz) 2239 r->last->end = mandoc_strndup(name, namesz); 2240 2241 if (*cp != '\0') 2242 mandoc_msg(MANDOCERR_ARG_EXCESS, 2243 ln, pos, ".%s ... %s", roff_name[tok], cp); 2244 2245 return ROFF_IGN; 2246 } 2247 2248 static int 2249 roff_block_sub(ROFF_ARGS) 2250 { 2251 enum roff_tok t; 2252 int i, j; 2253 2254 /* 2255 * First check whether a custom macro exists at this level. If 2256 * it does, then check against it. This is some of groff's 2257 * stranger behaviours. If we encountered a custom end-scope 2258 * tag and that tag also happens to be a "real" macro, then we 2259 * need to try interpreting it again as a real macro. If it's 2260 * not, then return ignore. Else continue. 
2261 */ 2262 2263 if (r->last->end) { 2264 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2265 if (buf->buf[i] != r->last->end[j]) 2266 break; 2267 2268 if (r->last->end[j] == '\0' && 2269 (buf->buf[i] == '\0' || 2270 buf->buf[i] == ' ' || 2271 buf->buf[i] == '\t')) { 2272 roffnode_pop(r); 2273 roffnode_cleanscope(r); 2274 2275 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2276 i++; 2277 2278 pos = i; 2279 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2280 TOKEN_NONE) 2281 return ROFF_RERUN; 2282 return ROFF_IGN; 2283 } 2284 } 2285 2286 /* 2287 * If we have no custom end-query or lookup failed, then try 2288 * pulling it out of the hashtable. 2289 */ 2290 2291 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2292 2293 if (t != ROFF_cblock) { 2294 if (tok != ROFF_ig) 2295 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2296 return ROFF_IGN; 2297 } 2298 2299 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2300 } 2301 2302 static int 2303 roff_block_text(ROFF_ARGS) 2304 { 2305 2306 if (tok != ROFF_ig) 2307 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2308 2309 return ROFF_IGN; 2310 } 2311 2312 static int 2313 roff_cond_sub(ROFF_ARGS) 2314 { 2315 struct roffnode *bl; 2316 char *ep; 2317 int endloop, irc, rr; 2318 enum roff_tok t; 2319 2320 irc = ROFF_IGN; 2321 rr = r->last->rule; 2322 endloop = tok != ROFF_while ? ROFF_IGN : 2323 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2324 if (roffnode_cleanscope(r)) 2325 irc |= endloop; 2326 2327 /* 2328 * If `\}' occurs on a macro line without a preceding macro, 2329 * drop the line completely. 2330 */ 2331 2332 ep = buf->buf + pos; 2333 if (ep[0] == '\\' && ep[1] == '}') 2334 rr = 0; 2335 2336 /* 2337 * The closing delimiter `\}' rewinds the conditional scope 2338 * but is otherwise ignored when interpreting the line. 
2339 */ 2340 2341 while ((ep = strchr(ep, '\\')) != NULL) { 2342 switch (ep[1]) { 2343 case '}': 2344 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2345 if (roff_ccond(r, ln, ep - buf->buf)) 2346 irc |= endloop; 2347 break; 2348 case '\0': 2349 ++ep; 2350 break; 2351 default: 2352 ep += 2; 2353 break; 2354 } 2355 } 2356 2357 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2358 2359 /* For now, let high level macros abort .ce mode. */ 2360 2361 if (roffce_node != NULL && 2362 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 2363 t == ROFF_TH || t == ROFF_TS)) { 2364 r->man->last = roffce_node; 2365 r->man->next = ROFF_NEXT_SIBLING; 2366 roffce_lines = 0; 2367 roffce_node = NULL; 2368 } 2369 2370 /* 2371 * Fully handle known macros when they are structurally 2372 * required or when the conditional evaluated to true. 2373 */ 2374 2375 if (t == ROFF_break) { 2376 if (irc & ROFF_LOOPMASK) 2377 irc = ROFF_IGN | ROFF_LOOPEXIT; 2378 else if (rr) { 2379 for (bl = r->last; bl != NULL; bl = bl->parent) { 2380 bl->rule = 0; 2381 if (bl->tok == ROFF_while) 2382 break; 2383 } 2384 } 2385 } else if (t != TOKEN_NONE && 2386 (rr || roffs[t].flags & ROFFMAC_STRUCT)) 2387 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2388 else 2389 irc |= rr ? ROFF_CONT : ROFF_IGN; 2390 return irc; 2391 } 2392 2393 static int 2394 roff_cond_text(ROFF_ARGS) 2395 { 2396 char *ep; 2397 int endloop, irc, rr; 2398 2399 irc = ROFF_IGN; 2400 rr = r->last->rule; 2401 endloop = tok != ROFF_while ? ROFF_IGN : 2402 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2403 if (roffnode_cleanscope(r)) 2404 irc |= endloop; 2405 2406 /* 2407 * If `\}' occurs on a text line with neither preceding 2408 * nor following characters, drop the line completely. 2409 */ 2410 2411 ep = buf->buf + pos; 2412 if (strcmp(ep, "\\}") == 0) 2413 rr = 0; 2414 2415 /* 2416 * The closing delimiter `\}' rewinds the conditional scope 2417 * but is otherwise ignored when interpreting the line. 
2418 */ 2419 2420 while ((ep = strchr(ep, '\\')) != NULL) { 2421 switch (ep[1]) { 2422 case '}': 2423 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2424 if (roff_ccond(r, ln, ep - buf->buf)) 2425 irc |= endloop; 2426 break; 2427 case '\0': 2428 ++ep; 2429 break; 2430 default: 2431 ep += 2; 2432 break; 2433 } 2434 } 2435 if (rr) 2436 irc |= ROFF_CONT; 2437 return irc; 2438 } 2439 2440 /* --- handling of numeric and conditional expressions -------------------- */ 2441 2442 /* 2443 * Parse a single signed integer number. Stop at the first non-digit. 2444 * If there is at least one digit, return success and advance the 2445 * parse point, else return failure and let the parse point unchanged. 2446 * Ignore overflows, treat them just like the C language. 2447 */ 2448 static int 2449 roff_getnum(const char *v, int *pos, int *res, int flags) 2450 { 2451 int myres, scaled, n, p; 2452 2453 if (NULL == res) 2454 res = &myres; 2455 2456 p = *pos; 2457 n = v[p] == '-'; 2458 if (n || v[p] == '+') 2459 p++; 2460 2461 if (flags & ROFFNUM_WHITE) 2462 while (isspace((unsigned char)v[p])) 2463 p++; 2464 2465 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2466 *res = 10 * *res + v[p] - '0'; 2467 if (p == *pos + n) 2468 return 0; 2469 2470 if (n) 2471 *res = -*res; 2472 2473 /* Each number may be followed by one optional scaling unit. 
*/ 2474 2475 switch (v[p]) { 2476 case 'f': 2477 scaled = *res * 65536; 2478 break; 2479 case 'i': 2480 scaled = *res * 240; 2481 break; 2482 case 'c': 2483 scaled = *res * 240 / 2.54; 2484 break; 2485 case 'v': 2486 case 'P': 2487 scaled = *res * 40; 2488 break; 2489 case 'm': 2490 case 'n': 2491 scaled = *res * 24; 2492 break; 2493 case 'p': 2494 scaled = *res * 10 / 3; 2495 break; 2496 case 'u': 2497 scaled = *res; 2498 break; 2499 case 'M': 2500 scaled = *res * 6 / 25; 2501 break; 2502 default: 2503 scaled = *res; 2504 p--; 2505 break; 2506 } 2507 if (flags & ROFFNUM_SCALE) 2508 *res = scaled; 2509 2510 *pos = p + 1; 2511 return 1; 2512 } 2513 2514 /* 2515 * Evaluate a string comparison condition. 2516 * The first character is the delimiter. 2517 * Succeed if the string up to its second occurrence 2518 * matches the string up to its third occurence. 2519 * Advance the cursor after the third occurrence 2520 * or lacking that, to the end of the line. 2521 */ 2522 static int 2523 roff_evalstrcond(const char *v, int *pos) 2524 { 2525 const char *s1, *s2, *s3; 2526 int match; 2527 2528 match = 0; 2529 s1 = v + *pos; /* initial delimiter */ 2530 s2 = s1 + 1; /* for scanning the first string */ 2531 s3 = strchr(s2, *s1); /* for scanning the second string */ 2532 2533 if (NULL == s3) /* found no middle delimiter */ 2534 goto out; 2535 2536 while ('\0' != *++s3) { 2537 if (*s2 != *s3) { /* mismatch */ 2538 s3 = strchr(s3, *s1); 2539 break; 2540 } 2541 if (*s3 == *s1) { /* found the final delimiter */ 2542 match = 1; 2543 break; 2544 } 2545 s2++; 2546 } 2547 2548 out: 2549 if (NULL == s3) 2550 s3 = strchr(s2, '\0'); 2551 else if (*s3 != '\0') 2552 s3++; 2553 *pos = s3 - v; 2554 return match; 2555 } 2556 2557 /* 2558 * Evaluate an optionally negated single character, numerical, 2559 * or string condition. 
2560 */ 2561 static int 2562 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2563 { 2564 const char *start, *end; 2565 char *cp, *name; 2566 size_t sz; 2567 int deftype, len, number, savepos, istrue, wanttrue; 2568 2569 if ('!' == v[*pos]) { 2570 wanttrue = 0; 2571 (*pos)++; 2572 } else 2573 wanttrue = 1; 2574 2575 switch (v[*pos]) { 2576 case '\0': 2577 return 0; 2578 case 'n': 2579 case 'o': 2580 (*pos)++; 2581 return wanttrue; 2582 case 'e': 2583 case 't': 2584 case 'v': 2585 (*pos)++; 2586 return !wanttrue; 2587 case 'c': 2588 do { 2589 (*pos)++; 2590 } while (v[*pos] == ' '); 2591 2592 /* 2593 * Quirk for groff compatibility: 2594 * The horizontal tab is neither available nor unavailable. 2595 */ 2596 2597 if (v[*pos] == '\t') { 2598 (*pos)++; 2599 return 0; 2600 } 2601 2602 /* Printable ASCII characters are available. */ 2603 2604 if (v[*pos] != '\\') { 2605 (*pos)++; 2606 return wanttrue; 2607 } 2608 2609 end = v + ++*pos; 2610 switch (mandoc_escape(&end, &start, &len)) { 2611 case ESCAPE_SPECIAL: 2612 istrue = mchars_spec2cp(start, len) != -1; 2613 break; 2614 case ESCAPE_UNICODE: 2615 istrue = 1; 2616 break; 2617 case ESCAPE_NUMBERED: 2618 istrue = mchars_num2char(start, len) != -1; 2619 break; 2620 default: 2621 istrue = !wanttrue; 2622 break; 2623 } 2624 *pos = end - v; 2625 return istrue == wanttrue; 2626 case 'd': 2627 case 'r': 2628 cp = v + *pos + 1; 2629 while (*cp == ' ') 2630 cp++; 2631 name = cp; 2632 sz = roff_getname(r, &cp, ln, cp - v); 2633 if (sz == 0) 2634 istrue = 0; 2635 else if (v[*pos] == 'r') 2636 istrue = roff_hasregn(r, name, sz); 2637 else { 2638 deftype = ROFFDEF_ANY; 2639 roff_getstrn(r, name, sz, &deftype); 2640 istrue = !!deftype; 2641 } 2642 *pos = (name + sz) - v; 2643 return istrue == wanttrue; 2644 default: 2645 break; 2646 } 2647 2648 savepos = *pos; 2649 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2650 return (number > 0) == wanttrue; 2651 else if (*pos == savepos) 2652 return roff_evalstrcond(v, pos) 
== wanttrue; 2653 else 2654 return 0; 2655 } 2656 2657 static int 2658 roff_line_ignore(ROFF_ARGS) 2659 { 2660 2661 return ROFF_IGN; 2662 } 2663 2664 static int 2665 roff_insec(ROFF_ARGS) 2666 { 2667 2668 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2669 return ROFF_IGN; 2670 } 2671 2672 static int 2673 roff_unsupp(ROFF_ARGS) 2674 { 2675 2676 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2677 return ROFF_IGN; 2678 } 2679 2680 static int 2681 roff_cond(ROFF_ARGS) 2682 { 2683 int irc; 2684 2685 roffnode_push(r, tok, NULL, ln, ppos); 2686 2687 /* 2688 * An `.el' has no conditional body: it will consume the value 2689 * of the current rstack entry set in prior `ie' calls or 2690 * defaults to DENY. 2691 * 2692 * If we're not an `el', however, then evaluate the conditional. 2693 */ 2694 2695 r->last->rule = tok == ROFF_el ? 2696 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2697 roff_evalcond(r, ln, buf->buf, &pos); 2698 2699 /* 2700 * An if-else will put the NEGATION of the current evaluated 2701 * conditional into the stack of rules. 2702 */ 2703 2704 if (tok == ROFF_ie) { 2705 if (r->rstackpos + 1 == r->rstacksz) { 2706 r->rstacksz += 16; 2707 r->rstack = mandoc_reallocarray(r->rstack, 2708 r->rstacksz, sizeof(int)); 2709 } 2710 r->rstack[++r->rstackpos] = !r->last->rule; 2711 } 2712 2713 /* If the parent has false as its rule, then so do we. */ 2714 2715 if (r->last->parent && !r->last->parent->rule) 2716 r->last->rule = 0; 2717 2718 /* 2719 * Determine scope. 2720 * If there is nothing on the line after the conditional, 2721 * not even whitespace, use next-line scope. 2722 * Except that .while does not support next-line scope. 2723 */ 2724 2725 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2726 r->last->endspan = 2; 2727 goto out; 2728 } 2729 2730 while (buf->buf[pos] == ' ') 2731 pos++; 2732 2733 /* An opening brace requests multiline scope. 
*/ 2734 2735 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2736 r->last->endspan = -1; 2737 pos += 2; 2738 while (buf->buf[pos] == ' ') 2739 pos++; 2740 goto out; 2741 } 2742 2743 /* 2744 * Anything else following the conditional causes 2745 * single-line scope. Warn if the scope contains 2746 * nothing but trailing whitespace. 2747 */ 2748 2749 if (buf->buf[pos] == '\0') 2750 mandoc_msg(MANDOCERR_COND_EMPTY, 2751 ln, ppos, "%s", roff_name[tok]); 2752 2753 r->last->endspan = 1; 2754 2755 out: 2756 *offs = pos; 2757 irc = ROFF_RERUN; 2758 if (tok == ROFF_while) 2759 irc |= ROFF_WHILE; 2760 return irc; 2761 } 2762 2763 static int 2764 roff_ds(ROFF_ARGS) 2765 { 2766 char *string; 2767 const char *name; 2768 size_t namesz; 2769 2770 /* Ignore groff compatibility mode for now. */ 2771 2772 if (tok == ROFF_ds1) 2773 tok = ROFF_ds; 2774 else if (tok == ROFF_as1) 2775 tok = ROFF_as; 2776 2777 /* 2778 * The first word is the name of the string. 2779 * If it is empty or terminated by an escape sequence, 2780 * abort the `ds' request without defining anything. 2781 */ 2782 2783 name = string = buf->buf + pos; 2784 if (*name == '\0') 2785 return ROFF_IGN; 2786 2787 namesz = roff_getname(r, &string, ln, pos); 2788 switch (name[namesz]) { 2789 case '\\': 2790 return ROFF_IGN; 2791 case '\t': 2792 string = buf->buf + pos + namesz; 2793 break; 2794 default: 2795 break; 2796 } 2797 2798 /* Read past the initial double-quote, if any. */ 2799 if (*string == '"') 2800 string++; 2801 2802 /* The rest is the value. */ 2803 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2804 ROFF_as == tok); 2805 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2806 return ROFF_IGN; 2807 } 2808 2809 /* 2810 * Parse a single operator, one or two characters long. 2811 * If the operator is recognized, return success and advance the 2812 * parse point, else return failure and let the parse point unchanged. 
2813 */ 2814 static int 2815 roff_getop(const char *v, int *pos, char *res) 2816 { 2817 2818 *res = v[*pos]; 2819 2820 switch (*res) { 2821 case '+': 2822 case '-': 2823 case '*': 2824 case '/': 2825 case '%': 2826 case '&': 2827 case ':': 2828 break; 2829 case '<': 2830 switch (v[*pos + 1]) { 2831 case '=': 2832 *res = 'l'; 2833 (*pos)++; 2834 break; 2835 case '>': 2836 *res = '!'; 2837 (*pos)++; 2838 break; 2839 case '?': 2840 *res = 'i'; 2841 (*pos)++; 2842 break; 2843 default: 2844 break; 2845 } 2846 break; 2847 case '>': 2848 switch (v[*pos + 1]) { 2849 case '=': 2850 *res = 'g'; 2851 (*pos)++; 2852 break; 2853 case '?': 2854 *res = 'a'; 2855 (*pos)++; 2856 break; 2857 default: 2858 break; 2859 } 2860 break; 2861 case '=': 2862 if ('=' == v[*pos + 1]) 2863 (*pos)++; 2864 break; 2865 default: 2866 return 0; 2867 } 2868 (*pos)++; 2869 2870 return *res; 2871 } 2872 2873 /* 2874 * Evaluate either a parenthesized numeric expression 2875 * or a single signed integer number. 2876 */ 2877 static int 2878 roff_evalpar(struct roff *r, int ln, 2879 const char *v, int *pos, int *res, int flags) 2880 { 2881 2882 if ('(' != v[*pos]) 2883 return roff_getnum(v, pos, res, flags); 2884 2885 (*pos)++; 2886 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2887 return 0; 2888 2889 /* 2890 * Omission of the closing parenthesis 2891 * is an error in validation mode, 2892 * but ignored in evaluation mode. 2893 */ 2894 2895 if (')' == v[*pos]) 2896 (*pos)++; 2897 else if (NULL == res) 2898 return 0; 2899 2900 return 1; 2901 } 2902 2903 /* 2904 * Evaluate a complete numeric expression. 2905 * Proceed left to right, there is no concept of precedence. 
2906 */ 2907 static int 2908 roff_evalnum(struct roff *r, int ln, const char *v, 2909 int *pos, int *res, int flags) 2910 { 2911 int mypos, operand2; 2912 char operator; 2913 2914 if (NULL == pos) { 2915 mypos = 0; 2916 pos = &mypos; 2917 } 2918 2919 if (flags & ROFFNUM_WHITE) 2920 while (isspace((unsigned char)v[*pos])) 2921 (*pos)++; 2922 2923 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2924 return 0; 2925 2926 while (1) { 2927 if (flags & ROFFNUM_WHITE) 2928 while (isspace((unsigned char)v[*pos])) 2929 (*pos)++; 2930 2931 if ( ! roff_getop(v, pos, &operator)) 2932 break; 2933 2934 if (flags & ROFFNUM_WHITE) 2935 while (isspace((unsigned char)v[*pos])) 2936 (*pos)++; 2937 2938 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2939 return 0; 2940 2941 if (flags & ROFFNUM_WHITE) 2942 while (isspace((unsigned char)v[*pos])) 2943 (*pos)++; 2944 2945 if (NULL == res) 2946 continue; 2947 2948 switch (operator) { 2949 case '+': 2950 *res += operand2; 2951 break; 2952 case '-': 2953 *res -= operand2; 2954 break; 2955 case '*': 2956 *res *= operand2; 2957 break; 2958 case '/': 2959 if (operand2 == 0) { 2960 mandoc_msg(MANDOCERR_DIVZERO, 2961 ln, *pos, "%s", v); 2962 *res = 0; 2963 break; 2964 } 2965 *res /= operand2; 2966 break; 2967 case '%': 2968 if (operand2 == 0) { 2969 mandoc_msg(MANDOCERR_DIVZERO, 2970 ln, *pos, "%s", v); 2971 *res = 0; 2972 break; 2973 } 2974 *res %= operand2; 2975 break; 2976 case '<': 2977 *res = *res < operand2; 2978 break; 2979 case '>': 2980 *res = *res > operand2; 2981 break; 2982 case 'l': 2983 *res = *res <= operand2; 2984 break; 2985 case 'g': 2986 *res = *res >= operand2; 2987 break; 2988 case '=': 2989 *res = *res == operand2; 2990 break; 2991 case '!': 2992 *res = *res != operand2; 2993 break; 2994 case '&': 2995 *res = *res && operand2; 2996 break; 2997 case ':': 2998 *res = *res || operand2; 2999 break; 3000 case 'i': 3001 if (operand2 < *res) 3002 *res = operand2; 3003 break; 3004 case 'a': 3005 if (operand2 > *res) 3006 *res 
= operand2; 3007 break; 3008 default: 3009 abort(); 3010 } 3011 } 3012 return 1; 3013 } 3014 3015 /* --- register management ------------------------------------------------ */ 3016 3017 void 3018 roff_setreg(struct roff *r, const char *name, int val, char sign) 3019 { 3020 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 3021 } 3022 3023 static void 3024 roff_setregn(struct roff *r, const char *name, size_t len, 3025 int val, char sign, int step) 3026 { 3027 struct roffreg *reg; 3028 3029 /* Search for an existing register with the same name. */ 3030 reg = r->regtab; 3031 3032 while (reg != NULL && (reg->key.sz != len || 3033 strncmp(reg->key.p, name, len) != 0)) 3034 reg = reg->next; 3035 3036 if (NULL == reg) { 3037 /* Create a new register. */ 3038 reg = mandoc_malloc(sizeof(struct roffreg)); 3039 reg->key.p = mandoc_strndup(name, len); 3040 reg->key.sz = len; 3041 reg->val = 0; 3042 reg->step = 0; 3043 reg->next = r->regtab; 3044 r->regtab = reg; 3045 } 3046 3047 if ('+' == sign) 3048 reg->val += val; 3049 else if ('-' == sign) 3050 reg->val -= val; 3051 else 3052 reg->val = val; 3053 if (step != INT_MIN) 3054 reg->step = step; 3055 } 3056 3057 /* 3058 * Handle some predefined read-only number registers. 3059 * For now, return -1 if the requested register is not predefined; 3060 * in case a predefined read-only register having the value -1 3061 * were to turn up, another special value would have to be chosen. 3062 */ 3063 static int 3064 roff_getregro(const struct roff *r, const char *name) 3065 { 3066 3067 switch (*name) { 3068 case '$': /* Number of arguments of the last macro evaluated. */ 3069 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 3070 case 'A': /* ASCII approximation mode is always off. */ 3071 return 0; 3072 case 'g': /* Groff compatibility mode is always on. */ 3073 return 1; 3074 case 'H': /* Fixed horizontal resolution. */ 3075 return 24; 3076 case 'j': /* Always adjust left margin only. 
*/ 3077 return 0; 3078 case 'T': /* Some output device is always defined. */ 3079 return 1; 3080 case 'V': /* Fixed vertical resolution. */ 3081 return 40; 3082 default: 3083 return -1; 3084 } 3085 } 3086 3087 int 3088 roff_getreg(struct roff *r, const char *name) 3089 { 3090 return roff_getregn(r, name, strlen(name), '\0'); 3091 } 3092 3093 static int 3094 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3095 { 3096 struct roffreg *reg; 3097 int val; 3098 3099 if ('.' == name[0] && 2 == len) { 3100 val = roff_getregro(r, name + 1); 3101 if (-1 != val) 3102 return val; 3103 } 3104 3105 for (reg = r->regtab; reg; reg = reg->next) { 3106 if (len == reg->key.sz && 3107 0 == strncmp(name, reg->key.p, len)) { 3108 switch (sign) { 3109 case '+': 3110 reg->val += reg->step; 3111 break; 3112 case '-': 3113 reg->val -= reg->step; 3114 break; 3115 default: 3116 break; 3117 } 3118 return reg->val; 3119 } 3120 } 3121 3122 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3123 return 0; 3124 } 3125 3126 static int 3127 roff_hasregn(const struct roff *r, const char *name, size_t len) 3128 { 3129 struct roffreg *reg; 3130 int val; 3131 3132 if ('.' 
== name[0] && 2 == len) { 3133 val = roff_getregro(r, name + 1); 3134 if (-1 != val) 3135 return 1; 3136 } 3137 3138 for (reg = r->regtab; reg; reg = reg->next) 3139 if (len == reg->key.sz && 3140 0 == strncmp(name, reg->key.p, len)) 3141 return 1; 3142 3143 return 0; 3144 } 3145 3146 static void 3147 roff_freereg(struct roffreg *reg) 3148 { 3149 struct roffreg *old_reg; 3150 3151 while (NULL != reg) { 3152 free(reg->key.p); 3153 old_reg = reg; 3154 reg = reg->next; 3155 free(old_reg); 3156 } 3157 } 3158 3159 static int 3160 roff_nr(ROFF_ARGS) 3161 { 3162 char *key, *val, *step; 3163 size_t keysz; 3164 int iv, is, len; 3165 char sign; 3166 3167 key = val = buf->buf + pos; 3168 if (*key == '\0') 3169 return ROFF_IGN; 3170 3171 keysz = roff_getname(r, &val, ln, pos); 3172 if (key[keysz] == '\\' || key[keysz] == '\t') 3173 return ROFF_IGN; 3174 3175 sign = *val; 3176 if (sign == '+' || sign == '-') 3177 val++; 3178 3179 len = 0; 3180 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3181 return ROFF_IGN; 3182 3183 step = val + len; 3184 while (isspace((unsigned char)*step)) 3185 step++; 3186 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3187 is = INT_MIN; 3188 3189 roff_setregn(r, key, keysz, iv, sign, is); 3190 return ROFF_IGN; 3191 } 3192 3193 static int 3194 roff_rr(ROFF_ARGS) 3195 { 3196 struct roffreg *reg, **prev; 3197 char *name, *cp; 3198 size_t namesz; 3199 3200 name = cp = buf->buf + pos; 3201 if (*name == '\0') 3202 return ROFF_IGN; 3203 namesz = roff_getname(r, &cp, ln, pos); 3204 name[namesz] = '\0'; 3205 3206 prev = &r->regtab; 3207 while (1) { 3208 reg = *prev; 3209 if (reg == NULL || !strcmp(name, reg->key.p)) 3210 break; 3211 prev = ®->next; 3212 } 3213 if (reg != NULL) { 3214 *prev = reg->next; 3215 free(reg->key.p); 3216 free(reg); 3217 } 3218 return ROFF_IGN; 3219 } 3220 3221 /* --- handler functions for roff requests -------------------------------- */ 3222 3223 static int 3224 roff_rm(ROFF_ARGS) 3225 { 3226 const char *name; 3227 
char *cp; 3228 size_t namesz; 3229 3230 cp = buf->buf + pos; 3231 while (*cp != '\0') { 3232 name = cp; 3233 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3234 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3235 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3236 if (name[namesz] == '\\' || name[namesz] == '\t') 3237 break; 3238 } 3239 return ROFF_IGN; 3240 } 3241 3242 static int 3243 roff_it(ROFF_ARGS) 3244 { 3245 int iv; 3246 3247 /* Parse the number of lines. */ 3248 3249 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3250 mandoc_msg(MANDOCERR_IT_NONUM, 3251 ln, ppos, "%s", buf->buf + 1); 3252 return ROFF_IGN; 3253 } 3254 3255 while (isspace((unsigned char)buf->buf[pos])) 3256 pos++; 3257 3258 /* 3259 * Arm the input line trap. 3260 * Special-casing "an-trap" is an ugly workaround to cope 3261 * with DocBook stupidly fiddling with man(7) internals. 3262 */ 3263 3264 roffit_lines = iv; 3265 roffit_macro = mandoc_strdup(iv != 1 || 3266 strcmp(buf->buf + pos, "an-trap") ? 
3267 buf->buf + pos : "br"); 3268 return ROFF_IGN; 3269 } 3270 3271 static int 3272 roff_Dd(ROFF_ARGS) 3273 { 3274 int mask; 3275 enum roff_tok t, te; 3276 3277 switch (tok) { 3278 case ROFF_Dd: 3279 tok = MDOC_Dd; 3280 te = MDOC_MAX; 3281 if (r->format == 0) 3282 r->format = MPARSE_MDOC; 3283 mask = MPARSE_MDOC | MPARSE_QUICK; 3284 break; 3285 case ROFF_TH: 3286 tok = MAN_TH; 3287 te = MAN_MAX; 3288 if (r->format == 0) 3289 r->format = MPARSE_MAN; 3290 mask = MPARSE_QUICK; 3291 break; 3292 default: 3293 abort(); 3294 } 3295 if ((r->options & mask) == 0) 3296 for (t = tok; t < te; t++) 3297 roff_setstr(r, roff_name[t], NULL, 0); 3298 return ROFF_CONT; 3299 } 3300 3301 static int 3302 roff_TE(ROFF_ARGS) 3303 { 3304 r->man->flags &= ~ROFF_NONOFILL; 3305 if (r->tbl == NULL) { 3306 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3307 return ROFF_IGN; 3308 } 3309 if (tbl_end(r->tbl, 0) == 0) { 3310 r->tbl = NULL; 3311 free(buf->buf); 3312 buf->buf = mandoc_strdup(".sp"); 3313 buf->sz = 4; 3314 *offs = 0; 3315 return ROFF_REPARSE; 3316 } 3317 r->tbl = NULL; 3318 return ROFF_IGN; 3319 } 3320 3321 static int 3322 roff_T_(ROFF_ARGS) 3323 { 3324 3325 if (NULL == r->tbl) 3326 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3327 else 3328 tbl_restart(ln, ppos, r->tbl); 3329 3330 return ROFF_IGN; 3331 } 3332 3333 /* 3334 * Handle in-line equation delimiters. 3335 */ 3336 static int 3337 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3338 { 3339 char *cp1, *cp2; 3340 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3341 3342 /* 3343 * Outside equations, look for an opening delimiter. 3344 * If we are inside an equation, we already know it is 3345 * in-line, or this function wouldn't have been called; 3346 * so look for a closing delimiter. 3347 */ 3348 3349 cp1 = buf->buf + pos; 3350 cp2 = strchr(cp1, r->eqn == NULL ? 
3351 r->last_eqn->odelim : r->last_eqn->cdelim); 3352 if (cp2 == NULL) 3353 return ROFF_CONT; 3354 3355 *cp2++ = '\0'; 3356 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3357 3358 /* Handle preceding text, protecting whitespace. */ 3359 3360 if (*buf->buf != '\0') { 3361 if (r->eqn == NULL) 3362 bef_pr = "\\&"; 3363 bef_nl = "\n"; 3364 } 3365 3366 /* 3367 * Prepare replacing the delimiter with an equation macro 3368 * and drop leading white space from the equation. 3369 */ 3370 3371 if (r->eqn == NULL) { 3372 while (*cp2 == ' ') 3373 cp2++; 3374 mac = ".EQ"; 3375 } else 3376 mac = ".EN"; 3377 3378 /* Handle following text, protecting whitespace. */ 3379 3380 if (*cp2 != '\0') { 3381 aft_nl = "\n"; 3382 if (r->eqn != NULL) 3383 aft_pr = "\\&"; 3384 } 3385 3386 /* Do the actual replacement. */ 3387 3388 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3389 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3390 free(buf->buf); 3391 buf->buf = cp1; 3392 3393 /* Toggle the in-line state of the eqn subsystem. 
*/ 3394 3395 r->eqn_inline = r->eqn == NULL; 3396 return ROFF_REPARSE; 3397 } 3398 3399 static int 3400 roff_EQ(ROFF_ARGS) 3401 { 3402 struct roff_node *n; 3403 3404 if (r->man->meta.macroset == MACROSET_MAN) 3405 man_breakscope(r->man, ROFF_EQ); 3406 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3407 if (ln > r->man->last->line) 3408 n->flags |= NODE_LINE; 3409 n->eqn = eqn_box_new(); 3410 roff_node_append(r->man, n); 3411 r->man->next = ROFF_NEXT_SIBLING; 3412 3413 assert(r->eqn == NULL); 3414 if (r->last_eqn == NULL) 3415 r->last_eqn = eqn_alloc(); 3416 else 3417 eqn_reset(r->last_eqn); 3418 r->eqn = r->last_eqn; 3419 r->eqn->node = n; 3420 3421 if (buf->buf[pos] != '\0') 3422 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3423 ".EQ %s", buf->buf + pos); 3424 3425 return ROFF_IGN; 3426 } 3427 3428 static int 3429 roff_EN(ROFF_ARGS) 3430 { 3431 if (r->eqn != NULL) { 3432 eqn_parse(r->eqn); 3433 r->eqn = NULL; 3434 } else 3435 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3436 if (buf->buf[pos] != '\0') 3437 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3438 "EN %s", buf->buf + pos); 3439 return ROFF_IGN; 3440 } 3441 3442 static int 3443 roff_TS(ROFF_ARGS) 3444 { 3445 if (r->tbl != NULL) { 3446 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3447 tbl_end(r->tbl, 0); 3448 } 3449 r->man->flags |= ROFF_NONOFILL; 3450 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3451 if (r->last_tbl == NULL) 3452 r->first_tbl = r->tbl; 3453 r->last_tbl = r->tbl; 3454 return ROFF_IGN; 3455 } 3456 3457 static int 3458 roff_noarg(ROFF_ARGS) 3459 { 3460 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3461 man_breakscope(r->man, tok); 3462 if (tok == ROFF_brp) 3463 tok = ROFF_br; 3464 roff_elem_alloc(r->man, ln, ppos, tok); 3465 if (buf->buf[pos] != '\0') 3466 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3467 "%s %s", roff_name[tok], buf->buf + pos); 3468 if (tok == ROFF_nf) 3469 r->man->flags |= ROFF_NOFILL; 3470 else if (tok == ROFF_fi) 3471 r->man->flags &= ~ROFF_NOFILL; 3472 
r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3473 r->man->next = ROFF_NEXT_SIBLING; 3474 return ROFF_IGN; 3475 } 3476 3477 static int 3478 roff_onearg(ROFF_ARGS) 3479 { 3480 struct roff_node *n; 3481 char *cp; 3482 int npos; 3483 3484 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3485 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3486 tok == ROFF_ti)) 3487 man_breakscope(r->man, tok); 3488 3489 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3490 r->man->last = roffce_node; 3491 r->man->next = ROFF_NEXT_SIBLING; 3492 } 3493 3494 roff_elem_alloc(r->man, ln, ppos, tok); 3495 n = r->man->last; 3496 3497 cp = buf->buf + pos; 3498 if (*cp != '\0') { 3499 while (*cp != '\0' && *cp != ' ') 3500 cp++; 3501 while (*cp == ' ') 3502 *cp++ = '\0'; 3503 if (*cp != '\0') 3504 mandoc_msg(MANDOCERR_ARG_EXCESS, 3505 ln, (int)(cp - buf->buf), 3506 "%s ... %s", roff_name[tok], cp); 3507 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3508 } 3509 3510 if (tok == ROFF_ce || tok == ROFF_rj) { 3511 if (r->man->last->type == ROFFT_ELEM) { 3512 roff_word_alloc(r->man, ln, pos, "1"); 3513 r->man->last->flags |= NODE_NOSRC; 3514 } 3515 npos = 0; 3516 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3517 &roffce_lines, 0) == 0) { 3518 mandoc_msg(MANDOCERR_CE_NONUM, 3519 ln, pos, "ce %s", buf->buf + pos); 3520 roffce_lines = 1; 3521 } 3522 if (roffce_lines < 1) { 3523 r->man->last = r->man->last->parent; 3524 roffce_node = NULL; 3525 roffce_lines = 0; 3526 } else 3527 roffce_node = r->man->last->parent; 3528 } else { 3529 n->flags |= NODE_VALID | NODE_ENDED; 3530 r->man->last = n; 3531 } 3532 n->flags |= NODE_LINE; 3533 r->man->next = ROFF_NEXT_SIBLING; 3534 return ROFF_IGN; 3535 } 3536 3537 static int 3538 roff_manyarg(ROFF_ARGS) 3539 { 3540 struct roff_node *n; 3541 char *sp, *ep; 3542 3543 roff_elem_alloc(r->man, ln, ppos, tok); 3544 n = r->man->last; 3545 3546 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3547 while (*ep != '\0' && 
*ep != ' ') 3548 ep++; 3549 while (*ep == ' ') 3550 *ep++ = '\0'; 3551 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3552 } 3553 3554 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3555 r->man->last = n; 3556 r->man->next = ROFF_NEXT_SIBLING; 3557 return ROFF_IGN; 3558 } 3559 3560 static int 3561 roff_als(ROFF_ARGS) 3562 { 3563 char *oldn, *newn, *end, *value; 3564 size_t oldsz, newsz, valsz; 3565 3566 newn = oldn = buf->buf + pos; 3567 if (*newn == '\0') 3568 return ROFF_IGN; 3569 3570 newsz = roff_getname(r, &oldn, ln, pos); 3571 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3572 return ROFF_IGN; 3573 3574 end = oldn; 3575 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3576 if (oldsz == 0) 3577 return ROFF_IGN; 3578 3579 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3580 (int)oldsz, oldn); 3581 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3582 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3583 free(value); 3584 return ROFF_IGN; 3585 } 3586 3587 /* 3588 * The .break request only makes sense inside conditionals, 3589 * and that case is already handled in roff_cond_sub(). 3590 */ 3591 static int 3592 roff_break(ROFF_ARGS) 3593 { 3594 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3595 return ROFF_IGN; 3596 } 3597 3598 static int 3599 roff_cc(ROFF_ARGS) 3600 { 3601 const char *p; 3602 3603 p = buf->buf + pos; 3604 3605 if (*p == '\0' || (r->control = *p++) == '.') 3606 r->control = '\0'; 3607 3608 if (*p != '\0') 3609 mandoc_msg(MANDOCERR_ARG_EXCESS, 3610 ln, p - buf->buf, "cc ... %s", p); 3611 3612 return ROFF_IGN; 3613 } 3614 3615 static int 3616 roff_char(ROFF_ARGS) 3617 { 3618 const char *p, *kp, *vp; 3619 size_t ksz, vsz; 3620 int font; 3621 3622 /* Parse the character to be replaced. 
*/ 3623 3624 kp = buf->buf + pos; 3625 p = kp + 1; 3626 if (*kp == '\0' || (*kp == '\\' && 3627 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3628 (*p != ' ' && *p != '\0')) { 3629 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3630 return ROFF_IGN; 3631 } 3632 ksz = p - kp; 3633 while (*p == ' ') 3634 p++; 3635 3636 /* 3637 * If the replacement string contains a font escape sequence, 3638 * we have to restore the font at the end. 3639 */ 3640 3641 vp = p; 3642 vsz = strlen(p); 3643 font = 0; 3644 while (*p != '\0') { 3645 if (*p++ != '\\') 3646 continue; 3647 switch (mandoc_escape(&p, NULL, NULL)) { 3648 case ESCAPE_FONT: 3649 case ESCAPE_FONTROMAN: 3650 case ESCAPE_FONTITALIC: 3651 case ESCAPE_FONTBOLD: 3652 case ESCAPE_FONTBI: 3653 case ESCAPE_FONTCW: 3654 case ESCAPE_FONTPREV: 3655 font++; 3656 break; 3657 default: 3658 break; 3659 } 3660 } 3661 if (font > 1) 3662 mandoc_msg(MANDOCERR_CHAR_FONT, 3663 ln, (int)(vp - buf->buf), "%s", vp); 3664 3665 /* 3666 * Approximate the effect of .char using the .tr tables. 3667 * XXX In groff, .char and .tr interact differently. 3668 */ 3669 3670 if (ksz == 1) { 3671 if (r->xtab == NULL) 3672 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3673 assert((unsigned int)*kp < 128); 3674 free(r->xtab[(int)*kp].p); 3675 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3676 "%s%s", vp, font ? "\fP" : ""); 3677 } else { 3678 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3679 if (font) 3680 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3681 } 3682 return ROFF_IGN; 3683 } 3684 3685 static int 3686 roff_ec(ROFF_ARGS) 3687 { 3688 const char *p; 3689 3690 p = buf->buf + pos; 3691 if (*p == '\0') 3692 r->escape = '\\'; 3693 else { 3694 r->escape = *p; 3695 if (*++p != '\0') 3696 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3697 (int)(p - buf->buf), "ec ... 
%s", p); 3698 } 3699 return ROFF_IGN; 3700 } 3701 3702 static int 3703 roff_eo(ROFF_ARGS) 3704 { 3705 r->escape = '\0'; 3706 if (buf->buf[pos] != '\0') 3707 mandoc_msg(MANDOCERR_ARG_SKIP, 3708 ln, pos, "eo %s", buf->buf + pos); 3709 return ROFF_IGN; 3710 } 3711 3712 static int 3713 roff_nop(ROFF_ARGS) 3714 { 3715 while (buf->buf[pos] == ' ') 3716 pos++; 3717 *offs = pos; 3718 return ROFF_RERUN; 3719 } 3720 3721 static int 3722 roff_tr(ROFF_ARGS) 3723 { 3724 const char *p, *first, *second; 3725 size_t fsz, ssz; 3726 enum mandoc_esc esc; 3727 3728 p = buf->buf + pos; 3729 3730 if (*p == '\0') { 3731 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3732 return ROFF_IGN; 3733 } 3734 3735 while (*p != '\0') { 3736 fsz = ssz = 1; 3737 3738 first = p++; 3739 if (*first == '\\') { 3740 esc = mandoc_escape(&p, NULL, NULL); 3741 if (esc == ESCAPE_ERROR) { 3742 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3743 (int)(p - buf->buf), "%s", first); 3744 return ROFF_IGN; 3745 } 3746 fsz = (size_t)(p - first); 3747 } 3748 3749 second = p++; 3750 if (*second == '\\') { 3751 esc = mandoc_escape(&p, NULL, NULL); 3752 if (esc == ESCAPE_ERROR) { 3753 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3754 (int)(p - buf->buf), "%s", second); 3755 return ROFF_IGN; 3756 } 3757 ssz = (size_t)(p - second); 3758 } else if (*second == '\0') { 3759 mandoc_msg(MANDOCERR_TR_ODD, ln, 3760 (int)(first - buf->buf), "tr %s", first); 3761 second = " "; 3762 p--; 3763 } 3764 3765 if (fsz > 1) { 3766 roff_setstrn(&r->xmbtab, first, fsz, 3767 second, ssz, 0); 3768 continue; 3769 } 3770 3771 if (r->xtab == NULL) 3772 r->xtab = mandoc_calloc(128, 3773 sizeof(struct roffstr)); 3774 3775 free(r->xtab[(int)*first].p); 3776 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3777 r->xtab[(int)*first].sz = ssz; 3778 } 3779 3780 return ROFF_IGN; 3781 } 3782 3783 /* 3784 * Implementation of the .return request. 3785 * There is no need to call roff_userret() from here. 
3786 * The read module will call that after rewinding the reader stack 3787 * to the place from where the current macro was called. 3788 */ 3789 static int 3790 roff_return(ROFF_ARGS) 3791 { 3792 if (r->mstackpos >= 0) 3793 return ROFF_IGN | ROFF_USERRET; 3794 3795 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3796 return ROFF_IGN; 3797 } 3798 3799 static int 3800 roff_rn(ROFF_ARGS) 3801 { 3802 const char *value; 3803 char *oldn, *newn, *end; 3804 size_t oldsz, newsz; 3805 int deftype; 3806 3807 oldn = newn = buf->buf + pos; 3808 if (*oldn == '\0') 3809 return ROFF_IGN; 3810 3811 oldsz = roff_getname(r, &newn, ln, pos); 3812 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3813 return ROFF_IGN; 3814 3815 end = newn; 3816 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3817 if (newsz == 0) 3818 return ROFF_IGN; 3819 3820 deftype = ROFFDEF_ANY; 3821 value = roff_getstrn(r, oldn, oldsz, &deftype); 3822 switch (deftype) { 3823 case ROFFDEF_USER: 3824 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3825 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3826 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3827 break; 3828 case ROFFDEF_PRE: 3829 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3830 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3831 break; 3832 case ROFFDEF_REN: 3833 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3834 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3835 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3836 break; 3837 case ROFFDEF_STD: 3838 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3839 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3840 break; 3841 default: 3842 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3843 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3844 break; 3845 } 3846 return ROFF_IGN; 3847 } 3848 3849 static int 3850 roff_shift(ROFF_ARGS) 3851 { 3852 struct mctx *ctx; 3853 int levels, i; 3854 3855 levels = 1; 
3856 if (buf->buf[pos] != '\0' && 3857 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3858 mandoc_msg(MANDOCERR_CE_NONUM, 3859 ln, pos, "shift %s", buf->buf + pos); 3860 levels = 1; 3861 } 3862 if (r->mstackpos < 0) { 3863 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3864 return ROFF_IGN; 3865 } 3866 ctx = r->mstack + r->mstackpos; 3867 if (levels > ctx->argc) { 3868 mandoc_msg(MANDOCERR_SHIFT, 3869 ln, pos, "%d, but max is %d", levels, ctx->argc); 3870 levels = ctx->argc; 3871 } 3872 if (levels == 0) 3873 return ROFF_IGN; 3874 for (i = 0; i < levels; i++) 3875 free(ctx->argv[i]); 3876 ctx->argc -= levels; 3877 for (i = 0; i < ctx->argc; i++) 3878 ctx->argv[i] = ctx->argv[i + levels]; 3879 return ROFF_IGN; 3880 } 3881 3882 static int 3883 roff_so(ROFF_ARGS) 3884 { 3885 char *name, *cp; 3886 3887 name = buf->buf + pos; 3888 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3889 3890 /* 3891 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3892 * opening anything that's not in our cwd or anything beneath 3893 * it. Thus, explicitly disallow traversing up the file-system 3894 * or using absolute paths. 3895 */ 3896 3897 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3898 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3899 buf->sz = mandoc_asprintf(&cp, 3900 ".sp\nSee the file %s.\n.sp", name) + 1; 3901 free(buf->buf); 3902 buf->buf = cp; 3903 *offs = 0; 3904 return ROFF_REPARSE; 3905 } 3906 3907 *offs = pos; 3908 return ROFF_SO; 3909 } 3910 3911 /* --- user defined strings and macros ------------------------------------ */ 3912 3913 static int 3914 roff_userdef(ROFF_ARGS) 3915 { 3916 struct mctx *ctx; 3917 char *arg, *ap, *dst, *src; 3918 size_t sz; 3919 3920 /* If the macro is empty, ignore it altogether. */ 3921 3922 if (*r->current_string == '\0') 3923 return ROFF_IGN; 3924 3925 /* Initialize a new macro stack context. 
*/ 3926 3927 if (++r->mstackpos == r->mstacksz) { 3928 r->mstack = mandoc_recallocarray(r->mstack, 3929 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3930 r->mstacksz += 8; 3931 } 3932 ctx = r->mstack + r->mstackpos; 3933 ctx->argsz = 0; 3934 ctx->argc = 0; 3935 ctx->argv = NULL; 3936 3937 /* 3938 * Collect pointers to macro argument strings, 3939 * NUL-terminating them and escaping quotes. 3940 */ 3941 3942 src = buf->buf + pos; 3943 while (*src != '\0') { 3944 if (ctx->argc == ctx->argsz) { 3945 ctx->argsz += 8; 3946 ctx->argv = mandoc_reallocarray(ctx->argv, 3947 ctx->argsz, sizeof(*ctx->argv)); 3948 } 3949 arg = roff_getarg(r, &src, ln, &pos); 3950 sz = 1; /* For the terminating NUL. */ 3951 for (ap = arg; *ap != '\0'; ap++) 3952 sz += *ap == '"' ? 4 : 1; 3953 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3954 for (ap = arg; *ap != '\0'; ap++) { 3955 if (*ap == '"') { 3956 memcpy(dst, "\\(dq", 4); 3957 dst += 4; 3958 } else 3959 *dst++ = *ap; 3960 } 3961 *dst = '\0'; 3962 free(arg); 3963 } 3964 3965 /* Replace the macro invocation by the macro definition. */ 3966 3967 free(buf->buf); 3968 buf->buf = mandoc_strdup(r->current_string); 3969 buf->sz = strlen(buf->buf) + 1; 3970 *offs = 0; 3971 3972 return buf->buf[buf->sz - 2] == '\n' ? 3973 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3974 } 3975 3976 /* 3977 * Calling a high-level macro that was renamed with .rn. 3978 * r->current_string has already been set up by roff_parse(). 3979 */ 3980 static int 3981 roff_renamed(ROFF_ARGS) 3982 { 3983 char *nbuf; 3984 3985 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3986 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 3987 free(buf->buf); 3988 buf->buf = nbuf; 3989 *offs = 0; 3990 return ROFF_CONT; 3991 } 3992 3993 /* 3994 * Measure the length in bytes of the roff identifier at *cpp 3995 * and advance the pointer to the next word. 
3996 */ 3997 static size_t 3998 roff_getname(struct roff *r, char **cpp, int ln, int pos) 3999 { 4000 char *name, *cp; 4001 size_t namesz; 4002 4003 name = *cpp; 4004 if (*name == '\0') 4005 return 0; 4006 4007 /* Advance cp to the byte after the end of the name. */ 4008 4009 for (cp = name; 1; cp++) { 4010 namesz = cp - name; 4011 if (*cp == '\0') 4012 break; 4013 if (*cp == ' ' || *cp == '\t') { 4014 cp++; 4015 break; 4016 } 4017 if (*cp != '\\') 4018 continue; 4019 if (cp[1] == '{' || cp[1] == '}') 4020 break; 4021 if (*++cp == '\\') 4022 continue; 4023 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 4024 "%.*s", (int)(cp - name + 1), name); 4025 mandoc_escape((const char **)&cp, NULL, NULL); 4026 break; 4027 } 4028 4029 /* Read past spaces. */ 4030 4031 while (*cp == ' ') 4032 cp++; 4033 4034 *cpp = cp; 4035 return namesz; 4036 } 4037 4038 /* 4039 * Store *string into the user-defined string called *name. 4040 * To clear an existing entry, call with (*r, *name, NULL, 0). 4041 * append == 0: replace mode 4042 * append == 1: single-line append mode 4043 * append == 2: multiline append mode, append '\n' after each call 4044 */ 4045 static void 4046 roff_setstr(struct roff *r, const char *name, const char *string, 4047 int append) 4048 { 4049 size_t namesz; 4050 4051 namesz = strlen(name); 4052 roff_setstrn(&r->strtab, name, namesz, string, 4053 string ? strlen(string) : 0, append); 4054 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 4055 } 4056 4057 static void 4058 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 4059 const char *string, size_t stringsz, int append) 4060 { 4061 struct roffkv *n; 4062 char *c; 4063 int i; 4064 size_t oldch, newch; 4065 4066 /* Search for an existing string with the same name. */ 4067 n = *r; 4068 4069 while (n && (namesz != n->key.sz || 4070 strncmp(n->key.p, name, namesz))) 4071 n = n->next; 4072 4073 if (NULL == n) { 4074 /* Create a new string table entry. 
*/ 4075 n = mandoc_malloc(sizeof(struct roffkv)); 4076 n->key.p = mandoc_strndup(name, namesz); 4077 n->key.sz = namesz; 4078 n->val.p = NULL; 4079 n->val.sz = 0; 4080 n->next = *r; 4081 *r = n; 4082 } else if (0 == append) { 4083 free(n->val.p); 4084 n->val.p = NULL; 4085 n->val.sz = 0; 4086 } 4087 4088 if (NULL == string) 4089 return; 4090 4091 /* 4092 * One additional byte for the '\n' in multiline mode, 4093 * and one for the terminating '\0'. 4094 */ 4095 newch = stringsz + (1 < append ? 2u : 1u); 4096 4097 if (NULL == n->val.p) { 4098 n->val.p = mandoc_malloc(newch); 4099 *n->val.p = '\0'; 4100 oldch = 0; 4101 } else { 4102 oldch = n->val.sz; 4103 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4104 } 4105 4106 /* Skip existing content in the destination buffer. */ 4107 c = n->val.p + (int)oldch; 4108 4109 /* Append new content to the destination buffer. */ 4110 i = 0; 4111 while (i < (int)stringsz) { 4112 /* 4113 * Rudimentary roff copy mode: 4114 * Handle escaped backslashes. 4115 */ 4116 if ('\\' == string[i] && '\\' == string[i + 1]) 4117 i++; 4118 *c++ = string[i++]; 4119 } 4120 4121 /* Append terminating bytes. 
*/ 4122 if (1 < append) 4123 *c++ = '\n'; 4124 4125 *c = '\0'; 4126 n->val.sz = (int)(c - n->val.p); 4127 } 4128 4129 static const char * 4130 roff_getstrn(struct roff *r, const char *name, size_t len, 4131 int *deftype) 4132 { 4133 const struct roffkv *n; 4134 int found, i; 4135 enum roff_tok tok; 4136 4137 found = 0; 4138 for (n = r->strtab; n != NULL; n = n->next) { 4139 if (strncmp(name, n->key.p, len) != 0 || 4140 n->key.p[len] != '\0' || n->val.p == NULL) 4141 continue; 4142 if (*deftype & ROFFDEF_USER) { 4143 *deftype = ROFFDEF_USER; 4144 return n->val.p; 4145 } else { 4146 found = 1; 4147 break; 4148 } 4149 } 4150 for (n = r->rentab; n != NULL; n = n->next) { 4151 if (strncmp(name, n->key.p, len) != 0 || 4152 n->key.p[len] != '\0' || n->val.p == NULL) 4153 continue; 4154 if (*deftype & ROFFDEF_REN) { 4155 *deftype = ROFFDEF_REN; 4156 return n->val.p; 4157 } else { 4158 found = 1; 4159 break; 4160 } 4161 } 4162 for (i = 0; i < PREDEFS_MAX; i++) { 4163 if (strncmp(name, predefs[i].name, len) != 0 || 4164 predefs[i].name[len] != '\0') 4165 continue; 4166 if (*deftype & ROFFDEF_PRE) { 4167 *deftype = ROFFDEF_PRE; 4168 return predefs[i].str; 4169 } else { 4170 found = 1; 4171 break; 4172 } 4173 } 4174 if (r->man->meta.macroset != MACROSET_MAN) { 4175 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4176 if (strncmp(name, roff_name[tok], len) != 0 || 4177 roff_name[tok][len] != '\0') 4178 continue; 4179 if (*deftype & ROFFDEF_STD) { 4180 *deftype = ROFFDEF_STD; 4181 return NULL; 4182 } else { 4183 found = 1; 4184 break; 4185 } 4186 } 4187 } 4188 if (r->man->meta.macroset != MACROSET_MDOC) { 4189 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4190 if (strncmp(name, roff_name[tok], len) != 0 || 4191 roff_name[tok][len] != '\0') 4192 continue; 4193 if (*deftype & ROFFDEF_STD) { 4194 *deftype = ROFFDEF_STD; 4195 return NULL; 4196 } else { 4197 found = 1; 4198 break; 4199 } 4200 } 4201 } 4202 4203 if (found == 0 && *deftype != ROFFDEF_ANY) { 4204 if (*deftype & ROFFDEF_REN) { 
4205 /* 4206 * This might still be a request, 4207 * so do not treat it as undefined yet. 4208 */ 4209 *deftype = ROFFDEF_UNDEF; 4210 return NULL; 4211 } 4212 4213 /* Using an undefined string defines it to be empty. */ 4214 4215 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4216 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4217 } 4218 4219 *deftype = 0; 4220 return NULL; 4221 } 4222 4223 static void 4224 roff_freestr(struct roffkv *r) 4225 { 4226 struct roffkv *n, *nn; 4227 4228 for (n = r; n; n = nn) { 4229 free(n->key.p); 4230 free(n->val.p); 4231 nn = n->next; 4232 free(n); 4233 } 4234 } 4235 4236 /* --- accessors and utility functions ------------------------------------ */ 4237 4238 /* 4239 * Duplicate an input string, making the appropriate character 4240 * conversations (as stipulated by `tr') along the way. 4241 * Returns a heap-allocated string with all the replacements made. 4242 */ 4243 char * 4244 roff_strdup(const struct roff *r, const char *p) 4245 { 4246 const struct roffkv *cp; 4247 char *res; 4248 const char *pp; 4249 size_t ssz, sz; 4250 enum mandoc_esc esc; 4251 4252 if (NULL == r->xmbtab && NULL == r->xtab) 4253 return mandoc_strdup(p); 4254 else if ('\0' == *p) 4255 return mandoc_strdup(""); 4256 4257 /* 4258 * Step through each character looking for term matches 4259 * (remember that a `tr' can be invoked with an escape, which is 4260 * a glyph but the escape is multi-character). 4261 * We only do this if the character hash has been initialised 4262 * and the string is >0 length. 
4263 */ 4264 4265 res = NULL; 4266 ssz = 0; 4267 4268 while ('\0' != *p) { 4269 assert((unsigned int)*p < 128); 4270 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4271 sz = r->xtab[(int)*p].sz; 4272 res = mandoc_realloc(res, ssz + sz + 1); 4273 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4274 ssz += sz; 4275 p++; 4276 continue; 4277 } else if ('\\' != *p) { 4278 res = mandoc_realloc(res, ssz + 2); 4279 res[ssz++] = *p++; 4280 continue; 4281 } 4282 4283 /* Search for term matches. */ 4284 for (cp = r->xmbtab; cp; cp = cp->next) 4285 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4286 break; 4287 4288 if (NULL != cp) { 4289 /* 4290 * A match has been found. 4291 * Append the match to the array and move 4292 * forward by its keysize. 4293 */ 4294 res = mandoc_realloc(res, 4295 ssz + cp->val.sz + 1); 4296 memcpy(res + ssz, cp->val.p, cp->val.sz); 4297 ssz += cp->val.sz; 4298 p += (int)cp->key.sz; 4299 continue; 4300 } 4301 4302 /* 4303 * Handle escapes carefully: we need to copy 4304 * over just the escape itself, or else we might 4305 * do replacements within the escape itself. 4306 * Make sure to pass along the bogus string. 4307 */ 4308 pp = p++; 4309 esc = mandoc_escape(&p, NULL, NULL); 4310 if (ESCAPE_ERROR == esc) { 4311 sz = strlen(pp); 4312 res = mandoc_realloc(res, ssz + sz + 1); 4313 memcpy(res + ssz, pp, sz); 4314 break; 4315 } 4316 /* 4317 * We bail out on bad escapes. 4318 * No need to warn: we already did so when 4319 * roff_expand() was called. 4320 */ 4321 sz = (int)(p - pp); 4322 res = mandoc_realloc(res, ssz + sz + 1); 4323 memcpy(res + ssz, pp, sz); 4324 ssz += sz; 4325 } 4326 4327 res[(int)ssz] = '\0'; 4328 return res; 4329 } 4330 4331 int 4332 roff_getformat(const struct roff *r) 4333 { 4334 4335 return r->format; 4336 } 4337 4338 /* 4339 * Find out whether a line is a macro line or not. 4340 * If it is, adjust the current position and return one; if it isn't, 4341 * return zero and don't change the current position. 
4342 * If the control character has been set with `.cc', then let that grain 4343 * precedence. 4344 * This is slighly contrary to groff, where using the non-breaking 4345 * control character when `cc' has been invoked will cause the 4346 * non-breaking macro contents to be printed verbatim. 4347 */ 4348 int 4349 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4350 { 4351 int pos; 4352 4353 pos = *ppos; 4354 4355 if (r->control != '\0' && cp[pos] == r->control) 4356 pos++; 4357 else if (r->control != '\0') 4358 return 0; 4359 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4360 pos += 2; 4361 else if ('.' == cp[pos] || '\'' == cp[pos]) 4362 pos++; 4363 else 4364 return 0; 4365 4366 while (' ' == cp[pos] || '\t' == cp[pos]) 4367 pos++; 4368 4369 *ppos = pos; 4370 return 1; 4371 } 4372