1 /* $OpenBSD: roff.c,v 1.200 2018/04/11 17:10:35 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <limits.h> 23 #include <stddef.h> 24 #include <stdint.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 29 #include "mandoc.h" 30 #include "mandoc_aux.h" 31 #include "mandoc_ohash.h" 32 #include "roff.h" 33 #include "libmandoc.h" 34 #include "roff_int.h" 35 #include "libroff.h" 36 37 /* Maximum number of string expansions per line, to break infinite loops. */ 38 #define EXPAND_LIMIT 1000 39 40 /* Types of definitions of macros and strings. */ 41 #define ROFFDEF_USER (1 << 1) /* User-defined. */ 42 #define ROFFDEF_PRE (1 << 2) /* Predefined. */ 43 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 44 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 45 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 46 ROFFDEF_REN | ROFFDEF_STD) 47 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 48 49 /* --- data types --------------------------------------------------------- */ 50 51 /* 52 * An incredibly-simple string buffer. 53 */ 54 struct roffstr { 55 char *p; /* nil-terminated buffer */ 56 size_t sz; /* saved strlen(p) */ 57 }; 58 59 /* 60 * A key-value roffstr pair as part of a singly-linked list. 61 */ 62 struct roffkv { 63 struct roffstr key; 64 struct roffstr val; 65 struct roffkv *next; /* next in list */ 66 }; 67 68 /* 69 * A single number register as part of a singly-linked list. 70 */ 71 struct roffreg { 72 struct roffstr key; 73 int val; 74 int step; 75 struct roffreg *next; 76 }; 77 78 /* 79 * Association of request and macro names with token IDs. 80 */ 81 struct roffreq { 82 enum roff_tok tok; 83 char name[]; 84 }; 85 86 struct roff { 87 struct mparse *parse; /* parse point */ 88 struct roff_man *man; /* mdoc or man parser */ 89 struct roffnode *last; /* leaf of stack */ 90 int *rstack; /* stack of inverted `ie' values */ 91 struct ohash *reqtab; /* request lookup table */ 92 struct roffreg *regtab; /* number registers */ 93 struct roffkv *strtab; /* user-defined strings & macros */ 94 struct roffkv *rentab; /* renamed strings & macros */ 95 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 96 struct roffstr *xtab; /* single-byte trans table (`tr') */ 97 const char *current_string; /* value of last called user macro */ 98 struct tbl_node *first_tbl; /* first table parsed */ 99 struct tbl_node *last_tbl; /* last table parsed */ 100 struct tbl_node *tbl; /* current table being parsed */ 101 struct eqn_node *last_eqn; /* equation parser */ 102 struct eqn_node *eqn; /* active equation parser */ 103 int eqn_inline; /* current equation is inline */ 104 int options; /* parse options */ 105 int rstacksz; /* current size limit of rstack */ 106 int rstackpos; /* position in rstack */ 107 int format; /* current file in mdoc or man format */ 108 int argc; /* number of args of the last macro */ 109 char control; /* control character */ 110 char escape; /* escape character */ 111 }; 112 113 struct roffnode { 114 enum roff_tok tok; /* type of node */ 115 struct roffnode *parent; /* up one in stack */ 116 int line; /* parse line */ 117 int col; /* parse col */ 118 char *name; /* node name, e.g. macro name */ 119 char *end; /* end-rules: custom token */ 120 int endspan; /* end-rules: next-line or infty */ 121 int rule; /* current evaluation rule */ 122 }; 123 124 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 125 enum roff_tok tok, /* tok of macro */ \ 126 struct buf *buf, /* input buffer */ \ 127 int ln, /* parse line */ \ 128 int ppos, /* original pos in buffer */ \ 129 int pos, /* current pos in buffer */ \ 130 int *offs /* reset offset of buffer data */ 131 132 typedef enum rofferr (*roffproc)(ROFF_ARGS); 133 134 struct roffmac { 135 roffproc proc; /* process new macro */ 136 roffproc text; /* process as child text of macro */ 137 roffproc sub; /* process as child of macro */ 138 int flags; 139 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 140 }; 141 142 struct predef { 143 const char *name; /* predefined input name */ 144 const char *str; /* replacement symbol */ 145 }; 146 147 #define PREDEF(__name, __str) \ 148 { (__name), (__str) }, 149 150 /* --- function prototypes ------------------------------------------------ */ 151 152 static void roffnode_cleanscope(struct roff *); 153 static void roffnode_pop(struct roff *); 154 static void roffnode_push(struct roff *, enum roff_tok, 155 const char *, int, int); 156 static void roff_addtbl(struct roff_man *, struct tbl_node *); 157 static enum rofferr roff_als(ROFF_ARGS); 158 static enum rofferr roff_block(ROFF_ARGS); 159 static enum rofferr roff_block_text(ROFF_ARGS); 160 static enum rofferr roff_block_sub(ROFF_ARGS); 161 static enum rofferr roff_br(ROFF_ARGS); 162 static enum rofferr roff_cblock(ROFF_ARGS); 163 static enum rofferr roff_cc(ROFF_ARGS); 164 static void roff_ccond(struct roff *, int, int); 165 static enum rofferr roff_cond(ROFF_ARGS); 166 static enum rofferr roff_cond_text(ROFF_ARGS); 167 static enum rofferr roff_cond_sub(ROFF_ARGS); 168 static enum rofferr roff_ds(ROFF_ARGS); 169 static enum rofferr roff_ec(ROFF_ARGS); 170 static enum rofferr roff_eo(ROFF_ARGS); 171 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int); 172 static int roff_evalcond(struct roff *r, int, char *, int *); 173 static int roff_evalnum(struct roff *, int, 174 const char *, int *, int *, int); 175 static int roff_evalpar(struct roff *, int, 176 const char *, int *, int *, int); 177 static int roff_evalstrcond(const char *, int *); 178 static void roff_free1(struct roff *); 179 static void roff_freereg(struct roffreg *); 180 static void roff_freestr(struct roffkv *); 181 static size_t roff_getname(struct roff *, char **, int, int); 182 static int roff_getnum(const char *, int *, int *, int); 183 static int roff_getop(const char *, int *, char *); 184 static int roff_getregn(struct roff *, 185 const char *, size_t, char); 186 static int roff_getregro(const struct roff *, 187 const char *name); 188 static const char *roff_getstrn(struct roff *, 189 const char *, size_t, int *); 190 static int roff_hasregn(const struct roff *, 191 const char *, size_t); 192 static enum rofferr roff_insec(ROFF_ARGS); 193 static enum rofferr roff_it(ROFF_ARGS); 194 static enum rofferr roff_line_ignore(ROFF_ARGS); 195 static void roff_man_alloc1(struct roff_man *); 196 static void roff_man_free1(struct roff_man *); 197 static enum rofferr roff_manyarg(ROFF_ARGS); 198 static enum rofferr roff_nr(ROFF_ARGS); 199 static enum rofferr roff_onearg(ROFF_ARGS); 200 static enum roff_tok roff_parse(struct roff *, char *, int *, 201 int, int); 202 static enum rofferr roff_parsetext(struct roff *, struct buf *, 203 int, int *); 204 static enum rofferr roff_renamed(ROFF_ARGS); 205 static enum rofferr roff_res(struct roff *, struct buf *, int, int); 206 static enum rofferr roff_rm(ROFF_ARGS); 207 static enum rofferr roff_rn(ROFF_ARGS); 208 static enum rofferr roff_rr(ROFF_ARGS); 209 static void roff_setregn(struct roff *, const char *, 210 size_t, int, char, int); 211 static void roff_setstr(struct roff *, 212 const char *, const char *, int); 213 static void roff_setstrn(struct roffkv **, const char *, 214 size_t, const char *, size_t, int); 215 static enum rofferr roff_so(ROFF_ARGS); 216 static enum rofferr roff_tr(ROFF_ARGS); 217 static enum rofferr roff_Dd(ROFF_ARGS); 218 static enum rofferr roff_TE(ROFF_ARGS); 219 static enum rofferr roff_TS(ROFF_ARGS); 220 static enum rofferr roff_EQ(ROFF_ARGS); 221 static enum rofferr roff_EN(ROFF_ARGS); 222 static enum rofferr roff_T_(ROFF_ARGS); 223 static enum rofferr roff_unsupp(ROFF_ARGS); 224 static enum rofferr roff_userdef(ROFF_ARGS); 225 226 /* --- constant data ------------------------------------------------------ */ 227 228 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 229 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 230 231 const char *__roff_name[MAN_MAX + 1] = { 232 "br", "ce", "ft", "ll", 233 "mc", "po", "rj", "sp", 234 "ta", "ti", NULL, 235 "ab", "ad", "af", "aln", 236 "als", "am", "am1", "ami", 237 "ami1", "as", "as1", "asciify", 238 "backtrace", "bd", "bleedat", "blm", 239 "box", "boxa", "bp", "BP", 240 "break", "breakchar", "brnl", "brp", 241 "brpnl", "c2", "cc", 242 "cf", "cflags", "ch", "char", 243 "chop", "class", "close", "CL", 244 "color", "composite", "continue", "cp", 245 "cropat", "cs", "cu", "da", 246 "dch", "Dd", "de", "de1", 247 "defcolor", "dei", "dei1", "device", 248 "devicem", "di", "do", "ds", 249 "ds1", "dwh", "dt", "ec", 250 "ecr", "ecs", "el", "em", 251 "EN", "eo", "EP", "EQ", 252 "errprint", "ev", "evc", "ex", 253 "fallback", "fam", "fc", "fchar", 254 "fcolor", "fdeferlig", "feature", "fkern", 255 "fl", "flig", "fp", "fps", 256 "fschar", "fspacewidth", "fspecial", "ftr", 257 "fzoom", "gcolor", "hc", "hcode", 258 "hidechar", "hla", "hlm", "hpf", 259 "hpfa", "hpfcode", "hw", "hy", 260 "hylang", "hylen", "hym", "hypp", 261 "hys", "ie", "if", "ig", 262 "index", "it", "itc", "IX", 263 "kern", "kernafter", "kernbefore", "kernpair", 264 "lc", "lc_ctype", "lds", "length", 265 "letadj", "lf", "lg", "lhang", 266 "linetabs", "lnr", "lnrf", "lpfx", 267 "ls", "lsm", "lt", 268 "mediasize", "minss", "mk", "mso", 269 "na", "ne", "nh", "nhychar", 270 "nm", "nn", "nop", "nr", 271 "nrf", "nroff", "ns", "nx", 272 "open", "opena", "os", "output", 273 "padj", "papersize", "pc", "pev", 274 "pi", "PI", "pl", "pm", 275 "pn", "pnr", "ps", 276 "psbb", "pshape", "pso", "ptr", 277 "pvs", "rchar", "rd", "recursionlimit", 278 "return", "rfschar", "rhang", 279 "rm", "rn", "rnn", "rr", 280 "rs", "rt", "schar", "sentchar", 281 "shc", "shift", "sizes", "so", 282 "spacewidth", "special", "spreadwarn", "ss", 283 "sty", "substring", "sv", "sy", 284 "T&", "tc", "TE", 285 "TH", "tkf", "tl", 286 "tm", "tm1", "tmc", "tr", 287 "track", "transchar", "trf", "trimat", 288 "trin", "trnt", "troff", "TS", 289 "uf", "ul", "unformat", "unwatch", 290 "unwatchn", "vpt", "vs", "warn", 291 "warnscale", "watch", "watchlength", "watchn", 292 "wh", "while", "write", "writec", 293 "writem", "xflag", ".", NULL, 294 NULL, "text", 295 "Dd", "Dt", "Os", "Sh", 296 "Ss", "Pp", "D1", "Dl", 297 "Bd", "Ed", "Bl", "El", 298 "It", "Ad", "An", "Ap", 299 "Ar", "Cd", "Cm", "Dv", 300 "Er", "Ev", "Ex", "Fa", 301 "Fd", "Fl", "Fn", "Ft", 302 "Ic", "In", "Li", "Nd", 303 "Nm", "Op", "Ot", "Pa", 304 "Rv", "St", "Va", "Vt", 305 "Xr", "%A", "%B", "%D", 306 "%I", "%J", "%N", "%O", 307 "%P", "%R", "%T", "%V", 308 "Ac", "Ao", "Aq", "At", 309 "Bc", "Bf", "Bo", "Bq", 310 "Bsx", "Bx", "Db", "Dc", 311 "Do", "Dq", "Ec", "Ef", 312 "Em", "Eo", "Fx", "Ms", 313 "No", "Ns", "Nx", "Ox", 314 "Pc", "Pf", "Po", "Pq", 315 "Qc", "Ql", "Qo", "Qq", 316 "Re", "Rs", "Sc", "So", 317 "Sq", "Sm", "Sx", "Sy", 318 "Tn", "Ux", "Xc", "Xo", 319 "Fo", "Fc", "Oo", "Oc", 320 "Bk", "Ek", "Bt", "Hf", 321 "Fr", "Ud", "Lb", "Lp", 322 "Lk", "Mt", "Brq", "Bro", 323 "Brc", "%C", "Es", "En", 324 "Dx", "%Q", "%U", "Ta", 325 NULL, 326 "TH", "SH", "SS", "TP", 327 "LP", "PP", "P", "IP", 328 "HP", "SM", "SB", "BI", 329 "IB", "BR", "RB", "R", 330 "B", "I", "IR", "RI", 331 "nf", "fi", 332 "RE", "RS", "DT", "UC", 333 "PD", "AT", "in", 334 "OP", "EX", "EE", "UR", 335 "UE", "MT", "ME", NULL 336 }; 337 const char *const *roff_name = __roff_name; 338 339 static struct roffmac roffs[TOKEN_NONE] = { 340 { roff_br, NULL, NULL, 0 }, /* br */ 341 { roff_onearg, NULL, NULL, 0 }, /* ce */ 342 { roff_onearg, NULL, NULL, 0 }, /* ft */ 343 { roff_onearg, NULL, NULL, 0 }, /* ll */ 344 { roff_onearg, NULL, NULL, 0 }, /* mc */ 345 { roff_onearg, NULL, NULL, 0 }, /* po */ 346 { roff_onearg, NULL, NULL, 0 }, /* rj */ 347 { roff_onearg, NULL, NULL, 0 }, /* sp */ 348 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 349 { roff_onearg, NULL, NULL, 0 }, /* ti */ 350 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 351 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 352 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 353 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 354 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 355 { roff_als, NULL, NULL, 0 }, /* als */ 356 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 357 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 358 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 359 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 360 { roff_ds, NULL, NULL, 0 }, /* as */ 361 { roff_ds, NULL, NULL, 0 }, /* as1 */ 362 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 363 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 364 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 365 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 366 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 367 { roff_unsupp, NULL, NULL, 0 }, /* box */ 368 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 369 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 370 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 371 { roff_unsupp, NULL, NULL, 0 }, /* break */ 372 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 373 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 374 { roff_br, NULL, NULL, 0 }, /* brp */ 375 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 376 { roff_unsupp, NULL, NULL, 0 }, /* c2 */ 377 { roff_cc, NULL, NULL, 0 }, /* cc */ 378 { roff_insec, NULL, NULL, 0 }, /* cf */ 379 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 380 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 381 { roff_unsupp, NULL, NULL, 0 }, /* char */ 382 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 383 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 384 { roff_insec, NULL, NULL, 0 }, /* close */ 385 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 386 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 387 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 388 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 389 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 390 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 391 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 392 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 393 { roff_unsupp, NULL, NULL, 0 }, /* da */ 394 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 395 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 396 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 397 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 398 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 399 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 400 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 401 { roff_unsupp, NULL, NULL, 0 }, /* device */ 402 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 403 { roff_unsupp, NULL, NULL, 0 }, /* di */ 404 { roff_unsupp, NULL, NULL, 0 }, /* do */ 405 { roff_ds, NULL, NULL, 0 }, /* ds */ 406 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 407 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 408 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 409 { roff_ec, NULL, NULL, 0 }, /* ec */ 410 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 411 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 412 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 413 { roff_unsupp, NULL, NULL, 0 }, /* em */ 414 { roff_EN, NULL, NULL, 0 }, /* EN */ 415 { roff_eo, NULL, NULL, 0 }, /* eo */ 416 { roff_unsupp, NULL, NULL, 0 }, /* EP */ 417 { roff_EQ, NULL, NULL, 0 }, /* EQ */ 418 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 419 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 420 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 421 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 422 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 423 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 424 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 425 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 426 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 427 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 428 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 429 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 430 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 431 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 432 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 433 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 434 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 435 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 436 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 437 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 438 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 439 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 440 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 441 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 442 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 443 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 444 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 445 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 446 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 447 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 448 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 449 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 450 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 451 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 452 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 453 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ 454 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 455 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 456 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 457 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 458 { roff_unsupp, NULL, NULL, 0 }, /* index */ 459 { roff_it, NULL, NULL, 0 }, /* it */ 460 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 461 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 462 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 463 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 464 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 465 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 466 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 467 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 468 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 469 { roff_unsupp, NULL, NULL, 0 }, /* length */ 470 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 471 { roff_insec, NULL, NULL, 0 }, /* lf */ 472 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 473 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 474 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 475 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 476 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 477 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 479 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 484 { roff_insec, NULL, NULL, 0 }, /* mso */ 485 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 486 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 487 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 488 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 489 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 490 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 491 { roff_unsupp, NULL, NULL, 0 }, /* nop */ 492 { roff_nr, NULL, NULL, 0 }, /* nr */ 493 { roff_unsupp, NULL, NULL, 0 }, /* nrf */ 494 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ 495 { roff_line_ignore, NULL, NULL, 0 }, /* ns */ 496 { roff_insec, NULL, NULL, 0 }, /* nx */ 497 { roff_insec, NULL, NULL, 0 }, /* open */ 498 { roff_insec, NULL, NULL, 0 }, /* opena */ 499 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 500 { roff_unsupp, NULL, NULL, 0 }, /* output */ 501 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 502 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 503 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 504 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 505 { roff_insec, NULL, NULL, 0 }, /* pi */ 506 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 507 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 508 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 509 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 510 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 511 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 512 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 513 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 514 { roff_insec, NULL, NULL, 0 }, /* pso */ 515 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 516 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 517 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 518 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 519 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 520 { roff_unsupp, NULL, NULL, 0 }, /* return */ 521 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 522 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 523 { roff_rm, NULL, NULL, 0 }, /* rm */ 524 { roff_rn, NULL, NULL, 0 }, /* rn */ 525 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 526 { roff_rr, NULL, NULL, 0 }, /* rr */ 527 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 528 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 529 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 530 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 531 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 532 { roff_unsupp, NULL, NULL, 0 }, /* shift */ 533 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ 534 { roff_so, NULL, NULL, 0 }, /* so */ 535 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ 536 { roff_line_ignore, NULL, NULL, 0 }, /* special */ 537 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 538 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 539 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 540 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 541 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 542 { roff_insec, NULL, NULL, 0 }, /* sy */ 543 { roff_T_, NULL, NULL, 0 }, /* T& */ 544 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 545 { roff_TE, NULL, NULL, 0 }, /* TE */ 546 { roff_Dd, NULL, NULL, 0 }, /* TH */ 547 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 548 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 549 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 550 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 551 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 552 { roff_tr, NULL, NULL, 0 }, /* tr */ 553 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 554 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 555 { roff_insec, NULL, NULL, 0 }, /* trf */ 556 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 557 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 558 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 559 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 560 { roff_TS, NULL, NULL, 0 }, /* TS */ 561 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 562 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 563 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 564 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 565 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 566 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 567 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 568 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 569 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 570 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 571 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 572 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 573 { roff_unsupp, NULL, NULL, 0 }, /* wh */ 574 { roff_unsupp, NULL, NULL, 0 }, /* while */ 575 { roff_insec, NULL, NULL, 0 }, /* write */ 576 { roff_insec, NULL, NULL, 0 }, /* writec */ 577 { roff_insec, NULL, NULL, 0 }, /* writem */ 578 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 579 { roff_cblock, NULL, NULL, 0 }, /* . */ 580 { roff_renamed, NULL, NULL, 0 }, 581 { roff_userdef, NULL, NULL, 0 } 582 }; 583 584 /* Array of injected predefined strings. */ 585 #define PREDEFS_MAX 38 586 static const struct predef predefs[PREDEFS_MAX] = { 587 #include "predefs.in" 588 }; 589 590 static int roffce_lines; /* number of input lines to center */ 591 static struct roff_node *roffce_node; /* active request */ 592 static int roffit_lines; /* number of lines to delay */ 593 static char *roffit_macro; /* nil-terminated macro line */ 594 595 596 /* --- request table ------------------------------------------------------ */ 597 598 struct ohash * 599 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 600 { 601 struct ohash *htab; 602 struct roffreq *req; 603 enum roff_tok tok; 604 size_t sz; 605 unsigned int slot; 606 607 htab = mandoc_malloc(sizeof(*htab)); 608 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 609 610 for (tok = mintok; tok < maxtok; tok++) { 611 if (roff_name[tok] == NULL) 612 continue; 613 sz = strlen(roff_name[tok]); 614 req = mandoc_malloc(sizeof(*req) + sz + 1); 615 req->tok = tok; 616 memcpy(req->name, roff_name[tok], sz + 1); 617 slot = ohash_qlookup(htab, req->name); 618 ohash_insert(htab, slot, req); 619 } 620 return htab; 621 } 622 623 void 624 roffhash_free(struct ohash *htab) 625 { 626 struct roffreq *req; 627 unsigned int slot; 628 629 if (htab == NULL) 630 return; 631 for (req = ohash_first(htab, &slot); req != NULL; 632 req = ohash_next(htab, &slot)) 633 free(req); 634 ohash_delete(htab); 635 free(htab); 636 } 637 638 enum roff_tok 639 roffhash_find(struct ohash *htab, const char *name, size_t sz) 640 { 641 struct roffreq *req; 642 const char *end; 643 644 if (sz) { 645 end = name + sz; 646 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 647 } else 648 req = ohash_find(htab, ohash_qlookup(htab, name)); 649 return req == NULL ? TOKEN_NONE : req->tok; 650 } 651 652 /* --- stack of request blocks -------------------------------------------- */ 653 654 /* 655 * Pop the current node off of the stack of roff instructions currently 656 * pending. 657 */ 658 static void 659 roffnode_pop(struct roff *r) 660 { 661 struct roffnode *p; 662 663 assert(r->last); 664 p = r->last; 665 666 r->last = r->last->parent; 667 free(p->name); 668 free(p->end); 669 free(p); 670 } 671 672 /* 673 * Push a roff node onto the instruction stack. This must later be 674 * removed with roffnode_pop(). 675 */ 676 static void 677 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 678 int line, int col) 679 { 680 struct roffnode *p; 681 682 p = mandoc_calloc(1, sizeof(struct roffnode)); 683 p->tok = tok; 684 if (name) 685 p->name = mandoc_strdup(name); 686 p->parent = r->last; 687 p->line = line; 688 p->col = col; 689 p->rule = p->parent ? p->parent->rule : 0; 690 691 r->last = p; 692 } 693 694 /* --- roff parser state data management ---------------------------------- */ 695 696 static void 697 roff_free1(struct roff *r) 698 { 699 struct tbl_node *tbl; 700 int i; 701 702 while (NULL != (tbl = r->first_tbl)) { 703 r->first_tbl = tbl->next; 704 tbl_free(tbl); 705 } 706 r->first_tbl = r->last_tbl = r->tbl = NULL; 707 708 if (r->last_eqn != NULL) 709 eqn_free(r->last_eqn); 710 r->last_eqn = r->eqn = NULL; 711 712 while (r->last) 713 roffnode_pop(r); 714 715 free (r->rstack); 716 r->rstack = NULL; 717 r->rstacksz = 0; 718 r->rstackpos = -1; 719 720 roff_freereg(r->regtab); 721 r->regtab = NULL; 722 723 roff_freestr(r->strtab); 724 roff_freestr(r->rentab); 725 roff_freestr(r->xmbtab); 726 r->strtab = r->rentab = r->xmbtab = NULL; 727 728 if (r->xtab) 729 for (i = 0; i < 128; i++) 730 free(r->xtab[i].p); 731 free(r->xtab); 732 r->xtab = NULL; 733 } 734 735 void 736 roff_reset(struct roff *r) 737 { 738 roff_free1(r); 739 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 740 r->control = '\0'; 741 r->escape = '\\'; 742 roffce_lines = 0; 743 roffce_node = NULL; 744 roffit_lines = 0; 745 roffit_macro = NULL; 746 } 747 748 void 749 roff_free(struct roff *r) 750 { 751 roff_free1(r); 752 roffhash_free(r->reqtab); 753 free(r); 754 } 755 756 struct roff * 757 roff_alloc(struct mparse *parse, int options) 758 { 759 struct roff *r; 760 761 r = mandoc_calloc(1, sizeof(struct roff)); 762 r->parse = parse; 763 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 764 r->options = options; 765 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 766 r->rstackpos = -1; 767 r->escape = '\\'; 768 return r; 769 } 770 771 /* --- syntax tree state data management ---------------------------------- */ 772 773 static void 774 roff_man_free1(struct roff_man *man) 775 { 776 777 if (man->first != NULL) 778 roff_node_delete(man, man->first); 779 free(man->meta.msec); 780 free(man->meta.vol); 781 free(man->meta.os); 782 free(man->meta.arch); 783 free(man->meta.title); 784 free(man->meta.name); 785 free(man->meta.date); 786 } 787 788 static void 789 roff_man_alloc1(struct roff_man *man) 790 { 791 792 memset(&man->meta, 0, sizeof(man->meta)); 793 man->first = mandoc_calloc(1, sizeof(*man->first)); 794 man->first->type = ROFFT_ROOT; 795 man->last = man->first; 796 man->last_es = NULL; 797 man->flags = 0; 798 man->macroset = MACROSET_NONE; 799 man->lastsec = man->lastnamed = SEC_NONE; 800 man->next = ROFF_NEXT_CHILD; 801 } 802 803 void 804 roff_man_reset(struct roff_man *man) 805 { 806 807 roff_man_free1(man); 808 roff_man_alloc1(man); 809 } 810 811 void 812 roff_man_free(struct roff_man *man) 813 { 814 815 roff_man_free1(man); 816 free(man); 817 } 818 819 struct roff_man * 820 roff_man_alloc(struct roff *roff, struct mparse *parse, 821 const char *os_s, int quick) 822 { 823 struct roff_man *man; 824 825 man = mandoc_calloc(1, sizeof(*man)); 826 man->parse = parse; 827 man->roff = roff; 828 man->os_s = os_s; 829 man->quick = quick; 830 roff_man_alloc1(man); 831 roff->man = man; 832 return man; 833 } 834 835 /* --- syntax tree handling ----------------------------------------------- */ 836 837 struct roff_node * 838 roff_node_alloc(struct roff_man *man, int line, int pos, 839 enum roff_type type, int tok) 840 { 841 struct roff_node *n; 842 843 n = mandoc_calloc(1, sizeof(*n)); 844 n->line = line; 845 n->pos = pos; 846 n->tok = tok; 847 n->type = type; 848 n->sec = man->lastsec; 849 850 if (man->flags & MDOC_SYNOPSIS) 851 n->flags |= NODE_SYNPRETTY; 852 else 853 n->flags &= ~NODE_SYNPRETTY; 854 if (man->flags & MDOC_NEWLINE) 855 n->flags |= NODE_LINE; 856 man->flags &= ~MDOC_NEWLINE; 857 858 return n; 859 } 860 861 void 862 roff_node_append(struct roff_man *man, struct roff_node *n) 863 { 864 865 switch (man->next) { 866 case ROFF_NEXT_SIBLING: 867 if (man->last->next != NULL) { 868 n->next = man->last->next; 869 man->last->next->prev = n; 870 } else 871 man->last->parent->last = n; 872 man->last->next = n; 873 n->prev = man->last; 874 n->parent = man->last->parent; 875 break; 876 case ROFF_NEXT_CHILD: 877 if (man->last->child != NULL) { 878 n->next = man->last->child; 879 man->last->child->prev = n; 880 } else 881 man->last->last = n; 882 man->last->child = n; 883 n->parent = man->last; 884 break; 885 default: 886 abort(); 887 } 888 man->last = n; 889 890 switch (n->type) { 891 case ROFFT_HEAD: 892 n->parent->head = n; 893 break; 894 case ROFFT_BODY: 895 if (n->end != ENDBODY_NOT) 896 return; 897 n->parent->body = n; 898 break; 899 case ROFFT_TAIL: 900 n->parent->tail = n; 901 break; 902 default: 903 return; 904 } 905 906 /* 907 * Copy over the normalised-data pointer of our parent. Not 908 * everybody has one, but copying a null pointer is fine. 909 */ 910 911 n->norm = n->parent->norm; 912 assert(n->parent->type == ROFFT_BLOCK); 913 } 914 915 void 916 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 917 { 918 struct roff_node *n; 919 920 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 921 n->string = roff_strdup(man->roff, word); 922 roff_node_append(man, n); 923 n->flags |= NODE_VALID | NODE_ENDED; 924 man->next = ROFF_NEXT_SIBLING; 925 } 926 927 void 928 roff_word_append(struct roff_man *man, const char *word) 929 { 930 struct roff_node *n; 931 char *addstr, *newstr; 932 933 n = man->last; 934 addstr = roff_strdup(man->roff, word); 935 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 936 free(addstr); 937 free(n->string); 938 n->string = newstr; 939 man->next = ROFF_NEXT_SIBLING; 940 } 941 942 void 943 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 944 { 945 struct roff_node *n; 946 947 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 948 roff_node_append(man, n); 949 man->next = ROFF_NEXT_CHILD; 950 } 951 952 struct roff_node * 953 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 954 { 955 struct roff_node *n; 956 957 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 958 roff_node_append(man, n); 959 man->next = ROFF_NEXT_CHILD; 960 return n; 961 } 962 963 struct roff_node * 964 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 965 { 966 struct roff_node *n; 967 968 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 969 roff_node_append(man, n); 970 man->next = ROFF_NEXT_CHILD; 971 return n; 972 } 973 974 struct roff_node * 975 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 976 { 977 struct roff_node *n; 978 979 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 980 roff_node_append(man, n); 981 man->next = ROFF_NEXT_CHILD; 982 return n; 983 } 984 985 static void 986 roff_addtbl(struct roff_man *man, struct tbl_node *tbl) 987 { 988 struct roff_node *n; 989 const struct tbl_span *span; 990 991 if (man->macroset == MACROSET_MAN) 992 man_breakscope(man, ROFF_TS); 993 while ((span = tbl_span(tbl)) != NULL) { 994 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE); 995 n->span = span; 996 roff_node_append(man, n); 997 n->flags |= NODE_VALID | NODE_ENDED; 998 man->next = ROFF_NEXT_SIBLING; 999 } 1000 } 1001 1002 void 1003 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1004 { 1005 1006 /* Adjust siblings. */ 1007 1008 if (n->prev) 1009 n->prev->next = n->next; 1010 if (n->next) 1011 n->next->prev = n->prev; 1012 1013 /* Adjust parent. */ 1014 1015 if (n->parent != NULL) { 1016 if (n->parent->child == n) 1017 n->parent->child = n->next; 1018 if (n->parent->last == n) 1019 n->parent->last = n->prev; 1020 } 1021 1022 /* Adjust parse point. */ 1023 1024 if (man == NULL) 1025 return; 1026 if (man->last == n) { 1027 if (n->prev == NULL) { 1028 man->last = n->parent; 1029 man->next = ROFF_NEXT_CHILD; 1030 } else { 1031 man->last = n->prev; 1032 man->next = ROFF_NEXT_SIBLING; 1033 } 1034 } 1035 if (man->first == n) 1036 man->first = NULL; 1037 } 1038 1039 void 1040 roff_node_free(struct roff_node *n) 1041 { 1042 1043 if (n->args != NULL) 1044 mdoc_argv_free(n->args); 1045 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1046 free(n->norm); 1047 if (n->eqn != NULL) 1048 eqn_box_free(n->eqn); 1049 free(n->string); 1050 free(n); 1051 } 1052 1053 void 1054 roff_node_delete(struct roff_man *man, struct roff_node *n) 1055 { 1056 1057 while (n->child != NULL) 1058 roff_node_delete(man, n->child); 1059 roff_node_unlink(man, n); 1060 roff_node_free(n); 1061 } 1062 1063 void 1064 deroff(char **dest, const struct roff_node *n) 1065 { 1066 char *cp; 1067 size_t sz; 1068 1069 if (n->type != ROFFT_TEXT) { 1070 for (n = n->child; n != NULL; n = n->next) 1071 deroff(dest, n); 1072 return; 1073 } 1074 1075 /* Skip leading whitespace. */ 1076 1077 for (cp = n->string; *cp != '\0'; cp++) { 1078 if (cp[0] == '\\' && cp[1] != '\0' && 1079 strchr(" %&0^|~", cp[1]) != NULL) 1080 cp++; 1081 else if ( ! isspace((unsigned char)*cp)) 1082 break; 1083 } 1084 1085 /* Skip trailing backslash. */ 1086 1087 sz = strlen(cp); 1088 if (sz > 0 && cp[sz - 1] == '\\') 1089 sz--; 1090 1091 /* Skip trailing whitespace. */ 1092 1093 for (; sz; sz--) 1094 if ( ! isspace((unsigned char)cp[sz-1])) 1095 break; 1096 1097 /* Skip empty strings. */ 1098 1099 if (sz == 0) 1100 return; 1101 1102 if (*dest == NULL) { 1103 *dest = mandoc_strndup(cp, sz); 1104 return; 1105 } 1106 1107 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1108 free(*dest); 1109 *dest = cp; 1110 } 1111 1112 /* --- main functions of the roff parser ---------------------------------- */ 1113 1114 /* 1115 * In the current line, expand escape sequences that tend to get 1116 * used in numerical expressions and conditional requests. 1117 * Also check the syntax of the remaining escape sequences. 1118 */ 1119 static enum rofferr 1120 roff_res(struct roff *r, struct buf *buf, int ln, int pos) 1121 { 1122 char ubuf[24]; /* buffer to print the number */ 1123 struct roff_node *n; /* used for header comments */ 1124 const char *start; /* start of the string to process */ 1125 char *stesc; /* start of an escape sequence ('\\') */ 1126 char *ep; /* end of comment string */ 1127 const char *stnam; /* start of the name, after "[(*" */ 1128 const char *cp; /* end of the name, e.g. before ']' */ 1129 const char *res; /* the string to be substituted */ 1130 char *nbuf; /* new buffer to copy buf->buf to */ 1131 size_t maxl; /* expected length of the escape name */ 1132 size_t naml; /* actual length of the escape name */ 1133 enum mandoc_esc esc; /* type of the escape sequence */ 1134 int inaml; /* length returned from mandoc_escape() */ 1135 int expand_count; /* to avoid infinite loops */ 1136 int npos; /* position in numeric expression */ 1137 int arg_complete; /* argument not interrupted by eol */ 1138 int done; /* no more input available */ 1139 int deftype; /* type of definition to paste */ 1140 int rcsid; /* kind of RCS id seen */ 1141 char sign; /* increment number register */ 1142 char term; /* character terminating the escape */ 1143 1144 /* Search forward for comments. */ 1145 1146 done = 0; 1147 start = buf->buf + pos; 1148 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { 1149 if (stesc[0] != r->escape || stesc[1] == '\0') 1150 continue; 1151 stesc++; 1152 if (*stesc != '"' && *stesc != '#') 1153 continue; 1154 1155 /* Comment found, look for RCS id. */ 1156 1157 rcsid = 0; 1158 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { 1159 rcsid = 1 << MANDOC_OS_OPENBSD; 1160 cp += 8; 1161 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { 1162 rcsid = 1 << MANDOC_OS_NETBSD; 1163 cp += 7; 1164 } 1165 if (cp != NULL && 1166 isalnum((unsigned char)*cp) == 0 && 1167 strchr(cp, '$') != NULL) { 1168 if (r->man->meta.rcsids & rcsid) 1169 mandoc_msg(MANDOCERR_RCS_REP, r->parse, 1170 ln, stesc + 1 - buf->buf, stesc + 1); 1171 r->man->meta.rcsids |= rcsid; 1172 } 1173 1174 /* Handle trailing whitespace. */ 1175 1176 ep = strchr(stesc--, '\0') - 1; 1177 if (*ep == '\n') { 1178 done = 1; 1179 ep--; 1180 } 1181 if (*ep == ' ' || *ep == '\t') 1182 mandoc_msg(MANDOCERR_SPACE_EOL, r->parse, 1183 ln, ep - buf->buf, NULL); 1184 1185 /* 1186 * Save comments preceding the title macro 1187 * in the syntax tree. 1188 */ 1189 1190 if (r->format == 0) { 1191 while (*ep == ' ' || *ep == '\t') 1192 ep--; 1193 ep[1] = '\0'; 1194 n = roff_node_alloc(r->man, 1195 ln, stesc + 1 - buf->buf, 1196 ROFFT_COMMENT, TOKEN_NONE); 1197 n->string = mandoc_strdup(stesc + 2); 1198 roff_node_append(r->man, n); 1199 n->flags |= NODE_VALID | NODE_ENDED; 1200 r->man->next = ROFF_NEXT_SIBLING; 1201 } 1202 1203 /* Discard comments. */ 1204 1205 while (stesc > start && stesc[-1] == ' ') 1206 stesc--; 1207 *stesc = '\0'; 1208 break; 1209 } 1210 if (stesc == start) 1211 return ROFF_CONT; 1212 stesc--; 1213 1214 /* Notice the end of the input. */ 1215 1216 if (*stesc == '\n') { 1217 *stesc-- = '\0'; 1218 done = 1; 1219 } 1220 1221 expand_count = 0; 1222 while (stesc >= start) { 1223 1224 /* Search backwards for the next backslash. */ 1225 1226 if (*stesc != r->escape) { 1227 if (*stesc == '\\') { 1228 *stesc = '\0'; 1229 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", 1230 buf->buf, stesc + 1) + 1; 1231 start = nbuf + pos; 1232 stesc = nbuf + (stesc - buf->buf); 1233 free(buf->buf); 1234 buf->buf = nbuf; 1235 } 1236 stesc--; 1237 continue; 1238 } 1239 1240 /* If it is escaped, skip it. */ 1241 1242 for (cp = stesc - 1; cp >= start; cp--) 1243 if (*cp != r->escape) 1244 break; 1245 1246 if ((stesc - cp) % 2 == 0) { 1247 while (stesc > cp) 1248 *stesc-- = '\\'; 1249 continue; 1250 } else if (stesc[1] != '\0') { 1251 *stesc = '\\'; 1252 } else { 1253 *stesc-- = '\0'; 1254 if (done) 1255 continue; 1256 else 1257 return ROFF_APPEND; 1258 } 1259 1260 /* Decide whether to expand or to check only. */ 1261 1262 term = '\0'; 1263 cp = stesc + 1; 1264 switch (*cp) { 1265 case '*': 1266 res = NULL; 1267 break; 1268 case 'B': 1269 case 'w': 1270 term = cp[1]; 1271 /* FALLTHROUGH */ 1272 case 'n': 1273 sign = cp[1]; 1274 if (sign == '+' || sign == '-') 1275 cp++; 1276 res = ubuf; 1277 break; 1278 default: 1279 esc = mandoc_escape(&cp, &stnam, &inaml); 1280 if (esc == ESCAPE_ERROR || 1281 (esc == ESCAPE_SPECIAL && 1282 mchars_spec2cp(stnam, inaml) < 0)) 1283 mandoc_vmsg(MANDOCERR_ESC_BAD, 1284 r->parse, ln, (int)(stesc - buf->buf), 1285 "%.*s", (int)(cp - stesc), stesc); 1286 stesc--; 1287 continue; 1288 } 1289 1290 if (EXPAND_LIMIT < ++expand_count) { 1291 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, 1292 ln, (int)(stesc - buf->buf), NULL); 1293 return ROFF_IGN; 1294 } 1295 1296 /* 1297 * The third character decides the length 1298 * of the name of the string or register. 1299 * Save a pointer to the name. 1300 */ 1301 1302 if (term == '\0') { 1303 switch (*++cp) { 1304 case '\0': 1305 maxl = 0; 1306 break; 1307 case '(': 1308 cp++; 1309 maxl = 2; 1310 break; 1311 case '[': 1312 cp++; 1313 term = ']'; 1314 maxl = 0; 1315 break; 1316 default: 1317 maxl = 1; 1318 break; 1319 } 1320 } else { 1321 cp += 2; 1322 maxl = 0; 1323 } 1324 stnam = cp; 1325 1326 /* Advance to the end of the name. */ 1327 1328 naml = 0; 1329 arg_complete = 1; 1330 while (maxl == 0 || naml < maxl) { 1331 if (*cp == '\0') { 1332 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 1333 ln, (int)(stesc - buf->buf), stesc); 1334 arg_complete = 0; 1335 break; 1336 } 1337 if (maxl == 0 && *cp == term) { 1338 cp++; 1339 break; 1340 } 1341 if (*cp++ != '\\' || stesc[1] != 'w') { 1342 naml++; 1343 continue; 1344 } 1345 switch (mandoc_escape(&cp, NULL, NULL)) { 1346 case ESCAPE_SPECIAL: 1347 case ESCAPE_UNICODE: 1348 case ESCAPE_NUMBERED: 1349 case ESCAPE_OVERSTRIKE: 1350 naml++; 1351 break; 1352 default: 1353 break; 1354 } 1355 } 1356 1357 /* 1358 * Retrieve the replacement string; if it is 1359 * undefined, resume searching for escapes. 1360 */ 1361 1362 switch (stesc[1]) { 1363 case '*': 1364 if (arg_complete) { 1365 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1366 res = roff_getstrn(r, stnam, naml, &deftype); 1367 } 1368 break; 1369 case 'B': 1370 npos = 0; 1371 ubuf[0] = arg_complete && 1372 roff_evalnum(r, ln, stnam, &npos, 1373 NULL, ROFFNUM_SCALE) && 1374 stnam + npos + 1 == cp ? '1' : '0'; 1375 ubuf[1] = '\0'; 1376 break; 1377 case 'n': 1378 if (arg_complete) 1379 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1380 roff_getregn(r, stnam, naml, sign)); 1381 else 1382 ubuf[0] = '\0'; 1383 break; 1384 case 'w': 1385 /* use even incomplete args */ 1386 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1387 24 * (int)naml); 1388 break; 1389 } 1390 1391 if (res == NULL) { 1392 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1393 r->parse, ln, (int)(stesc - buf->buf), 1394 "%.*s", (int)naml, stnam); 1395 res = ""; 1396 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1397 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, 1398 ln, (int)(stesc - buf->buf), NULL); 1399 return ROFF_IGN; 1400 } 1401 1402 /* Replace the escape sequence by the string. */ 1403 1404 *stesc = '\0'; 1405 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1406 buf->buf, res, cp) + 1; 1407 1408 /* Prepare for the next replacement. */ 1409 1410 start = nbuf + pos; 1411 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1412 free(buf->buf); 1413 buf->buf = nbuf; 1414 } 1415 return ROFF_CONT; 1416 } 1417 1418 /* 1419 * Process text streams. 1420 */ 1421 static enum rofferr 1422 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1423 { 1424 size_t sz; 1425 const char *start; 1426 char *p; 1427 int isz; 1428 enum mandoc_esc esc; 1429 1430 /* Spring the input line trap. */ 1431 1432 if (roffit_lines == 1) { 1433 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1434 free(buf->buf); 1435 buf->buf = p; 1436 buf->sz = isz + 1; 1437 *offs = 0; 1438 free(roffit_macro); 1439 roffit_lines = 0; 1440 return ROFF_REPARSE; 1441 } else if (roffit_lines > 1) 1442 --roffit_lines; 1443 1444 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1445 if (roffce_lines < 1) { 1446 r->man->last = roffce_node; 1447 r->man->next = ROFF_NEXT_SIBLING; 1448 roffce_lines = 0; 1449 roffce_node = NULL; 1450 } else 1451 roffce_lines--; 1452 } 1453 1454 /* Convert all breakable hyphens into ASCII_HYPH. */ 1455 1456 start = p = buf->buf + pos; 1457 1458 while (*p != '\0') { 1459 sz = strcspn(p, "-\\"); 1460 p += sz; 1461 1462 if (*p == '\0') 1463 break; 1464 1465 if (*p == '\\') { 1466 /* Skip over escapes. */ 1467 p++; 1468 esc = mandoc_escape((const char **)&p, NULL, NULL); 1469 if (esc == ESCAPE_ERROR) 1470 break; 1471 while (*p == '-') 1472 p++; 1473 continue; 1474 } else if (p == start) { 1475 p++; 1476 continue; 1477 } 1478 1479 if (isalpha((unsigned char)p[-1]) && 1480 isalpha((unsigned char)p[1])) 1481 *p = ASCII_HYPH; 1482 p++; 1483 } 1484 return ROFF_CONT; 1485 } 1486 1487 enum rofferr 1488 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) 1489 { 1490 enum roff_tok t; 1491 enum rofferr e; 1492 int pos; /* parse point */ 1493 int spos; /* saved parse point for messages */ 1494 int ppos; /* original offset in buf->buf */ 1495 int ctl; /* macro line (boolean) */ 1496 1497 ppos = pos = *offs; 1498 1499 /* Handle in-line equation delimiters. */ 1500 1501 if (r->tbl == NULL && 1502 r->last_eqn != NULL && r->last_eqn->delim && 1503 (r->eqn == NULL || r->eqn_inline)) { 1504 e = roff_eqndelim(r, buf, pos); 1505 if (e == ROFF_REPARSE) 1506 return e; 1507 assert(e == ROFF_CONT); 1508 } 1509 1510 /* Expand some escape sequences. */ 1511 1512 e = roff_res(r, buf, ln, pos); 1513 if (e == ROFF_IGN || e == ROFF_APPEND) 1514 return e; 1515 assert(e == ROFF_CONT); 1516 1517 ctl = roff_getcontrol(r, buf->buf, &pos); 1518 1519 /* 1520 * First, if a scope is open and we're not a macro, pass the 1521 * text through the macro's filter. 1522 * Equations process all content themselves. 1523 * Tables process almost all content themselves, but we want 1524 * to warn about macros before passing it there. 1525 */ 1526 1527 if (r->last != NULL && ! ctl) { 1528 t = r->last->tok; 1529 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1530 if (e == ROFF_IGN) 1531 return e; 1532 assert(e == ROFF_CONT); 1533 } 1534 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1535 eqn_read(r->eqn, buf->buf + ppos); 1536 return ROFF_IGN; 1537 } 1538 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1539 tbl_read(r->tbl, ln, buf->buf, ppos); 1540 roff_addtbl(r->man, r->tbl); 1541 return ROFF_IGN; 1542 } 1543 if ( ! ctl) 1544 return roff_parsetext(r, buf, pos, offs); 1545 1546 /* Skip empty request lines. */ 1547 1548 if (buf->buf[pos] == '"') { 1549 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse, 1550 ln, pos, NULL); 1551 return ROFF_IGN; 1552 } else if (buf->buf[pos] == '\0') 1553 return ROFF_IGN; 1554 1555 /* 1556 * If a scope is open, go to the child handler for that macro, 1557 * as it may want to preprocess before doing anything with it. 1558 * Don't do so if an equation is open. 1559 */ 1560 1561 if (r->last) { 1562 t = r->last->tok; 1563 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1564 } 1565 1566 /* No scope is open. This is a new request or macro. */ 1567 1568 spos = pos; 1569 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1570 1571 /* Tables ignore most macros. */ 1572 1573 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS || 1574 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) { 1575 mandoc_msg(MANDOCERR_TBLMACRO, r->parse, 1576 ln, pos, buf->buf + spos); 1577 if (t != TOKEN_NONE) 1578 return ROFF_IGN; 1579 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1580 pos++; 1581 while (buf->buf[pos] == ' ') 1582 pos++; 1583 tbl_read(r->tbl, ln, buf->buf, pos); 1584 roff_addtbl(r->man, r->tbl); 1585 return ROFF_IGN; 1586 } 1587 1588 /* For now, let high level macros abort .ce mode. */ 1589 1590 if (ctl && roffce_node != NULL && 1591 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 1592 t == ROFF_TH || t == ROFF_TS)) { 1593 r->man->last = roffce_node; 1594 r->man->next = ROFF_NEXT_SIBLING; 1595 roffce_lines = 0; 1596 roffce_node = NULL; 1597 } 1598 1599 /* 1600 * This is neither a roff request nor a user-defined macro. 1601 * Let the standard macro set parsers handle it. 1602 */ 1603 1604 if (t == TOKEN_NONE) 1605 return ROFF_CONT; 1606 1607 /* Execute a roff request or a user defined macro. */ 1608 1609 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); 1610 } 1611 1612 void 1613 roff_endparse(struct roff *r) 1614 { 1615 if (r->last != NULL) 1616 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1617 r->last->line, r->last->col, 1618 roff_name[r->last->tok]); 1619 1620 if (r->eqn != NULL) { 1621 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1622 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1623 eqn_parse(r->eqn); 1624 r->eqn = NULL; 1625 } 1626 1627 if (r->tbl != NULL) { 1628 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1629 r->tbl->line, r->tbl->pos, "TS"); 1630 tbl_end(r->tbl); 1631 r->tbl = NULL; 1632 } 1633 } 1634 1635 /* 1636 * Parse a roff node's type from the input buffer. This must be in the 1637 * form of ".foo xxx" in the usual way. 1638 */ 1639 static enum roff_tok 1640 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1641 { 1642 char *cp; 1643 const char *mac; 1644 size_t maclen; 1645 int deftype; 1646 enum roff_tok t; 1647 1648 cp = buf + *pos; 1649 1650 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1651 return TOKEN_NONE; 1652 1653 mac = cp; 1654 maclen = roff_getname(r, &cp, ln, ppos); 1655 1656 deftype = ROFFDEF_USER | ROFFDEF_REN; 1657 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 1658 switch (deftype) { 1659 case ROFFDEF_USER: 1660 t = ROFF_USERDEF; 1661 break; 1662 case ROFFDEF_REN: 1663 t = ROFF_RENAMED; 1664 break; 1665 default: 1666 t = roffhash_find(r->reqtab, mac, maclen); 1667 break; 1668 } 1669 if (t != TOKEN_NONE) 1670 *pos = cp - buf; 1671 else if (deftype == ROFFDEF_UNDEF) { 1672 /* Using an undefined macro defines it to be empty. */ 1673 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 1674 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 1675 } 1676 return t; 1677 } 1678 1679 /* --- handling of request blocks ----------------------------------------- */ 1680 1681 static enum rofferr 1682 roff_cblock(ROFF_ARGS) 1683 { 1684 1685 /* 1686 * A block-close `..' should only be invoked as a child of an 1687 * ignore macro, otherwise raise a warning and just ignore it. 1688 */ 1689 1690 if (r->last == NULL) { 1691 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1692 ln, ppos, ".."); 1693 return ROFF_IGN; 1694 } 1695 1696 switch (r->last->tok) { 1697 case ROFF_am: 1698 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ 1699 case ROFF_ami: 1700 case ROFF_de: 1701 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 1702 case ROFF_dei: 1703 case ROFF_ig: 1704 break; 1705 default: 1706 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1707 ln, ppos, ".."); 1708 return ROFF_IGN; 1709 } 1710 1711 if (buf->buf[pos] != '\0') 1712 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 1713 ".. %s", buf->buf + pos); 1714 1715 roffnode_pop(r); 1716 roffnode_cleanscope(r); 1717 return ROFF_IGN; 1718 1719 } 1720 1721 static void 1722 roffnode_cleanscope(struct roff *r) 1723 { 1724 1725 while (r->last) { 1726 if (--r->last->endspan != 0) 1727 break; 1728 roffnode_pop(r); 1729 } 1730 } 1731 1732 static void 1733 roff_ccond(struct roff *r, int ln, int ppos) 1734 { 1735 1736 if (NULL == r->last) { 1737 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1738 ln, ppos, "\\}"); 1739 return; 1740 } 1741 1742 switch (r->last->tok) { 1743 case ROFF_el: 1744 case ROFF_ie: 1745 case ROFF_if: 1746 break; 1747 default: 1748 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1749 ln, ppos, "\\}"); 1750 return; 1751 } 1752 1753 if (r->last->endspan > -1) { 1754 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1755 ln, ppos, "\\}"); 1756 return; 1757 } 1758 1759 roffnode_pop(r); 1760 roffnode_cleanscope(r); 1761 return; 1762 } 1763 1764 static enum rofferr 1765 roff_block(ROFF_ARGS) 1766 { 1767 const char *name, *value; 1768 char *call, *cp, *iname, *rname; 1769 size_t csz, namesz, rsz; 1770 int deftype; 1771 1772 /* Ignore groff compatibility mode for now. */ 1773 1774 if (tok == ROFF_de1) 1775 tok = ROFF_de; 1776 else if (tok == ROFF_dei1) 1777 tok = ROFF_dei; 1778 else if (tok == ROFF_am1) 1779 tok = ROFF_am; 1780 else if (tok == ROFF_ami1) 1781 tok = ROFF_ami; 1782 1783 /* Parse the macro name argument. */ 1784 1785 cp = buf->buf + pos; 1786 if (tok == ROFF_ig) { 1787 iname = NULL; 1788 namesz = 0; 1789 } else { 1790 iname = cp; 1791 namesz = roff_getname(r, &cp, ln, ppos); 1792 iname[namesz] = '\0'; 1793 } 1794 1795 /* Resolve the macro name argument if it is indirect. */ 1796 1797 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 1798 deftype = ROFFDEF_USER; 1799 name = roff_getstrn(r, iname, namesz, &deftype); 1800 if (name == NULL) { 1801 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1802 r->parse, ln, (int)(iname - buf->buf), 1803 "%.*s", (int)namesz, iname); 1804 namesz = 0; 1805 } else 1806 namesz = strlen(name); 1807 } else 1808 name = iname; 1809 1810 if (namesz == 0 && tok != ROFF_ig) { 1811 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, 1812 ln, ppos, roff_name[tok]); 1813 return ROFF_IGN; 1814 } 1815 1816 roffnode_push(r, tok, name, ln, ppos); 1817 1818 /* 1819 * At the beginning of a `de' macro, clear the existing string 1820 * with the same name, if there is one. New content will be 1821 * appended from roff_block_text() in multiline mode. 1822 */ 1823 1824 if (tok == ROFF_de || tok == ROFF_dei) { 1825 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 1826 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 1827 } else if (tok == ROFF_am || tok == ROFF_ami) { 1828 deftype = ROFFDEF_ANY; 1829 value = roff_getstrn(r, iname, namesz, &deftype); 1830 switch (deftype) { /* Before appending, ... */ 1831 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 1832 roff_setstrn(&r->strtab, name, namesz, 1833 value, strlen(value), 0); 1834 break; 1835 case ROFFDEF_REN: /* call original standard macro. */ 1836 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 1837 (int)strlen(value), value); 1838 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 1839 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 1840 free(call); 1841 break; 1842 case ROFFDEF_STD: /* rename and call standard macro. */ 1843 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 1844 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 1845 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 1846 (int)rsz, rname); 1847 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 1848 free(call); 1849 free(rname); 1850 break; 1851 default: 1852 break; 1853 } 1854 } 1855 1856 if (*cp == '\0') 1857 return ROFF_IGN; 1858 1859 /* Get the custom end marker. */ 1860 1861 iname = cp; 1862 namesz = roff_getname(r, &cp, ln, ppos); 1863 1864 /* Resolve the end marker if it is indirect. */ 1865 1866 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 1867 deftype = ROFFDEF_USER; 1868 name = roff_getstrn(r, iname, namesz, &deftype); 1869 if (name == NULL) { 1870 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1871 r->parse, ln, (int)(iname - buf->buf), 1872 "%.*s", (int)namesz, iname); 1873 namesz = 0; 1874 } else 1875 namesz = strlen(name); 1876 } else 1877 name = iname; 1878 1879 if (namesz) 1880 r->last->end = mandoc_strndup(name, namesz); 1881 1882 if (*cp != '\0') 1883 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, 1884 ln, pos, ".%s ... %s", roff_name[tok], cp); 1885 1886 return ROFF_IGN; 1887 } 1888 1889 static enum rofferr 1890 roff_block_sub(ROFF_ARGS) 1891 { 1892 enum roff_tok t; 1893 int i, j; 1894 1895 /* 1896 * First check whether a custom macro exists at this level. If 1897 * it does, then check against it. This is some of groff's 1898 * stranger behaviours. If we encountered a custom end-scope 1899 * tag and that tag also happens to be a "real" macro, then we 1900 * need to try interpreting it again as a real macro. If it's 1901 * not, then return ignore. Else continue. 1902 */ 1903 1904 if (r->last->end) { 1905 for (i = pos, j = 0; r->last->end[j]; j++, i++) 1906 if (buf->buf[i] != r->last->end[j]) 1907 break; 1908 1909 if (r->last->end[j] == '\0' && 1910 (buf->buf[i] == '\0' || 1911 buf->buf[i] == ' ' || 1912 buf->buf[i] == '\t')) { 1913 roffnode_pop(r); 1914 roffnode_cleanscope(r); 1915 1916 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 1917 i++; 1918 1919 pos = i; 1920 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 1921 TOKEN_NONE) 1922 return ROFF_RERUN; 1923 return ROFF_IGN; 1924 } 1925 } 1926 1927 /* 1928 * If we have no custom end-query or lookup failed, then try 1929 * pulling it out of the hashtable. 1930 */ 1931 1932 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1933 1934 if (t != ROFF_cblock) { 1935 if (tok != ROFF_ig) 1936 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 1937 return ROFF_IGN; 1938 } 1939 1940 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 1941 } 1942 1943 static enum rofferr 1944 roff_block_text(ROFF_ARGS) 1945 { 1946 1947 if (tok != ROFF_ig) 1948 roff_setstr(r, r->last->name, buf->buf + pos, 2); 1949 1950 return ROFF_IGN; 1951 } 1952 1953 static enum rofferr 1954 roff_cond_sub(ROFF_ARGS) 1955 { 1956 enum roff_tok t; 1957 char *ep; 1958 int rr; 1959 1960 rr = r->last->rule; 1961 roffnode_cleanscope(r); 1962 1963 /* 1964 * If `\}' occurs on a macro line without a preceding macro, 1965 * drop the line completely. 1966 */ 1967 1968 ep = buf->buf + pos; 1969 if (ep[0] == '\\' && ep[1] == '}') 1970 rr = 0; 1971 1972 /* Always check for the closing delimiter `\}'. */ 1973 1974 while ((ep = strchr(ep, '\\')) != NULL) { 1975 switch (ep[1]) { 1976 case '}': 1977 memmove(ep, ep + 2, strlen(ep + 2) + 1); 1978 roff_ccond(r, ln, ep - buf->buf); 1979 break; 1980 case '\0': 1981 ++ep; 1982 break; 1983 default: 1984 ep += 2; 1985 break; 1986 } 1987 } 1988 1989 /* 1990 * Fully handle known macros when they are structurally 1991 * required or when the conditional evaluated to true. 1992 */ 1993 1994 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1995 return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) 1996 ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr 1997 ? ROFF_CONT : ROFF_IGN; 1998 } 1999 2000 static enum rofferr 2001 roff_cond_text(ROFF_ARGS) 2002 { 2003 char *ep; 2004 int rr; 2005 2006 rr = r->last->rule; 2007 roffnode_cleanscope(r); 2008 2009 ep = buf->buf + pos; 2010 while ((ep = strchr(ep, '\\')) != NULL) { 2011 if (*(++ep) == '}') { 2012 *ep = '&'; 2013 roff_ccond(r, ln, ep - buf->buf - 1); 2014 } 2015 if (*ep != '\0') 2016 ++ep; 2017 } 2018 return rr ? ROFF_CONT : ROFF_IGN; 2019 } 2020 2021 /* --- handling of numeric and conditional expressions -------------------- */ 2022 2023 /* 2024 * Parse a single signed integer number. Stop at the first non-digit. 2025 * If there is at least one digit, return success and advance the 2026 * parse point, else return failure and let the parse point unchanged. 2027 * Ignore overflows, treat them just like the C language. 2028 */ 2029 static int 2030 roff_getnum(const char *v, int *pos, int *res, int flags) 2031 { 2032 int myres, scaled, n, p; 2033 2034 if (NULL == res) 2035 res = &myres; 2036 2037 p = *pos; 2038 n = v[p] == '-'; 2039 if (n || v[p] == '+') 2040 p++; 2041 2042 if (flags & ROFFNUM_WHITE) 2043 while (isspace((unsigned char)v[p])) 2044 p++; 2045 2046 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2047 *res = 10 * *res + v[p] - '0'; 2048 if (p == *pos + n) 2049 return 0; 2050 2051 if (n) 2052 *res = -*res; 2053 2054 /* Each number may be followed by one optional scaling unit. */ 2055 2056 switch (v[p]) { 2057 case 'f': 2058 scaled = *res * 65536; 2059 break; 2060 case 'i': 2061 scaled = *res * 240; 2062 break; 2063 case 'c': 2064 scaled = *res * 240 / 2.54; 2065 break; 2066 case 'v': 2067 case 'P': 2068 scaled = *res * 40; 2069 break; 2070 case 'm': 2071 case 'n': 2072 scaled = *res * 24; 2073 break; 2074 case 'p': 2075 scaled = *res * 10 / 3; 2076 break; 2077 case 'u': 2078 scaled = *res; 2079 break; 2080 case 'M': 2081 scaled = *res * 6 / 25; 2082 break; 2083 default: 2084 scaled = *res; 2085 p--; 2086 break; 2087 } 2088 if (flags & ROFFNUM_SCALE) 2089 *res = scaled; 2090 2091 *pos = p + 1; 2092 return 1; 2093 } 2094 2095 /* 2096 * Evaluate a string comparison condition. 2097 * The first character is the delimiter. 2098 * Succeed if the string up to its second occurrence 2099 * matches the string up to its third occurence. 2100 * Advance the cursor after the third occurrence 2101 * or lacking that, to the end of the line. 2102 */ 2103 static int 2104 roff_evalstrcond(const char *v, int *pos) 2105 { 2106 const char *s1, *s2, *s3; 2107 int match; 2108 2109 match = 0; 2110 s1 = v + *pos; /* initial delimiter */ 2111 s2 = s1 + 1; /* for scanning the first string */ 2112 s3 = strchr(s2, *s1); /* for scanning the second string */ 2113 2114 if (NULL == s3) /* found no middle delimiter */ 2115 goto out; 2116 2117 while ('\0' != *++s3) { 2118 if (*s2 != *s3) { /* mismatch */ 2119 s3 = strchr(s3, *s1); 2120 break; 2121 } 2122 if (*s3 == *s1) { /* found the final delimiter */ 2123 match = 1; 2124 break; 2125 } 2126 s2++; 2127 } 2128 2129 out: 2130 if (NULL == s3) 2131 s3 = strchr(s2, '\0'); 2132 else if (*s3 != '\0') 2133 s3++; 2134 *pos = s3 - v; 2135 return match; 2136 } 2137 2138 /* 2139 * Evaluate an optionally negated single character, numerical, 2140 * or string condition. 2141 */ 2142 static int 2143 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2144 { 2145 char *cp, *name; 2146 size_t sz; 2147 int deftype, number, savepos, istrue, wanttrue; 2148 2149 if ('!' == v[*pos]) { 2150 wanttrue = 0; 2151 (*pos)++; 2152 } else 2153 wanttrue = 1; 2154 2155 switch (v[*pos]) { 2156 case '\0': 2157 return 0; 2158 case 'n': 2159 case 'o': 2160 (*pos)++; 2161 return wanttrue; 2162 case 'c': 2163 case 'e': 2164 case 't': 2165 case 'v': 2166 (*pos)++; 2167 return !wanttrue; 2168 case 'd': 2169 case 'r': 2170 cp = v + *pos + 1; 2171 while (*cp == ' ') 2172 cp++; 2173 name = cp; 2174 sz = roff_getname(r, &cp, ln, cp - v); 2175 if (sz == 0) 2176 istrue = 0; 2177 else if (v[*pos] == 'r') 2178 istrue = roff_hasregn(r, name, sz); 2179 else { 2180 deftype = ROFFDEF_ANY; 2181 roff_getstrn(r, name, sz, &deftype); 2182 istrue = !!deftype; 2183 } 2184 *pos = cp - v; 2185 return istrue == wanttrue; 2186 default: 2187 break; 2188 } 2189 2190 savepos = *pos; 2191 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2192 return (number > 0) == wanttrue; 2193 else if (*pos == savepos) 2194 return roff_evalstrcond(v, pos) == wanttrue; 2195 else 2196 return 0; 2197 } 2198 2199 static enum rofferr 2200 roff_line_ignore(ROFF_ARGS) 2201 { 2202 2203 return ROFF_IGN; 2204 } 2205 2206 static enum rofferr 2207 roff_insec(ROFF_ARGS) 2208 { 2209 2210 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse, 2211 ln, ppos, roff_name[tok]); 2212 return ROFF_IGN; 2213 } 2214 2215 static enum rofferr 2216 roff_unsupp(ROFF_ARGS) 2217 { 2218 2219 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse, 2220 ln, ppos, roff_name[tok]); 2221 return ROFF_IGN; 2222 } 2223 2224 static enum rofferr 2225 roff_cond(ROFF_ARGS) 2226 { 2227 2228 roffnode_push(r, tok, NULL, ln, ppos); 2229 2230 /* 2231 * An `.el' has no conditional body: it will consume the value 2232 * of the current rstack entry set in prior `ie' calls or 2233 * defaults to DENY. 2234 * 2235 * If we're not an `el', however, then evaluate the conditional. 2236 */ 2237 2238 r->last->rule = tok == ROFF_el ? 2239 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2240 roff_evalcond(r, ln, buf->buf, &pos); 2241 2242 /* 2243 * An if-else will put the NEGATION of the current evaluated 2244 * conditional into the stack of rules. 2245 */ 2246 2247 if (tok == ROFF_ie) { 2248 if (r->rstackpos + 1 == r->rstacksz) { 2249 r->rstacksz += 16; 2250 r->rstack = mandoc_reallocarray(r->rstack, 2251 r->rstacksz, sizeof(int)); 2252 } 2253 r->rstack[++r->rstackpos] = !r->last->rule; 2254 } 2255 2256 /* If the parent has false as its rule, then so do we. */ 2257 2258 if (r->last->parent && !r->last->parent->rule) 2259 r->last->rule = 0; 2260 2261 /* 2262 * Determine scope. 2263 * If there is nothing on the line after the conditional, 2264 * not even whitespace, use next-line scope. 2265 */ 2266 2267 if (buf->buf[pos] == '\0') { 2268 r->last->endspan = 2; 2269 goto out; 2270 } 2271 2272 while (buf->buf[pos] == ' ') 2273 pos++; 2274 2275 /* An opening brace requests multiline scope. */ 2276 2277 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2278 r->last->endspan = -1; 2279 pos += 2; 2280 while (buf->buf[pos] == ' ') 2281 pos++; 2282 goto out; 2283 } 2284 2285 /* 2286 * Anything else following the conditional causes 2287 * single-line scope. Warn if the scope contains 2288 * nothing but trailing whitespace. 2289 */ 2290 2291 if (buf->buf[pos] == '\0') 2292 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse, 2293 ln, ppos, roff_name[tok]); 2294 2295 r->last->endspan = 1; 2296 2297 out: 2298 *offs = pos; 2299 return ROFF_RERUN; 2300 } 2301 2302 static enum rofferr 2303 roff_ds(ROFF_ARGS) 2304 { 2305 char *string; 2306 const char *name; 2307 size_t namesz; 2308 2309 /* Ignore groff compatibility mode for now. */ 2310 2311 if (tok == ROFF_ds1) 2312 tok = ROFF_ds; 2313 else if (tok == ROFF_as1) 2314 tok = ROFF_as; 2315 2316 /* 2317 * The first word is the name of the string. 2318 * If it is empty or terminated by an escape sequence, 2319 * abort the `ds' request without defining anything. 2320 */ 2321 2322 name = string = buf->buf + pos; 2323 if (*name == '\0') 2324 return ROFF_IGN; 2325 2326 namesz = roff_getname(r, &string, ln, pos); 2327 if (name[namesz] == '\\') 2328 return ROFF_IGN; 2329 2330 /* Read past the initial double-quote, if any. */ 2331 if (*string == '"') 2332 string++; 2333 2334 /* The rest is the value. */ 2335 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2336 ROFF_as == tok); 2337 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2338 return ROFF_IGN; 2339 } 2340 2341 /* 2342 * Parse a single operator, one or two characters long. 2343 * If the operator is recognized, return success and advance the 2344 * parse point, else return failure and let the parse point unchanged. 2345 */ 2346 static int 2347 roff_getop(const char *v, int *pos, char *res) 2348 { 2349 2350 *res = v[*pos]; 2351 2352 switch (*res) { 2353 case '+': 2354 case '-': 2355 case '*': 2356 case '/': 2357 case '%': 2358 case '&': 2359 case ':': 2360 break; 2361 case '<': 2362 switch (v[*pos + 1]) { 2363 case '=': 2364 *res = 'l'; 2365 (*pos)++; 2366 break; 2367 case '>': 2368 *res = '!'; 2369 (*pos)++; 2370 break; 2371 case '?': 2372 *res = 'i'; 2373 (*pos)++; 2374 break; 2375 default: 2376 break; 2377 } 2378 break; 2379 case '>': 2380 switch (v[*pos + 1]) { 2381 case '=': 2382 *res = 'g'; 2383 (*pos)++; 2384 break; 2385 case '?': 2386 *res = 'a'; 2387 (*pos)++; 2388 break; 2389 default: 2390 break; 2391 } 2392 break; 2393 case '=': 2394 if ('=' == v[*pos + 1]) 2395 (*pos)++; 2396 break; 2397 default: 2398 return 0; 2399 } 2400 (*pos)++; 2401 2402 return *res; 2403 } 2404 2405 /* 2406 * Evaluate either a parenthesized numeric expression 2407 * or a single signed integer number. 2408 */ 2409 static int 2410 roff_evalpar(struct roff *r, int ln, 2411 const char *v, int *pos, int *res, int flags) 2412 { 2413 2414 if ('(' != v[*pos]) 2415 return roff_getnum(v, pos, res, flags); 2416 2417 (*pos)++; 2418 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2419 return 0; 2420 2421 /* 2422 * Omission of the closing parenthesis 2423 * is an error in validation mode, 2424 * but ignored in evaluation mode. 2425 */ 2426 2427 if (')' == v[*pos]) 2428 (*pos)++; 2429 else if (NULL == res) 2430 return 0; 2431 2432 return 1; 2433 } 2434 2435 /* 2436 * Evaluate a complete numeric expression. 2437 * Proceed left to right, there is no concept of precedence. 2438 */ 2439 static int 2440 roff_evalnum(struct roff *r, int ln, const char *v, 2441 int *pos, int *res, int flags) 2442 { 2443 int mypos, operand2; 2444 char operator; 2445 2446 if (NULL == pos) { 2447 mypos = 0; 2448 pos = &mypos; 2449 } 2450 2451 if (flags & ROFFNUM_WHITE) 2452 while (isspace((unsigned char)v[*pos])) 2453 (*pos)++; 2454 2455 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2456 return 0; 2457 2458 while (1) { 2459 if (flags & ROFFNUM_WHITE) 2460 while (isspace((unsigned char)v[*pos])) 2461 (*pos)++; 2462 2463 if ( ! roff_getop(v, pos, &operator)) 2464 break; 2465 2466 if (flags & ROFFNUM_WHITE) 2467 while (isspace((unsigned char)v[*pos])) 2468 (*pos)++; 2469 2470 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2471 return 0; 2472 2473 if (flags & ROFFNUM_WHITE) 2474 while (isspace((unsigned char)v[*pos])) 2475 (*pos)++; 2476 2477 if (NULL == res) 2478 continue; 2479 2480 switch (operator) { 2481 case '+': 2482 *res += operand2; 2483 break; 2484 case '-': 2485 *res -= operand2; 2486 break; 2487 case '*': 2488 *res *= operand2; 2489 break; 2490 case '/': 2491 if (operand2 == 0) { 2492 mandoc_msg(MANDOCERR_DIVZERO, 2493 r->parse, ln, *pos, v); 2494 *res = 0; 2495 break; 2496 } 2497 *res /= operand2; 2498 break; 2499 case '%': 2500 if (operand2 == 0) { 2501 mandoc_msg(MANDOCERR_DIVZERO, 2502 r->parse, ln, *pos, v); 2503 *res = 0; 2504 break; 2505 } 2506 *res %= operand2; 2507 break; 2508 case '<': 2509 *res = *res < operand2; 2510 break; 2511 case '>': 2512 *res = *res > operand2; 2513 break; 2514 case 'l': 2515 *res = *res <= operand2; 2516 break; 2517 case 'g': 2518 *res = *res >= operand2; 2519 break; 2520 case '=': 2521 *res = *res == operand2; 2522 break; 2523 case '!': 2524 *res = *res != operand2; 2525 break; 2526 case '&': 2527 *res = *res && operand2; 2528 break; 2529 case ':': 2530 *res = *res || operand2; 2531 break; 2532 case 'i': 2533 if (operand2 < *res) 2534 *res = operand2; 2535 break; 2536 case 'a': 2537 if (operand2 > *res) 2538 *res = operand2; 2539 break; 2540 default: 2541 abort(); 2542 } 2543 } 2544 return 1; 2545 } 2546 2547 /* --- register management ------------------------------------------------ */ 2548 2549 void 2550 roff_setreg(struct roff *r, const char *name, int val, char sign) 2551 { 2552 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 2553 } 2554 2555 static void 2556 roff_setregn(struct roff *r, const char *name, size_t len, 2557 int val, char sign, int step) 2558 { 2559 struct roffreg *reg; 2560 2561 /* Search for an existing register with the same name. */ 2562 reg = r->regtab; 2563 2564 while (reg != NULL && (reg->key.sz != len || 2565 strncmp(reg->key.p, name, len) != 0)) 2566 reg = reg->next; 2567 2568 if (NULL == reg) { 2569 /* Create a new register. */ 2570 reg = mandoc_malloc(sizeof(struct roffreg)); 2571 reg->key.p = mandoc_strndup(name, len); 2572 reg->key.sz = len; 2573 reg->val = 0; 2574 reg->step = 0; 2575 reg->next = r->regtab; 2576 r->regtab = reg; 2577 } 2578 2579 if ('+' == sign) 2580 reg->val += val; 2581 else if ('-' == sign) 2582 reg->val -= val; 2583 else 2584 reg->val = val; 2585 if (step != INT_MIN) 2586 reg->step = step; 2587 } 2588 2589 /* 2590 * Handle some predefined read-only number registers. 2591 * For now, return -1 if the requested register is not predefined; 2592 * in case a predefined read-only register having the value -1 2593 * were to turn up, another special value would have to be chosen. 2594 */ 2595 static int 2596 roff_getregro(const struct roff *r, const char *name) 2597 { 2598 2599 switch (*name) { 2600 case '$': /* Number of arguments of the last macro evaluated. */ 2601 return r->argc; 2602 case 'A': /* ASCII approximation mode is always off. */ 2603 return 0; 2604 case 'g': /* Groff compatibility mode is always on. */ 2605 return 1; 2606 case 'H': /* Fixed horizontal resolution. */ 2607 return 24; 2608 case 'j': /* Always adjust left margin only. */ 2609 return 0; 2610 case 'T': /* Some output device is always defined. */ 2611 return 1; 2612 case 'V': /* Fixed vertical resolution. */ 2613 return 40; 2614 default: 2615 return -1; 2616 } 2617 } 2618 2619 int 2620 roff_getreg(struct roff *r, const char *name) 2621 { 2622 return roff_getregn(r, name, strlen(name), '\0'); 2623 } 2624 2625 static int 2626 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 2627 { 2628 struct roffreg *reg; 2629 int val; 2630 2631 if ('.' == name[0] && 2 == len) { 2632 val = roff_getregro(r, name + 1); 2633 if (-1 != val) 2634 return val; 2635 } 2636 2637 for (reg = r->regtab; reg; reg = reg->next) { 2638 if (len == reg->key.sz && 2639 0 == strncmp(name, reg->key.p, len)) { 2640 switch (sign) { 2641 case '+': 2642 reg->val += reg->step; 2643 break; 2644 case '-': 2645 reg->val -= reg->step; 2646 break; 2647 default: 2648 break; 2649 } 2650 return reg->val; 2651 } 2652 } 2653 2654 roff_setregn(r, name, len, 0, '\0', INT_MIN); 2655 return 0; 2656 } 2657 2658 static int 2659 roff_hasregn(const struct roff *r, const char *name, size_t len) 2660 { 2661 struct roffreg *reg; 2662 int val; 2663 2664 if ('.' == name[0] && 2 == len) { 2665 val = roff_getregro(r, name + 1); 2666 if (-1 != val) 2667 return 1; 2668 } 2669 2670 for (reg = r->regtab; reg; reg = reg->next) 2671 if (len == reg->key.sz && 2672 0 == strncmp(name, reg->key.p, len)) 2673 return 1; 2674 2675 return 0; 2676 } 2677 2678 static void 2679 roff_freereg(struct roffreg *reg) 2680 { 2681 struct roffreg *old_reg; 2682 2683 while (NULL != reg) { 2684 free(reg->key.p); 2685 old_reg = reg; 2686 reg = reg->next; 2687 free(old_reg); 2688 } 2689 } 2690 2691 static enum rofferr 2692 roff_nr(ROFF_ARGS) 2693 { 2694 char *key, *val, *step; 2695 size_t keysz; 2696 int iv, is, len; 2697 char sign; 2698 2699 key = val = buf->buf + pos; 2700 if (*key == '\0') 2701 return ROFF_IGN; 2702 2703 keysz = roff_getname(r, &val, ln, pos); 2704 if (key[keysz] == '\\') 2705 return ROFF_IGN; 2706 2707 sign = *val; 2708 if (sign == '+' || sign == '-') 2709 val++; 2710 2711 len = 0; 2712 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 2713 return ROFF_IGN; 2714 2715 step = val + len; 2716 while (isspace((unsigned char)*step)) 2717 step++; 2718 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 2719 is = INT_MIN; 2720 2721 roff_setregn(r, key, keysz, iv, sign, is); 2722 return ROFF_IGN; 2723 } 2724 2725 static enum rofferr 2726 roff_rr(ROFF_ARGS) 2727 { 2728 struct roffreg *reg, **prev; 2729 char *name, *cp; 2730 size_t namesz; 2731 2732 name = cp = buf->buf + pos; 2733 if (*name == '\0') 2734 return ROFF_IGN; 2735 namesz = roff_getname(r, &cp, ln, pos); 2736 name[namesz] = '\0'; 2737 2738 prev = &r->regtab; 2739 while (1) { 2740 reg = *prev; 2741 if (reg == NULL || !strcmp(name, reg->key.p)) 2742 break; 2743 prev = ®->next; 2744 } 2745 if (reg != NULL) { 2746 *prev = reg->next; 2747 free(reg->key.p); 2748 free(reg); 2749 } 2750 return ROFF_IGN; 2751 } 2752 2753 /* --- handler functions for roff requests -------------------------------- */ 2754 2755 static enum rofferr 2756 roff_rm(ROFF_ARGS) 2757 { 2758 const char *name; 2759 char *cp; 2760 size_t namesz; 2761 2762 cp = buf->buf + pos; 2763 while (*cp != '\0') { 2764 name = cp; 2765 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 2766 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 2767 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2768 if (name[namesz] == '\\') 2769 break; 2770 } 2771 return ROFF_IGN; 2772 } 2773 2774 static enum rofferr 2775 roff_it(ROFF_ARGS) 2776 { 2777 int iv; 2778 2779 /* Parse the number of lines. */ 2780 2781 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 2782 mandoc_msg(MANDOCERR_IT_NONUM, r->parse, 2783 ln, ppos, buf->buf + 1); 2784 return ROFF_IGN; 2785 } 2786 2787 while (isspace((unsigned char)buf->buf[pos])) 2788 pos++; 2789 2790 /* 2791 * Arm the input line trap. 2792 * Special-casing "an-trap" is an ugly workaround to cope 2793 * with DocBook stupidly fiddling with man(7) internals. 2794 */ 2795 2796 roffit_lines = iv; 2797 roffit_macro = mandoc_strdup(iv != 1 || 2798 strcmp(buf->buf + pos, "an-trap") ? 2799 buf->buf + pos : "br"); 2800 return ROFF_IGN; 2801 } 2802 2803 static enum rofferr 2804 roff_Dd(ROFF_ARGS) 2805 { 2806 int mask; 2807 enum roff_tok t, te; 2808 2809 switch (tok) { 2810 case ROFF_Dd: 2811 tok = MDOC_Dd; 2812 te = MDOC_MAX; 2813 if (r->format == 0) 2814 r->format = MPARSE_MDOC; 2815 mask = MPARSE_MDOC | MPARSE_QUICK; 2816 break; 2817 case ROFF_TH: 2818 tok = MAN_TH; 2819 te = MAN_MAX; 2820 if (r->format == 0) 2821 r->format = MPARSE_MAN; 2822 mask = MPARSE_QUICK; 2823 break; 2824 default: 2825 abort(); 2826 } 2827 if ((r->options & mask) == 0) 2828 for (t = tok; t < te; t++) 2829 roff_setstr(r, roff_name[t], NULL, 0); 2830 return ROFF_CONT; 2831 } 2832 2833 static enum rofferr 2834 roff_TE(ROFF_ARGS) 2835 { 2836 if (r->tbl == NULL) { 2837 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 2838 ln, ppos, "TE"); 2839 return ROFF_IGN; 2840 } 2841 if (tbl_end(r->tbl) == 0) { 2842 r->tbl = NULL; 2843 free(buf->buf); 2844 buf->buf = mandoc_strdup(".sp"); 2845 buf->sz = 4; 2846 return ROFF_REPARSE; 2847 } 2848 r->tbl = NULL; 2849 return ROFF_IGN; 2850 } 2851 2852 static enum rofferr 2853 roff_T_(ROFF_ARGS) 2854 { 2855 2856 if (NULL == r->tbl) 2857 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 2858 ln, ppos, "T&"); 2859 else 2860 tbl_restart(ln, ppos, r->tbl); 2861 2862 return ROFF_IGN; 2863 } 2864 2865 /* 2866 * Handle in-line equation delimiters. 2867 */ 2868 static enum rofferr 2869 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 2870 { 2871 char *cp1, *cp2; 2872 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 2873 2874 /* 2875 * Outside equations, look for an opening delimiter. 2876 * If we are inside an equation, we already know it is 2877 * in-line, or this function wouldn't have been called; 2878 * so look for a closing delimiter. 2879 */ 2880 2881 cp1 = buf->buf + pos; 2882 cp2 = strchr(cp1, r->eqn == NULL ? 2883 r->last_eqn->odelim : r->last_eqn->cdelim); 2884 if (cp2 == NULL) 2885 return ROFF_CONT; 2886 2887 *cp2++ = '\0'; 2888 bef_pr = bef_nl = aft_nl = aft_pr = ""; 2889 2890 /* Handle preceding text, protecting whitespace. */ 2891 2892 if (*buf->buf != '\0') { 2893 if (r->eqn == NULL) 2894 bef_pr = "\\&"; 2895 bef_nl = "\n"; 2896 } 2897 2898 /* 2899 * Prepare replacing the delimiter with an equation macro 2900 * and drop leading white space from the equation. 2901 */ 2902 2903 if (r->eqn == NULL) { 2904 while (*cp2 == ' ') 2905 cp2++; 2906 mac = ".EQ"; 2907 } else 2908 mac = ".EN"; 2909 2910 /* Handle following text, protecting whitespace. */ 2911 2912 if (*cp2 != '\0') { 2913 aft_nl = "\n"; 2914 if (r->eqn != NULL) 2915 aft_pr = "\\&"; 2916 } 2917 2918 /* Do the actual replacement. */ 2919 2920 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 2921 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 2922 free(buf->buf); 2923 buf->buf = cp1; 2924 2925 /* Toggle the in-line state of the eqn subsystem. */ 2926 2927 r->eqn_inline = r->eqn == NULL; 2928 return ROFF_REPARSE; 2929 } 2930 2931 static enum rofferr 2932 roff_EQ(ROFF_ARGS) 2933 { 2934 struct roff_node *n; 2935 2936 if (r->man->macroset == MACROSET_MAN) 2937 man_breakscope(r->man, ROFF_EQ); 2938 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 2939 if (ln > r->man->last->line) 2940 n->flags |= NODE_LINE; 2941 n->eqn = mandoc_calloc(1, sizeof(*n->eqn)); 2942 n->eqn->expectargs = UINT_MAX; 2943 roff_node_append(r->man, n); 2944 r->man->next = ROFF_NEXT_SIBLING; 2945 2946 assert(r->eqn == NULL); 2947 if (r->last_eqn == NULL) 2948 r->last_eqn = eqn_alloc(r->parse); 2949 else 2950 eqn_reset(r->last_eqn); 2951 r->eqn = r->last_eqn; 2952 r->eqn->node = n; 2953 2954 if (buf->buf[pos] != '\0') 2955 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 2956 ".EQ %s", buf->buf + pos); 2957 2958 return ROFF_IGN; 2959 } 2960 2961 static enum rofferr 2962 roff_EN(ROFF_ARGS) 2963 { 2964 if (r->eqn != NULL) { 2965 eqn_parse(r->eqn); 2966 r->eqn = NULL; 2967 } else 2968 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN"); 2969 if (buf->buf[pos] != '\0') 2970 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 2971 "EN %s", buf->buf + pos); 2972 return ROFF_IGN; 2973 } 2974 2975 static enum rofferr 2976 roff_TS(ROFF_ARGS) 2977 { 2978 if (r->tbl != NULL) { 2979 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse, 2980 ln, ppos, "TS breaks TS"); 2981 tbl_end(r->tbl); 2982 } 2983 r->tbl = tbl_alloc(ppos, ln, r->parse); 2984 if (r->last_tbl) 2985 r->last_tbl->next = r->tbl; 2986 else 2987 r->first_tbl = r->tbl; 2988 r->last_tbl = r->tbl; 2989 return ROFF_IGN; 2990 } 2991 2992 static enum rofferr 2993 roff_onearg(ROFF_ARGS) 2994 { 2995 struct roff_node *n; 2996 char *cp; 2997 int npos; 2998 2999 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3000 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3001 tok == ROFF_ti)) 3002 man_breakscope(r->man, tok); 3003 3004 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3005 r->man->last = roffce_node; 3006 r->man->next = ROFF_NEXT_SIBLING; 3007 } 3008 3009 roff_elem_alloc(r->man, ln, ppos, tok); 3010 n = r->man->last; 3011 3012 cp = buf->buf + pos; 3013 if (*cp != '\0') { 3014 while (*cp != '\0' && *cp != ' ') 3015 cp++; 3016 while (*cp == ' ') 3017 *cp++ = '\0'; 3018 if (*cp != '\0') 3019 mandoc_vmsg(MANDOCERR_ARG_EXCESS, 3020 r->parse, ln, cp - buf->buf, 3021 "%s ... %s", roff_name[tok], cp); 3022 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3023 } 3024 3025 if (tok == ROFF_ce || tok == ROFF_rj) { 3026 if (r->man->last->type == ROFFT_ELEM) { 3027 roff_word_alloc(r->man, ln, pos, "1"); 3028 r->man->last->flags |= NODE_NOSRC; 3029 } 3030 npos = 0; 3031 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3032 &roffce_lines, 0) == 0) { 3033 mandoc_vmsg(MANDOCERR_CE_NONUM, 3034 r->parse, ln, pos, "ce %s", buf->buf + pos); 3035 roffce_lines = 1; 3036 } 3037 if (roffce_lines < 1) { 3038 r->man->last = r->man->last->parent; 3039 roffce_node = NULL; 3040 roffce_lines = 0; 3041 } else 3042 roffce_node = r->man->last->parent; 3043 } else { 3044 n->flags |= NODE_VALID | NODE_ENDED; 3045 r->man->last = n; 3046 } 3047 n->flags |= NODE_LINE; 3048 r->man->next = ROFF_NEXT_SIBLING; 3049 return ROFF_IGN; 3050 } 3051 3052 static enum rofferr 3053 roff_manyarg(ROFF_ARGS) 3054 { 3055 struct roff_node *n; 3056 char *sp, *ep; 3057 3058 roff_elem_alloc(r->man, ln, ppos, tok); 3059 n = r->man->last; 3060 3061 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3062 while (*ep != '\0' && *ep != ' ') 3063 ep++; 3064 while (*ep == ' ') 3065 *ep++ = '\0'; 3066 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3067 } 3068 3069 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3070 r->man->last = n; 3071 r->man->next = ROFF_NEXT_SIBLING; 3072 return ROFF_IGN; 3073 } 3074 3075 static enum rofferr 3076 roff_als(ROFF_ARGS) 3077 { 3078 char *oldn, *newn, *end, *value; 3079 size_t oldsz, newsz, valsz; 3080 3081 newn = oldn = buf->buf + pos; 3082 if (*newn == '\0') 3083 return ROFF_IGN; 3084 3085 newsz = roff_getname(r, &oldn, ln, pos); 3086 if (newn[newsz] == '\\' || *oldn == '\0') 3087 return ROFF_IGN; 3088 3089 end = oldn; 3090 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3091 if (oldsz == 0) 3092 return ROFF_IGN; 3093 3094 valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n", 3095 (int)oldsz, oldn); 3096 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3097 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3098 free(value); 3099 return ROFF_IGN; 3100 } 3101 3102 static enum rofferr 3103 roff_br(ROFF_ARGS) 3104 { 3105 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3106 man_breakscope(r->man, ROFF_br); 3107 roff_elem_alloc(r->man, ln, ppos, ROFF_br); 3108 if (buf->buf[pos] != '\0') 3109 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 3110 "%s %s", roff_name[tok], buf->buf + pos); 3111 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3112 r->man->next = ROFF_NEXT_SIBLING; 3113 return ROFF_IGN; 3114 } 3115 3116 static enum rofferr 3117 roff_cc(ROFF_ARGS) 3118 { 3119 const char *p; 3120 3121 p = buf->buf + pos; 3122 3123 if (*p == '\0' || (r->control = *p++) == '.') 3124 r->control = '\0'; 3125 3126 if (*p != '\0') 3127 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, 3128 ln, p - buf->buf, "cc ... %s", p); 3129 3130 return ROFF_IGN; 3131 } 3132 3133 static enum rofferr 3134 roff_ec(ROFF_ARGS) 3135 { 3136 const char *p; 3137 3138 p = buf->buf + pos; 3139 if (*p == '\0') 3140 r->escape = '\\'; 3141 else { 3142 r->escape = *p; 3143 if (*++p != '\0') 3144 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, 3145 ln, p - buf->buf, "ec ... %s", p); 3146 } 3147 return ROFF_IGN; 3148 } 3149 3150 static enum rofferr 3151 roff_eo(ROFF_ARGS) 3152 { 3153 r->escape = '\0'; 3154 if (buf->buf[pos] != '\0') 3155 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, 3156 ln, pos, "eo %s", buf->buf + pos); 3157 return ROFF_IGN; 3158 } 3159 3160 static enum rofferr 3161 roff_tr(ROFF_ARGS) 3162 { 3163 const char *p, *first, *second; 3164 size_t fsz, ssz; 3165 enum mandoc_esc esc; 3166 3167 p = buf->buf + pos; 3168 3169 if (*p == '\0') { 3170 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr"); 3171 return ROFF_IGN; 3172 } 3173 3174 while (*p != '\0') { 3175 fsz = ssz = 1; 3176 3177 first = p++; 3178 if (*first == '\\') { 3179 esc = mandoc_escape(&p, NULL, NULL); 3180 if (esc == ESCAPE_ERROR) { 3181 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 3182 ln, (int)(p - buf->buf), first); 3183 return ROFF_IGN; 3184 } 3185 fsz = (size_t)(p - first); 3186 } 3187 3188 second = p++; 3189 if (*second == '\\') { 3190 esc = mandoc_escape(&p, NULL, NULL); 3191 if (esc == ESCAPE_ERROR) { 3192 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 3193 ln, (int)(p - buf->buf), second); 3194 return ROFF_IGN; 3195 } 3196 ssz = (size_t)(p - second); 3197 } else if (*second == '\0') { 3198 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse, 3199 ln, first - buf->buf, "tr %s", first); 3200 second = " "; 3201 p--; 3202 } 3203 3204 if (fsz > 1) { 3205 roff_setstrn(&r->xmbtab, first, fsz, 3206 second, ssz, 0); 3207 continue; 3208 } 3209 3210 if (r->xtab == NULL) 3211 r->xtab = mandoc_calloc(128, 3212 sizeof(struct roffstr)); 3213 3214 free(r->xtab[(int)*first].p); 3215 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3216 r->xtab[(int)*first].sz = ssz; 3217 } 3218 3219 return ROFF_IGN; 3220 } 3221 3222 static enum rofferr 3223 roff_rn(ROFF_ARGS) 3224 { 3225 const char *value; 3226 char *oldn, *newn, *end; 3227 size_t oldsz, newsz; 3228 int deftype; 3229 3230 oldn = newn = buf->buf + pos; 3231 if (*oldn == '\0') 3232 return ROFF_IGN; 3233 3234 oldsz = roff_getname(r, &newn, ln, pos); 3235 if (oldn[oldsz] == '\\' || *newn == '\0') 3236 return ROFF_IGN; 3237 3238 end = newn; 3239 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3240 if (newsz == 0) 3241 return ROFF_IGN; 3242 3243 deftype = ROFFDEF_ANY; 3244 value = roff_getstrn(r, oldn, oldsz, &deftype); 3245 switch (deftype) { 3246 case ROFFDEF_USER: 3247 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3248 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3249 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3250 break; 3251 case ROFFDEF_PRE: 3252 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3253 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3254 break; 3255 case ROFFDEF_REN: 3256 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3257 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3258 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3259 break; 3260 case ROFFDEF_STD: 3261 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3262 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3263 break; 3264 default: 3265 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3266 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3267 break; 3268 } 3269 return ROFF_IGN; 3270 } 3271 3272 static enum rofferr 3273 roff_so(ROFF_ARGS) 3274 { 3275 char *name, *cp; 3276 3277 name = buf->buf + pos; 3278 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name); 3279 3280 /* 3281 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3282 * opening anything that's not in our cwd or anything beneath 3283 * it. Thus, explicitly disallow traversing up the file-system 3284 * or using absolute paths. 3285 */ 3286 3287 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3288 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos, 3289 ".so %s", name); 3290 buf->sz = mandoc_asprintf(&cp, 3291 ".sp\nSee the file %s.\n.sp", name) + 1; 3292 free(buf->buf); 3293 buf->buf = cp; 3294 *offs = 0; 3295 return ROFF_REPARSE; 3296 } 3297 3298 *offs = pos; 3299 return ROFF_SO; 3300 } 3301 3302 /* --- user defined strings and macros ------------------------------------ */ 3303 3304 static enum rofferr 3305 roff_userdef(ROFF_ARGS) 3306 { 3307 const char *arg[16], *ap; 3308 char *cp, *n1, *n2; 3309 int expand_count, i, ib, ie; 3310 size_t asz, rsz; 3311 3312 /* 3313 * Collect pointers to macro argument strings 3314 * and NUL-terminate them. 3315 */ 3316 3317 r->argc = 0; 3318 cp = buf->buf + pos; 3319 for (i = 0; i < 16; i++) { 3320 if (*cp == '\0') 3321 arg[i] = ""; 3322 else { 3323 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos); 3324 r->argc = i + 1; 3325 } 3326 } 3327 3328 /* 3329 * Expand macro arguments. 3330 */ 3331 3332 buf->sz = strlen(r->current_string) + 1; 3333 n1 = n2 = cp = mandoc_malloc(buf->sz); 3334 memcpy(n1, r->current_string, buf->sz); 3335 expand_count = 0; 3336 while (*cp != '\0') { 3337 3338 /* Scan ahead for the next argument invocation. */ 3339 3340 if (*cp++ != '\\') 3341 continue; 3342 if (*cp++ != '$') 3343 continue; 3344 if (*cp == '*') { /* \\$* inserts all arguments */ 3345 ib = 0; 3346 ie = r->argc - 1; 3347 } else { /* \\$1 .. \\$9 insert one argument */ 3348 ib = ie = *cp - '1'; 3349 if (ib < 0 || ib > 8) 3350 continue; 3351 } 3352 cp -= 2; 3353 3354 /* 3355 * Prevent infinite recursion. 3356 */ 3357 3358 if (cp >= n2) 3359 expand_count = 1; 3360 else if (++expand_count > EXPAND_LIMIT) { 3361 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, 3362 ln, (int)(cp - n1), NULL); 3363 free(buf->buf); 3364 buf->buf = n1; 3365 return ROFF_IGN; 3366 } 3367 3368 /* 3369 * Determine the size of the expanded argument, 3370 * taking escaping of quotes into account. 3371 */ 3372 3373 asz = ie > ib ? ie - ib : 0; /* for blanks */ 3374 for (i = ib; i <= ie; i++) { 3375 for (ap = arg[i]; *ap != '\0'; ap++) { 3376 asz++; 3377 if (*ap == '"') 3378 asz += 3; 3379 } 3380 } 3381 if (asz != 3) { 3382 3383 /* 3384 * Determine the size of the rest of the 3385 * unexpanded macro, including the NUL. 3386 */ 3387 3388 rsz = buf->sz - (cp - n1) - 3; 3389 3390 /* 3391 * When shrinking, move before 3392 * releasing the storage. 3393 */ 3394 3395 if (asz < 3) 3396 memmove(cp + asz, cp + 3, rsz); 3397 3398 /* 3399 * Resize the storage for the macro 3400 * and readjust the parse pointer. 3401 */ 3402 3403 buf->sz += asz - 3; 3404 n2 = mandoc_realloc(n1, buf->sz); 3405 cp = n2 + (cp - n1); 3406 n1 = n2; 3407 3408 /* 3409 * When growing, make room 3410 * for the expanded argument. 3411 */ 3412 3413 if (asz > 3) 3414 memmove(cp + asz, cp + 3, rsz); 3415 } 3416 3417 /* Copy the expanded argument, escaping quotes. */ 3418 3419 n2 = cp; 3420 for (i = ib; i <= ie; i++) { 3421 for (ap = arg[i]; *ap != '\0'; ap++) { 3422 if (*ap == '"') { 3423 memcpy(n2, "\\(dq", 4); 3424 n2 += 4; 3425 } else 3426 *n2++ = *ap; 3427 } 3428 if (i < ie) 3429 *n2++ = ' '; 3430 } 3431 } 3432 3433 /* 3434 * Replace the macro invocation 3435 * by the expanded macro. 3436 */ 3437 3438 free(buf->buf); 3439 buf->buf = n1; 3440 *offs = 0; 3441 3442 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ? 3443 ROFF_REPARSE : ROFF_APPEND; 3444 } 3445 3446 /* 3447 * Calling a high-level macro that was renamed with .rn. 3448 * r->current_string has already been set up by roff_parse(). 3449 */ 3450 static enum rofferr 3451 roff_renamed(ROFF_ARGS) 3452 { 3453 char *nbuf; 3454 3455 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3456 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 3457 free(buf->buf); 3458 buf->buf = nbuf; 3459 return ROFF_CONT; 3460 } 3461 3462 static size_t 3463 roff_getname(struct roff *r, char **cpp, int ln, int pos) 3464 { 3465 char *name, *cp; 3466 size_t namesz; 3467 3468 name = *cpp; 3469 if ('\0' == *name) 3470 return 0; 3471 3472 /* Read until end of name and terminate it with NUL. */ 3473 for (cp = name; 1; cp++) { 3474 if ('\0' == *cp || ' ' == *cp) { 3475 namesz = cp - name; 3476 break; 3477 } 3478 if ('\\' != *cp) 3479 continue; 3480 namesz = cp - name; 3481 if ('{' == cp[1] || '}' == cp[1]) 3482 break; 3483 cp++; 3484 if ('\\' == *cp) 3485 continue; 3486 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos, 3487 "%.*s", (int)(cp - name + 1), name); 3488 mandoc_escape((const char **)&cp, NULL, NULL); 3489 break; 3490 } 3491 3492 /* Read past spaces. */ 3493 while (' ' == *cp) 3494 cp++; 3495 3496 *cpp = cp; 3497 return namesz; 3498 } 3499 3500 /* 3501 * Store *string into the user-defined string called *name. 3502 * To clear an existing entry, call with (*r, *name, NULL, 0). 3503 * append == 0: replace mode 3504 * append == 1: single-line append mode 3505 * append == 2: multiline append mode, append '\n' after each call 3506 */ 3507 static void 3508 roff_setstr(struct roff *r, const char *name, const char *string, 3509 int append) 3510 { 3511 size_t namesz; 3512 3513 namesz = strlen(name); 3514 roff_setstrn(&r->strtab, name, namesz, string, 3515 string ? strlen(string) : 0, append); 3516 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3517 } 3518 3519 static void 3520 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 3521 const char *string, size_t stringsz, int append) 3522 { 3523 struct roffkv *n; 3524 char *c; 3525 int i; 3526 size_t oldch, newch; 3527 3528 /* Search for an existing string with the same name. */ 3529 n = *r; 3530 3531 while (n && (namesz != n->key.sz || 3532 strncmp(n->key.p, name, namesz))) 3533 n = n->next; 3534 3535 if (NULL == n) { 3536 /* Create a new string table entry. */ 3537 n = mandoc_malloc(sizeof(struct roffkv)); 3538 n->key.p = mandoc_strndup(name, namesz); 3539 n->key.sz = namesz; 3540 n->val.p = NULL; 3541 n->val.sz = 0; 3542 n->next = *r; 3543 *r = n; 3544 } else if (0 == append) { 3545 free(n->val.p); 3546 n->val.p = NULL; 3547 n->val.sz = 0; 3548 } 3549 3550 if (NULL == string) 3551 return; 3552 3553 /* 3554 * One additional byte for the '\n' in multiline mode, 3555 * and one for the terminating '\0'. 3556 */ 3557 newch = stringsz + (1 < append ? 2u : 1u); 3558 3559 if (NULL == n->val.p) { 3560 n->val.p = mandoc_malloc(newch); 3561 *n->val.p = '\0'; 3562 oldch = 0; 3563 } else { 3564 oldch = n->val.sz; 3565 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 3566 } 3567 3568 /* Skip existing content in the destination buffer. */ 3569 c = n->val.p + (int)oldch; 3570 3571 /* Append new content to the destination buffer. */ 3572 i = 0; 3573 while (i < (int)stringsz) { 3574 /* 3575 * Rudimentary roff copy mode: 3576 * Handle escaped backslashes. 3577 */ 3578 if ('\\' == string[i] && '\\' == string[i + 1]) 3579 i++; 3580 *c++ = string[i++]; 3581 } 3582 3583 /* Append terminating bytes. */ 3584 if (1 < append) 3585 *c++ = '\n'; 3586 3587 *c = '\0'; 3588 n->val.sz = (int)(c - n->val.p); 3589 } 3590 3591 static const char * 3592 roff_getstrn(struct roff *r, const char *name, size_t len, 3593 int *deftype) 3594 { 3595 const struct roffkv *n; 3596 int found, i; 3597 enum roff_tok tok; 3598 3599 found = 0; 3600 for (n = r->strtab; n != NULL; n = n->next) { 3601 if (strncmp(name, n->key.p, len) != 0 || 3602 n->key.p[len] != '\0' || n->val.p == NULL) 3603 continue; 3604 if (*deftype & ROFFDEF_USER) { 3605 *deftype = ROFFDEF_USER; 3606 return n->val.p; 3607 } else { 3608 found = 1; 3609 break; 3610 } 3611 } 3612 for (n = r->rentab; n != NULL; n = n->next) { 3613 if (strncmp(name, n->key.p, len) != 0 || 3614 n->key.p[len] != '\0' || n->val.p == NULL) 3615 continue; 3616 if (*deftype & ROFFDEF_REN) { 3617 *deftype = ROFFDEF_REN; 3618 return n->val.p; 3619 } else { 3620 found = 1; 3621 break; 3622 } 3623 } 3624 for (i = 0; i < PREDEFS_MAX; i++) { 3625 if (strncmp(name, predefs[i].name, len) != 0 || 3626 predefs[i].name[len] != '\0') 3627 continue; 3628 if (*deftype & ROFFDEF_PRE) { 3629 *deftype = ROFFDEF_PRE; 3630 return predefs[i].str; 3631 } else { 3632 found = 1; 3633 break; 3634 } 3635 } 3636 if (r->man->macroset != MACROSET_MAN) { 3637 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 3638 if (strncmp(name, roff_name[tok], len) != 0 || 3639 roff_name[tok][len] != '\0') 3640 continue; 3641 if (*deftype & ROFFDEF_STD) { 3642 *deftype = ROFFDEF_STD; 3643 return NULL; 3644 } else { 3645 found = 1; 3646 break; 3647 } 3648 } 3649 } 3650 if (r->man->macroset != MACROSET_MDOC) { 3651 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 3652 if (strncmp(name, roff_name[tok], len) != 0 || 3653 roff_name[tok][len] != '\0') 3654 continue; 3655 if (*deftype & ROFFDEF_STD) { 3656 *deftype = ROFFDEF_STD; 3657 return NULL; 3658 } else { 3659 found = 1; 3660 break; 3661 } 3662 } 3663 } 3664 3665 if (found == 0 && *deftype != ROFFDEF_ANY) { 3666 if (*deftype & ROFFDEF_REN) { 3667 /* 3668 * This might still be a request, 3669 * so do not treat it as undefined yet. 3670 */ 3671 *deftype = ROFFDEF_UNDEF; 3672 return NULL; 3673 } 3674 3675 /* Using an undefined string defines it to be empty. */ 3676 3677 roff_setstrn(&r->strtab, name, len, "", 0, 0); 3678 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 3679 } 3680 3681 *deftype = 0; 3682 return NULL; 3683 } 3684 3685 static void 3686 roff_freestr(struct roffkv *r) 3687 { 3688 struct roffkv *n, *nn; 3689 3690 for (n = r; n; n = nn) { 3691 free(n->key.p); 3692 free(n->val.p); 3693 nn = n->next; 3694 free(n); 3695 } 3696 } 3697 3698 /* --- accessors and utility functions ------------------------------------ */ 3699 3700 /* 3701 * Duplicate an input string, making the appropriate character 3702 * conversations (as stipulated by `tr') along the way. 3703 * Returns a heap-allocated string with all the replacements made. 3704 */ 3705 char * 3706 roff_strdup(const struct roff *r, const char *p) 3707 { 3708 const struct roffkv *cp; 3709 char *res; 3710 const char *pp; 3711 size_t ssz, sz; 3712 enum mandoc_esc esc; 3713 3714 if (NULL == r->xmbtab && NULL == r->xtab) 3715 return mandoc_strdup(p); 3716 else if ('\0' == *p) 3717 return mandoc_strdup(""); 3718 3719 /* 3720 * Step through each character looking for term matches 3721 * (remember that a `tr' can be invoked with an escape, which is 3722 * a glyph but the escape is multi-character). 3723 * We only do this if the character hash has been initialised 3724 * and the string is >0 length. 3725 */ 3726 3727 res = NULL; 3728 ssz = 0; 3729 3730 while ('\0' != *p) { 3731 assert((unsigned int)*p < 128); 3732 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 3733 sz = r->xtab[(int)*p].sz; 3734 res = mandoc_realloc(res, ssz + sz + 1); 3735 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 3736 ssz += sz; 3737 p++; 3738 continue; 3739 } else if ('\\' != *p) { 3740 res = mandoc_realloc(res, ssz + 2); 3741 res[ssz++] = *p++; 3742 continue; 3743 } 3744 3745 /* Search for term matches. */ 3746 for (cp = r->xmbtab; cp; cp = cp->next) 3747 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 3748 break; 3749 3750 if (NULL != cp) { 3751 /* 3752 * A match has been found. 3753 * Append the match to the array and move 3754 * forward by its keysize. 3755 */ 3756 res = mandoc_realloc(res, 3757 ssz + cp->val.sz + 1); 3758 memcpy(res + ssz, cp->val.p, cp->val.sz); 3759 ssz += cp->val.sz; 3760 p += (int)cp->key.sz; 3761 continue; 3762 } 3763 3764 /* 3765 * Handle escapes carefully: we need to copy 3766 * over just the escape itself, or else we might 3767 * do replacements within the escape itself. 3768 * Make sure to pass along the bogus string. 3769 */ 3770 pp = p++; 3771 esc = mandoc_escape(&p, NULL, NULL); 3772 if (ESCAPE_ERROR == esc) { 3773 sz = strlen(pp); 3774 res = mandoc_realloc(res, ssz + sz + 1); 3775 memcpy(res + ssz, pp, sz); 3776 break; 3777 } 3778 /* 3779 * We bail out on bad escapes. 3780 * No need to warn: we already did so when 3781 * roff_res() was called. 3782 */ 3783 sz = (int)(p - pp); 3784 res = mandoc_realloc(res, ssz + sz + 1); 3785 memcpy(res + ssz, pp, sz); 3786 ssz += sz; 3787 } 3788 3789 res[(int)ssz] = '\0'; 3790 return res; 3791 } 3792 3793 int 3794 roff_getformat(const struct roff *r) 3795 { 3796 3797 return r->format; 3798 } 3799 3800 /* 3801 * Find out whether a line is a macro line or not. 3802 * If it is, adjust the current position and return one; if it isn't, 3803 * return zero and don't change the current position. 3804 * If the control character has been set with `.cc', then let that grain 3805 * precedence. 3806 * This is slighly contrary to groff, where using the non-breaking 3807 * control character when `cc' has been invoked will cause the 3808 * non-breaking macro contents to be printed verbatim. 3809 */ 3810 int 3811 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 3812 { 3813 int pos; 3814 3815 pos = *ppos; 3816 3817 if (r->control != '\0' && cp[pos] == r->control) 3818 pos++; 3819 else if (r->control != '\0') 3820 return 0; 3821 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 3822 pos += 2; 3823 else if ('.' == cp[pos] || '\'' == cp[pos]) 3824 pos++; 3825 else 3826 return 0; 3827 3828 while (' ' == cp[pos] || '\t' == cp[pos]) 3829 pos++; 3830 3831 *ppos = pos; 3832 return 1; 3833 } 3834