1 /* $OpenBSD: roff.c,v 1.237 2019/04/21 23:45:50 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <limits.h> 23 #include <stddef.h> 24 #include <stdint.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 29 #include "mandoc_aux.h" 30 #include "mandoc_ohash.h" 31 #include "mandoc.h" 32 #include "roff.h" 33 #include "mandoc_parse.h" 34 #include "libmandoc.h" 35 #include "roff_int.h" 36 #include "tbl_parse.h" 37 #include "eqn_parse.h" 38 39 /* 40 * ASCII_ESC is used to signal from roff_getarg() to roff_expand() 41 * that an escape sequence resulted from copy-in processing and 42 * needs to be checked or interpolated. As it is used nowhere 43 * else, it is defined here rather than in a header file. 44 */ 45 #define ASCII_ESC 27 46 47 /* Maximum number of string expansions per line, to break infinite loops. */ 48 #define EXPAND_LIMIT 1000 49 50 /* Types of definitions of macros and strings. */ 51 #define ROFFDEF_USER (1 << 1) /* User-defined. */ 52 #define ROFFDEF_PRE (1 << 2) /* Predefined. 
*/ 53 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 54 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 55 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 56 ROFFDEF_REN | ROFFDEF_STD) 57 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 58 59 /* --- data types --------------------------------------------------------- */ 60 61 /* 62 * An incredibly-simple string buffer. 63 */ 64 struct roffstr { 65 char *p; /* nil-terminated buffer */ 66 size_t sz; /* saved strlen(p) */ 67 }; 68 69 /* 70 * A key-value roffstr pair as part of a singly-linked list. 71 */ 72 struct roffkv { 73 struct roffstr key; 74 struct roffstr val; 75 struct roffkv *next; /* next in list */ 76 }; 77 78 /* 79 * A single number register as part of a singly-linked list. 80 */ 81 struct roffreg { 82 struct roffstr key; 83 int val; 84 int step; 85 struct roffreg *next; 86 }; 87 88 /* 89 * Association of request and macro names with token IDs. 90 */ 91 struct roffreq { 92 enum roff_tok tok; 93 char name[]; 94 }; 95 96 /* 97 * A macro processing context. 98 * More than one is needed when macro calls are nested. 
99 */ 100 struct mctx { 101 char **argv; 102 int argc; 103 int argsz; 104 }; 105 106 struct roff { 107 struct roff_man *man; /* mdoc or man parser */ 108 struct roffnode *last; /* leaf of stack */ 109 struct mctx *mstack; /* stack of macro contexts */ 110 int *rstack; /* stack of inverted `ie' values */ 111 struct ohash *reqtab; /* request lookup table */ 112 struct roffreg *regtab; /* number registers */ 113 struct roffkv *strtab; /* user-defined strings & macros */ 114 struct roffkv *rentab; /* renamed strings & macros */ 115 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 116 struct roffstr *xtab; /* single-byte trans table (`tr') */ 117 const char *current_string; /* value of last called user macro */ 118 struct tbl_node *first_tbl; /* first table parsed */ 119 struct tbl_node *last_tbl; /* last table parsed */ 120 struct tbl_node *tbl; /* current table being parsed */ 121 struct eqn_node *last_eqn; /* equation parser */ 122 struct eqn_node *eqn; /* active equation parser */ 123 int eqn_inline; /* current equation is inline */ 124 int options; /* parse options */ 125 int mstacksz; /* current size of mstack */ 126 int mstackpos; /* position in mstack */ 127 int rstacksz; /* current size limit of rstack */ 128 int rstackpos; /* position in rstack */ 129 int format; /* current file in mdoc or man format */ 130 char control; /* control character */ 131 char escape; /* escape character */ 132 }; 133 134 /* 135 * A macro definition, condition, or ignored block. 136 */ 137 struct roffnode { 138 enum roff_tok tok; /* type of node */ 139 struct roffnode *parent; /* up one in stack */ 140 int line; /* parse line */ 141 int col; /* parse col */ 142 char *name; /* node name, e.g. 
macro name */ 143 char *end; /* custom end macro of the block */ 144 int endspan; /* scope to: 1=eol 2=next line -1=\} */ 145 int rule; /* content is: 1=evaluated 0=skipped */ 146 }; 147 148 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 149 enum roff_tok tok, /* tok of macro */ \ 150 struct buf *buf, /* input buffer */ \ 151 int ln, /* parse line */ \ 152 int ppos, /* original pos in buffer */ \ 153 int pos, /* current pos in buffer */ \ 154 int *offs /* reset offset of buffer data */ 155 156 typedef int (*roffproc)(ROFF_ARGS); 157 158 struct roffmac { 159 roffproc proc; /* process new macro */ 160 roffproc text; /* process as child text of macro */ 161 roffproc sub; /* process as child of macro */ 162 int flags; 163 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 164 }; 165 166 struct predef { 167 const char *name; /* predefined input name */ 168 const char *str; /* replacement symbol */ 169 }; 170 171 #define PREDEF(__name, __str) \ 172 { (__name), (__str) }, 173 174 /* --- function prototypes ------------------------------------------------ */ 175 176 static int roffnode_cleanscope(struct roff *); 177 static int roffnode_pop(struct roff *); 178 static void roffnode_push(struct roff *, enum roff_tok, 179 const char *, int, int); 180 static void roff_addtbl(struct roff_man *, int, struct tbl_node *); 181 static int roff_als(ROFF_ARGS); 182 static int roff_block(ROFF_ARGS); 183 static int roff_block_text(ROFF_ARGS); 184 static int roff_block_sub(ROFF_ARGS); 185 static int roff_break(ROFF_ARGS); 186 static int roff_cblock(ROFF_ARGS); 187 static int roff_cc(ROFF_ARGS); 188 static int roff_ccond(struct roff *, int, int); 189 static int roff_char(ROFF_ARGS); 190 static int roff_cond(ROFF_ARGS); 191 static int roff_cond_text(ROFF_ARGS); 192 static int roff_cond_sub(ROFF_ARGS); 193 static int roff_ds(ROFF_ARGS); 194 static int roff_ec(ROFF_ARGS); 195 static int roff_eo(ROFF_ARGS); 196 static int roff_eqndelim(struct roff *, struct buf *, int); 197 static int 
roff_evalcond(struct roff *r, int, char *, int *); 198 static int roff_evalnum(struct roff *, int, 199 const char *, int *, int *, int); 200 static int roff_evalpar(struct roff *, int, 201 const char *, int *, int *, int); 202 static int roff_evalstrcond(const char *, int *); 203 static int roff_expand(struct roff *, struct buf *, 204 int, int, char); 205 static void roff_free1(struct roff *); 206 static void roff_freereg(struct roffreg *); 207 static void roff_freestr(struct roffkv *); 208 static size_t roff_getname(struct roff *, char **, int, int); 209 static int roff_getnum(const char *, int *, int *, int); 210 static int roff_getop(const char *, int *, char *); 211 static int roff_getregn(struct roff *, 212 const char *, size_t, char); 213 static int roff_getregro(const struct roff *, 214 const char *name); 215 static const char *roff_getstrn(struct roff *, 216 const char *, size_t, int *); 217 static int roff_hasregn(const struct roff *, 218 const char *, size_t); 219 static int roff_insec(ROFF_ARGS); 220 static int roff_it(ROFF_ARGS); 221 static int roff_line_ignore(ROFF_ARGS); 222 static void roff_man_alloc1(struct roff_man *); 223 static void roff_man_free1(struct roff_man *); 224 static int roff_manyarg(ROFF_ARGS); 225 static int roff_noarg(ROFF_ARGS); 226 static int roff_nop(ROFF_ARGS); 227 static int roff_nr(ROFF_ARGS); 228 static int roff_onearg(ROFF_ARGS); 229 static enum roff_tok roff_parse(struct roff *, char *, int *, 230 int, int); 231 static int roff_parsetext(struct roff *, struct buf *, 232 int, int *); 233 static int roff_renamed(ROFF_ARGS); 234 static int roff_return(ROFF_ARGS); 235 static int roff_rm(ROFF_ARGS); 236 static int roff_rn(ROFF_ARGS); 237 static int roff_rr(ROFF_ARGS); 238 static void roff_setregn(struct roff *, const char *, 239 size_t, int, char, int); 240 static void roff_setstr(struct roff *, 241 const char *, const char *, int); 242 static void roff_setstrn(struct roffkv **, const char *, 243 size_t, const char *, size_t, 
int); 244 static int roff_shift(ROFF_ARGS); 245 static int roff_so(ROFF_ARGS); 246 static int roff_tr(ROFF_ARGS); 247 static int roff_Dd(ROFF_ARGS); 248 static int roff_TE(ROFF_ARGS); 249 static int roff_TS(ROFF_ARGS); 250 static int roff_EQ(ROFF_ARGS); 251 static int roff_EN(ROFF_ARGS); 252 static int roff_T_(ROFF_ARGS); 253 static int roff_unsupp(ROFF_ARGS); 254 static int roff_userdef(ROFF_ARGS); 255 256 /* --- constant data ------------------------------------------------------ */ 257 258 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 259 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 260 261 const char *__roff_name[MAN_MAX + 1] = { 262 "br", "ce", "fi", "ft", 263 "ll", "mc", "nf", 264 "po", "rj", "sp", 265 "ta", "ti", NULL, 266 "ab", "ad", "af", "aln", 267 "als", "am", "am1", "ami", 268 "ami1", "as", "as1", "asciify", 269 "backtrace", "bd", "bleedat", "blm", 270 "box", "boxa", "bp", "BP", 271 "break", "breakchar", "brnl", "brp", 272 "brpnl", "c2", "cc", 273 "cf", "cflags", "ch", "char", 274 "chop", "class", "close", "CL", 275 "color", "composite", "continue", "cp", 276 "cropat", "cs", "cu", "da", 277 "dch", "Dd", "de", "de1", 278 "defcolor", "dei", "dei1", "device", 279 "devicem", "di", "do", "ds", 280 "ds1", "dwh", "dt", "ec", 281 "ecr", "ecs", "el", "em", 282 "EN", "eo", "EP", "EQ", 283 "errprint", "ev", "evc", "ex", 284 "fallback", "fam", "fc", "fchar", 285 "fcolor", "fdeferlig", "feature", "fkern", 286 "fl", "flig", "fp", "fps", 287 "fschar", "fspacewidth", "fspecial", "ftr", 288 "fzoom", "gcolor", "hc", "hcode", 289 "hidechar", "hla", "hlm", "hpf", 290 "hpfa", "hpfcode", "hw", "hy", 291 "hylang", "hylen", "hym", "hypp", 292 "hys", "ie", "if", "ig", 293 "index", "it", "itc", "IX", 294 "kern", "kernafter", "kernbefore", "kernpair", 295 "lc", "lc_ctype", "lds", "length", 296 "letadj", "lf", "lg", "lhang", 297 "linetabs", "lnr", "lnrf", "lpfx", 298 "ls", "lsm", "lt", 299 "mediasize", "minss", "mk", "mso", 300 
"na", "ne", "nh", "nhychar", 301 "nm", "nn", "nop", "nr", 302 "nrf", "nroff", "ns", "nx", 303 "open", "opena", "os", "output", 304 "padj", "papersize", "pc", "pev", 305 "pi", "PI", "pl", "pm", 306 "pn", "pnr", "ps", 307 "psbb", "pshape", "pso", "ptr", 308 "pvs", "rchar", "rd", "recursionlimit", 309 "return", "rfschar", "rhang", 310 "rm", "rn", "rnn", "rr", 311 "rs", "rt", "schar", "sentchar", 312 "shc", "shift", "sizes", "so", 313 "spacewidth", "special", "spreadwarn", "ss", 314 "sty", "substring", "sv", "sy", 315 "T&", "tc", "TE", 316 "TH", "tkf", "tl", 317 "tm", "tm1", "tmc", "tr", 318 "track", "transchar", "trf", "trimat", 319 "trin", "trnt", "troff", "TS", 320 "uf", "ul", "unformat", "unwatch", 321 "unwatchn", "vpt", "vs", "warn", 322 "warnscale", "watch", "watchlength", "watchn", 323 "wh", "while", "write", "writec", 324 "writem", "xflag", ".", NULL, 325 NULL, "text", 326 "Dd", "Dt", "Os", "Sh", 327 "Ss", "Pp", "D1", "Dl", 328 "Bd", "Ed", "Bl", "El", 329 "It", "Ad", "An", "Ap", 330 "Ar", "Cd", "Cm", "Dv", 331 "Er", "Ev", "Ex", "Fa", 332 "Fd", "Fl", "Fn", "Ft", 333 "Ic", "In", "Li", "Nd", 334 "Nm", "Op", "Ot", "Pa", 335 "Rv", "St", "Va", "Vt", 336 "Xr", "%A", "%B", "%D", 337 "%I", "%J", "%N", "%O", 338 "%P", "%R", "%T", "%V", 339 "Ac", "Ao", "Aq", "At", 340 "Bc", "Bf", "Bo", "Bq", 341 "Bsx", "Bx", "Db", "Dc", 342 "Do", "Dq", "Ec", "Ef", 343 "Em", "Eo", "Fx", "Ms", 344 "No", "Ns", "Nx", "Ox", 345 "Pc", "Pf", "Po", "Pq", 346 "Qc", "Ql", "Qo", "Qq", 347 "Re", "Rs", "Sc", "So", 348 "Sq", "Sm", "Sx", "Sy", 349 "Tn", "Ux", "Xc", "Xo", 350 "Fo", "Fc", "Oo", "Oc", 351 "Bk", "Ek", "Bt", "Hf", 352 "Fr", "Ud", "Lb", "Lp", 353 "Lk", "Mt", "Brq", "Bro", 354 "Brc", "%C", "Es", "En", 355 "Dx", "%Q", "%U", "Ta", 356 NULL, 357 "TH", "SH", "SS", "TP", 358 "TQ", 359 "LP", "PP", "P", "IP", 360 "HP", "SM", "SB", "BI", 361 "IB", "BR", "RB", "R", 362 "B", "I", "IR", "RI", 363 "RE", "RS", "DT", "UC", 364 "PD", "AT", "in", 365 "SY", "YS", "OP", 366 "EX", "EE", "UR", 367 "UE", "MT", 
"ME", NULL 368 }; 369 const char *const *roff_name = __roff_name; 370 371 static struct roffmac roffs[TOKEN_NONE] = { 372 { roff_noarg, NULL, NULL, 0 }, /* br */ 373 { roff_onearg, NULL, NULL, 0 }, /* ce */ 374 { roff_noarg, NULL, NULL, 0 }, /* fi */ 375 { roff_onearg, NULL, NULL, 0 }, /* ft */ 376 { roff_onearg, NULL, NULL, 0 }, /* ll */ 377 { roff_onearg, NULL, NULL, 0 }, /* mc */ 378 { roff_noarg, NULL, NULL, 0 }, /* nf */ 379 { roff_onearg, NULL, NULL, 0 }, /* po */ 380 { roff_onearg, NULL, NULL, 0 }, /* rj */ 381 { roff_onearg, NULL, NULL, 0 }, /* sp */ 382 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 383 { roff_onearg, NULL, NULL, 0 }, /* ti */ 384 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 385 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 386 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 387 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 388 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 389 { roff_als, NULL, NULL, 0 }, /* als */ 390 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 391 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 392 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 393 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 394 { roff_ds, NULL, NULL, 0 }, /* as */ 395 { roff_ds, NULL, NULL, 0 }, /* as1 */ 396 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 397 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 398 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 399 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 400 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 401 { roff_unsupp, NULL, NULL, 0 }, /* box */ 402 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 403 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 404 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 405 { roff_break, NULL, NULL, 0 }, /* break */ 406 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 407 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 408 { roff_noarg, NULL, NULL, 0 }, /* brp */ 409 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 410 { roff_unsupp, 
NULL, NULL, 0 }, /* c2 */ 411 { roff_cc, NULL, NULL, 0 }, /* cc */ 412 { roff_insec, NULL, NULL, 0 }, /* cf */ 413 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 414 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 415 { roff_char, NULL, NULL, 0 }, /* char */ 416 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 417 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 418 { roff_insec, NULL, NULL, 0 }, /* close */ 419 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 420 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 421 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 422 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 423 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 424 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 425 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 426 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 427 { roff_unsupp, NULL, NULL, 0 }, /* da */ 428 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 429 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 430 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 431 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 432 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 433 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 434 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 435 { roff_unsupp, NULL, NULL, 0 }, /* device */ 436 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 437 { roff_unsupp, NULL, NULL, 0 }, /* di */ 438 { roff_unsupp, NULL, NULL, 0 }, /* do */ 439 { roff_ds, NULL, NULL, 0 }, /* ds */ 440 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 441 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 442 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 443 { roff_ec, NULL, NULL, 0 }, /* ec */ 444 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 445 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 446 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 447 { roff_unsupp, NULL, NULL, 0 }, /* em */ 448 { roff_EN, NULL, NULL, 0 }, /* EN */ 449 { roff_eo, NULL, NULL, 0 }, /* eo */ 450 { roff_unsupp, NULL, NULL, 0 }, 
/* EP */ 451 { roff_EQ, NULL, NULL, 0 }, /* EQ */ 452 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 453 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 454 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 455 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 456 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 457 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 458 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 459 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 460 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 461 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 462 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 463 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 464 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 465 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 466 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 467 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 468 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 469 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 470 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 471 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 472 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 473 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 474 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 475 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 476 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 477 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 479 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 484 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 485 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 486 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 487 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ 488 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 489 { roff_cond, roff_cond_text, 
roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 490 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 491 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 492 { roff_unsupp, NULL, NULL, 0 }, /* index */ 493 { roff_it, NULL, NULL, 0 }, /* it */ 494 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 495 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 496 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 497 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 498 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 499 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 500 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 501 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 502 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 503 { roff_unsupp, NULL, NULL, 0 }, /* length */ 504 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 505 { roff_insec, NULL, NULL, 0 }, /* lf */ 506 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 507 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 508 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 509 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 510 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 511 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 512 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 513 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 514 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 515 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 516 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 517 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 518 { roff_insec, NULL, NULL, 0 }, /* mso */ 519 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 520 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 521 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 522 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 523 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 524 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 525 { roff_nop, NULL, NULL, 0 }, /* nop */ 526 { roff_nr, NULL, NULL, 0 }, /* nr */ 527 { roff_unsupp, NULL, NULL, 0 }, /* nrf */ 528 { roff_line_ignore, NULL, NULL, 0 }, /* nroff 
*/ 529 { roff_line_ignore, NULL, NULL, 0 }, /* ns */ 530 { roff_insec, NULL, NULL, 0 }, /* nx */ 531 { roff_insec, NULL, NULL, 0 }, /* open */ 532 { roff_insec, NULL, NULL, 0 }, /* opena */ 533 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 534 { roff_unsupp, NULL, NULL, 0 }, /* output */ 535 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 536 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 537 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 538 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 539 { roff_insec, NULL, NULL, 0 }, /* pi */ 540 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 541 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 542 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 543 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 544 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 545 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 546 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 547 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 548 { roff_insec, NULL, NULL, 0 }, /* pso */ 549 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 550 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 551 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 552 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 553 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 554 { roff_return, NULL, NULL, 0 }, /* return */ 555 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 556 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 557 { roff_rm, NULL, NULL, 0 }, /* rm */ 558 { roff_rn, NULL, NULL, 0 }, /* rn */ 559 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 560 { roff_rr, NULL, NULL, 0 }, /* rr */ 561 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 562 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 563 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 564 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 565 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 566 { roff_shift, NULL, NULL, 0 }, /* shift */ 567 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ 568 { roff_so, NULL, NULL, 0 }, /* so */ 569 { roff_line_ignore, NULL, NULL, 0 }, 
/* spacewidth */ 570 { roff_line_ignore, NULL, NULL, 0 }, /* special */ 571 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 572 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 573 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 574 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 575 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 576 { roff_insec, NULL, NULL, 0 }, /* sy */ 577 { roff_T_, NULL, NULL, 0 }, /* T& */ 578 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 579 { roff_TE, NULL, NULL, 0 }, /* TE */ 580 { roff_Dd, NULL, NULL, 0 }, /* TH */ 581 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 582 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 583 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 584 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 585 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 586 { roff_tr, NULL, NULL, 0 }, /* tr */ 587 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 588 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 589 { roff_insec, NULL, NULL, 0 }, /* trf */ 590 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 591 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 592 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 593 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 594 { roff_TS, NULL, NULL, 0 }, /* TS */ 595 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 596 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 597 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 598 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 599 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 600 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 601 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 602 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 603 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 604 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 605 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 606 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 607 { roff_unsupp, NULL, NULL, 0 }, /* wh */ 608 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 609 { 
roff_insec, NULL, NULL, 0 }, /* write */ 610 { roff_insec, NULL, NULL, 0 }, /* writec */ 611 { roff_insec, NULL, NULL, 0 }, /* writem */ 612 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 613 { roff_cblock, NULL, NULL, 0 }, /* . */ 614 { roff_renamed, NULL, NULL, 0 }, 615 { roff_userdef, NULL, NULL, 0 } 616 }; 617 618 /* Array of injected predefined strings. */ 619 #define PREDEFS_MAX 38 620 static const struct predef predefs[PREDEFS_MAX] = { 621 #include "predefs.in" 622 }; 623 624 static int roffce_lines; /* number of input lines to center */ 625 static struct roff_node *roffce_node; /* active request */ 626 static int roffit_lines; /* number of lines to delay */ 627 static char *roffit_macro; /* nil-terminated macro line */ 628 629 630 /* --- request table ------------------------------------------------------ */ 631 632 struct ohash * 633 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 634 { 635 struct ohash *htab; 636 struct roffreq *req; 637 enum roff_tok tok; 638 size_t sz; 639 unsigned int slot; 640 641 htab = mandoc_malloc(sizeof(*htab)); 642 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 643 644 for (tok = mintok; tok < maxtok; tok++) { 645 if (roff_name[tok] == NULL) 646 continue; 647 sz = strlen(roff_name[tok]); 648 req = mandoc_malloc(sizeof(*req) + sz + 1); 649 req->tok = tok; 650 memcpy(req->name, roff_name[tok], sz + 1); 651 slot = ohash_qlookup(htab, req->name); 652 ohash_insert(htab, slot, req); 653 } 654 return htab; 655 } 656 657 void 658 roffhash_free(struct ohash *htab) 659 { 660 struct roffreq *req; 661 unsigned int slot; 662 663 if (htab == NULL) 664 return; 665 for (req = ohash_first(htab, &slot); req != NULL; 666 req = ohash_next(htab, &slot)) 667 free(req); 668 ohash_delete(htab); 669 free(htab); 670 } 671 672 enum roff_tok 673 roffhash_find(struct ohash *htab, const char *name, size_t sz) 674 { 675 struct roffreq *req; 676 const char *end; 677 678 if (sz) { 679 end = name + sz; 680 req = ohash_find(htab, 
ohash_qlookupi(htab, name, &end)); 681 } else 682 req = ohash_find(htab, ohash_qlookup(htab, name)); 683 return req == NULL ? TOKEN_NONE : req->tok; 684 } 685 686 /* --- stack of request blocks -------------------------------------------- */ 687 688 /* 689 * Pop the current node off of the stack of roff instructions currently 690 * pending. Return 1 if it is a loop or 0 otherwise. 691 */ 692 static int 693 roffnode_pop(struct roff *r) 694 { 695 struct roffnode *p; 696 int inloop; 697 698 p = r->last; 699 inloop = p->tok == ROFF_while; 700 r->last = p->parent; 701 free(p->name); 702 free(p->end); 703 free(p); 704 return inloop; 705 } 706 707 /* 708 * Push a roff node onto the instruction stack. This must later be 709 * removed with roffnode_pop(). 710 */ 711 static void 712 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 713 int line, int col) 714 { 715 struct roffnode *p; 716 717 p = mandoc_calloc(1, sizeof(struct roffnode)); 718 p->tok = tok; 719 if (name) 720 p->name = mandoc_strdup(name); 721 p->parent = r->last; 722 p->line = line; 723 p->col = col; 724 p->rule = p->parent ? 
p->parent->rule : 0; 725 726 r->last = p; 727 } 728 729 /* --- roff parser state data management ---------------------------------- */ 730 731 static void 732 roff_free1(struct roff *r) 733 { 734 int i; 735 736 tbl_free(r->first_tbl); 737 r->first_tbl = r->last_tbl = r->tbl = NULL; 738 739 eqn_free(r->last_eqn); 740 r->last_eqn = r->eqn = NULL; 741 742 while (r->mstackpos >= 0) 743 roff_userret(r); 744 745 while (r->last) 746 roffnode_pop(r); 747 748 free (r->rstack); 749 r->rstack = NULL; 750 r->rstacksz = 0; 751 r->rstackpos = -1; 752 753 roff_freereg(r->regtab); 754 r->regtab = NULL; 755 756 roff_freestr(r->strtab); 757 roff_freestr(r->rentab); 758 roff_freestr(r->xmbtab); 759 r->strtab = r->rentab = r->xmbtab = NULL; 760 761 if (r->xtab) 762 for (i = 0; i < 128; i++) 763 free(r->xtab[i].p); 764 free(r->xtab); 765 r->xtab = NULL; 766 } 767 768 void 769 roff_reset(struct roff *r) 770 { 771 roff_free1(r); 772 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 773 r->control = '\0'; 774 r->escape = '\\'; 775 roffce_lines = 0; 776 roffce_node = NULL; 777 roffit_lines = 0; 778 roffit_macro = NULL; 779 } 780 781 void 782 roff_free(struct roff *r) 783 { 784 int i; 785 786 roff_free1(r); 787 for (i = 0; i < r->mstacksz; i++) 788 free(r->mstack[i].argv); 789 free(r->mstack); 790 roffhash_free(r->reqtab); 791 free(r); 792 } 793 794 struct roff * 795 roff_alloc(int options) 796 { 797 struct roff *r; 798 799 r = mandoc_calloc(1, sizeof(struct roff)); 800 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 801 r->options = options; 802 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 803 r->mstackpos = -1; 804 r->rstackpos = -1; 805 r->escape = '\\'; 806 return r; 807 } 808 809 /* --- syntax tree state data management ---------------------------------- */ 810 811 static void 812 roff_man_free1(struct roff_man *man) 813 { 814 if (man->meta.first != NULL) 815 roff_node_delete(man, man->meta.first); 816 free(man->meta.msec); 817 free(man->meta.vol); 818 free(man->meta.os); 819 
free(man->meta.arch); 820 free(man->meta.title); 821 free(man->meta.name); 822 free(man->meta.date); 823 free(man->meta.sodest); 824 } 825 826 void 827 roff_state_reset(struct roff_man *man) 828 { 829 man->last = man->meta.first; 830 man->last_es = NULL; 831 man->flags = 0; 832 man->lastsec = man->lastnamed = SEC_NONE; 833 man->next = ROFF_NEXT_CHILD; 834 roff_setreg(man->roff, "nS", 0, '='); 835 } 836 837 static void 838 roff_man_alloc1(struct roff_man *man) 839 { 840 memset(&man->meta, 0, sizeof(man->meta)); 841 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 842 man->meta.first->type = ROFFT_ROOT; 843 man->meta.macroset = MACROSET_NONE; 844 roff_state_reset(man); 845 } 846 847 void 848 roff_man_reset(struct roff_man *man) 849 { 850 roff_man_free1(man); 851 roff_man_alloc1(man); 852 } 853 854 void 855 roff_man_free(struct roff_man *man) 856 { 857 roff_man_free1(man); 858 free(man); 859 } 860 861 struct roff_man * 862 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 863 { 864 struct roff_man *man; 865 866 man = mandoc_calloc(1, sizeof(*man)); 867 man->roff = roff; 868 man->os_s = os_s; 869 man->quick = quick; 870 roff_man_alloc1(man); 871 roff->man = man; 872 return man; 873 } 874 875 /* --- syntax tree handling ----------------------------------------------- */ 876 877 struct roff_node * 878 roff_node_alloc(struct roff_man *man, int line, int pos, 879 enum roff_type type, int tok) 880 { 881 struct roff_node *n; 882 883 n = mandoc_calloc(1, sizeof(*n)); 884 n->line = line; 885 n->pos = pos; 886 n->tok = tok; 887 n->type = type; 888 n->sec = man->lastsec; 889 890 if (man->flags & MDOC_SYNOPSIS) 891 n->flags |= NODE_SYNPRETTY; 892 else 893 n->flags &= ~NODE_SYNPRETTY; 894 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 895 n->flags |= NODE_NOFILL; 896 else 897 n->flags &= ~NODE_NOFILL; 898 if (man->flags & MDOC_NEWLINE) 899 n->flags |= NODE_LINE; 900 man->flags &= ~MDOC_NEWLINE; 901 902 return n; 903 } 904 905 void 906 
roff_node_append(struct roff_man *man, struct roff_node *n) 907 { 908 909 switch (man->next) { 910 case ROFF_NEXT_SIBLING: 911 if (man->last->next != NULL) { 912 n->next = man->last->next; 913 man->last->next->prev = n; 914 } else 915 man->last->parent->last = n; 916 man->last->next = n; 917 n->prev = man->last; 918 n->parent = man->last->parent; 919 break; 920 case ROFF_NEXT_CHILD: 921 if (man->last->child != NULL) { 922 n->next = man->last->child; 923 man->last->child->prev = n; 924 } else 925 man->last->last = n; 926 man->last->child = n; 927 n->parent = man->last; 928 break; 929 default: 930 abort(); 931 } 932 man->last = n; 933 934 switch (n->type) { 935 case ROFFT_HEAD: 936 n->parent->head = n; 937 break; 938 case ROFFT_BODY: 939 if (n->end != ENDBODY_NOT) 940 return; 941 n->parent->body = n; 942 break; 943 case ROFFT_TAIL: 944 n->parent->tail = n; 945 break; 946 default: 947 return; 948 } 949 950 /* 951 * Copy over the normalised-data pointer of our parent. Not 952 * everybody has one, but copying a null pointer is fine. 
953 */ 954 955 n->norm = n->parent->norm; 956 assert(n->parent->type == ROFFT_BLOCK); 957 } 958 959 void 960 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 961 { 962 struct roff_node *n; 963 964 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 965 n->string = roff_strdup(man->roff, word); 966 roff_node_append(man, n); 967 n->flags |= NODE_VALID | NODE_ENDED; 968 man->next = ROFF_NEXT_SIBLING; 969 } 970 971 void 972 roff_word_append(struct roff_man *man, const char *word) 973 { 974 struct roff_node *n; 975 char *addstr, *newstr; 976 977 n = man->last; 978 addstr = roff_strdup(man->roff, word); 979 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 980 free(addstr); 981 free(n->string); 982 n->string = newstr; 983 man->next = ROFF_NEXT_SIBLING; 984 } 985 986 void 987 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 988 { 989 struct roff_node *n; 990 991 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 992 roff_node_append(man, n); 993 man->next = ROFF_NEXT_CHILD; 994 } 995 996 struct roff_node * 997 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 998 { 999 struct roff_node *n; 1000 1001 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1002 roff_node_append(man, n); 1003 man->next = ROFF_NEXT_CHILD; 1004 return n; 1005 } 1006 1007 struct roff_node * 1008 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1009 { 1010 struct roff_node *n; 1011 1012 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1013 roff_node_append(man, n); 1014 man->next = ROFF_NEXT_CHILD; 1015 return n; 1016 } 1017 1018 struct roff_node * 1019 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1020 { 1021 struct roff_node *n; 1022 1023 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1024 roff_node_append(man, n); 1025 man->next = ROFF_NEXT_CHILD; 1026 return n; 1027 } 1028 1029 static void 1030 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1031 { 
1032 struct roff_node *n; 1033 struct tbl_span *span; 1034 1035 if (man->meta.macroset == MACROSET_MAN) 1036 man_breakscope(man, ROFF_TS); 1037 while ((span = tbl_span(tbl)) != NULL) { 1038 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1039 n->span = span; 1040 roff_node_append(man, n); 1041 n->flags |= NODE_VALID | NODE_ENDED; 1042 man->next = ROFF_NEXT_SIBLING; 1043 } 1044 } 1045 1046 void 1047 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1048 { 1049 1050 /* Adjust siblings. */ 1051 1052 if (n->prev) 1053 n->prev->next = n->next; 1054 if (n->next) 1055 n->next->prev = n->prev; 1056 1057 /* Adjust parent. */ 1058 1059 if (n->parent != NULL) { 1060 if (n->parent->child == n) 1061 n->parent->child = n->next; 1062 if (n->parent->last == n) 1063 n->parent->last = n->prev; 1064 } 1065 1066 /* Adjust parse point. */ 1067 1068 if (man == NULL) 1069 return; 1070 if (man->last == n) { 1071 if (n->prev == NULL) { 1072 man->last = n->parent; 1073 man->next = ROFF_NEXT_CHILD; 1074 } else { 1075 man->last = n->prev; 1076 man->next = ROFF_NEXT_SIBLING; 1077 } 1078 } 1079 if (man->meta.first == n) 1080 man->meta.first = NULL; 1081 } 1082 1083 void 1084 roff_node_relink(struct roff_man *man, struct roff_node *n) 1085 { 1086 roff_node_unlink(man, n); 1087 n->prev = n->next = NULL; 1088 roff_node_append(man, n); 1089 } 1090 1091 void 1092 roff_node_free(struct roff_node *n) 1093 { 1094 1095 if (n->args != NULL) 1096 mdoc_argv_free(n->args); 1097 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1098 free(n->norm); 1099 eqn_box_free(n->eqn); 1100 free(n->string); 1101 free(n); 1102 } 1103 1104 void 1105 roff_node_delete(struct roff_man *man, struct roff_node *n) 1106 { 1107 1108 while (n->child != NULL) 1109 roff_node_delete(man, n->child); 1110 roff_node_unlink(man, n); 1111 roff_node_free(n); 1112 } 1113 1114 void 1115 deroff(char **dest, const struct roff_node *n) 1116 { 1117 char *cp; 1118 size_t sz; 1119 1120 if (n->type != ROFFT_TEXT) { 1121 for 
(n = n->child; n != NULL; n = n->next) 1122 deroff(dest, n); 1123 return; 1124 } 1125 1126 /* Skip leading whitespace. */ 1127 1128 for (cp = n->string; *cp != '\0'; cp++) { 1129 if (cp[0] == '\\' && cp[1] != '\0' && 1130 strchr(" %&0^|~", cp[1]) != NULL) 1131 cp++; 1132 else if ( ! isspace((unsigned char)*cp)) 1133 break; 1134 } 1135 1136 /* Skip trailing backslash. */ 1137 1138 sz = strlen(cp); 1139 if (sz > 0 && cp[sz - 1] == '\\') 1140 sz--; 1141 1142 /* Skip trailing whitespace. */ 1143 1144 for (; sz; sz--) 1145 if ( ! isspace((unsigned char)cp[sz-1])) 1146 break; 1147 1148 /* Skip empty strings. */ 1149 1150 if (sz == 0) 1151 return; 1152 1153 if (*dest == NULL) { 1154 *dest = mandoc_strndup(cp, sz); 1155 return; 1156 } 1157 1158 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1159 free(*dest); 1160 *dest = cp; 1161 } 1162 1163 /* --- main functions of the roff parser ---------------------------------- */ 1164 1165 /* 1166 * In the current line, expand escape sequences that produce parsable 1167 * input text. Also check the syntax of the remaining escape sequences, 1168 * which typically produce output glyphs or change formatter state. 1169 */ 1170 static int 1171 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1172 { 1173 struct mctx *ctx; /* current macro call context */ 1174 char ubuf[24]; /* buffer to print the number */ 1175 struct roff_node *n; /* used for header comments */ 1176 const char *start; /* start of the string to process */ 1177 char *stesc; /* start of an escape sequence ('\\') */ 1178 const char *esct; /* type of esccape sequence */ 1179 char *ep; /* end of comment string */ 1180 const char *stnam; /* start of the name, after "[(*" */ 1181 const char *cp; /* end of the name, e.g. 
before ']' */ 1182 const char *res; /* the string to be substituted */ 1183 char *nbuf; /* new buffer to copy buf->buf to */ 1184 size_t maxl; /* expected length of the escape name */ 1185 size_t naml; /* actual length of the escape name */ 1186 size_t asz; /* length of the replacement */ 1187 size_t rsz; /* length of the rest of the string */ 1188 int inaml; /* length returned from mandoc_escape() */ 1189 int expand_count; /* to avoid infinite loops */ 1190 int npos; /* position in numeric expression */ 1191 int arg_complete; /* argument not interrupted by eol */ 1192 int quote_args; /* true for \\$@, false for \\$* */ 1193 int done; /* no more input available */ 1194 int deftype; /* type of definition to paste */ 1195 int rcsid; /* kind of RCS id seen */ 1196 enum mandocerr err; /* for escape sequence problems */ 1197 char sign; /* increment number register */ 1198 char term; /* character terminating the escape */ 1199 1200 /* Search forward for comments. */ 1201 1202 done = 0; 1203 start = buf->buf + pos; 1204 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { 1205 if (stesc[0] != newesc || stesc[1] == '\0') 1206 continue; 1207 stesc++; 1208 if (*stesc != '"' && *stesc != '#') 1209 continue; 1210 1211 /* Comment found, look for RCS id. */ 1212 1213 rcsid = 0; 1214 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { 1215 rcsid = 1 << MANDOC_OS_OPENBSD; 1216 cp += 8; 1217 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { 1218 rcsid = 1 << MANDOC_OS_NETBSD; 1219 cp += 7; 1220 } 1221 if (cp != NULL && 1222 isalnum((unsigned char)*cp) == 0 && 1223 strchr(cp, '$') != NULL) { 1224 if (r->man->meta.rcsids & rcsid) 1225 mandoc_msg(MANDOCERR_RCS_REP, ln, 1226 (int)(stesc - buf->buf) + 1, 1227 "%s", stesc + 1); 1228 r->man->meta.rcsids |= rcsid; 1229 } 1230 1231 /* Handle trailing whitespace. 
*/ 1232 1233 ep = strchr(stesc--, '\0') - 1; 1234 if (*ep == '\n') { 1235 done = 1; 1236 ep--; 1237 } 1238 if (*ep == ' ' || *ep == '\t') 1239 mandoc_msg(MANDOCERR_SPACE_EOL, 1240 ln, (int)(ep - buf->buf), NULL); 1241 1242 /* 1243 * Save comments preceding the title macro 1244 * in the syntax tree. 1245 */ 1246 1247 if (newesc != ASCII_ESC && r->format == 0) { 1248 while (*ep == ' ' || *ep == '\t') 1249 ep--; 1250 ep[1] = '\0'; 1251 n = roff_node_alloc(r->man, 1252 ln, stesc + 1 - buf->buf, 1253 ROFFT_COMMENT, TOKEN_NONE); 1254 n->string = mandoc_strdup(stesc + 2); 1255 roff_node_append(r->man, n); 1256 n->flags |= NODE_VALID | NODE_ENDED; 1257 r->man->next = ROFF_NEXT_SIBLING; 1258 } 1259 1260 /* Line continuation with comment. */ 1261 1262 if (stesc[1] == '#') { 1263 *stesc = '\0'; 1264 return ROFF_IGN | ROFF_APPEND; 1265 } 1266 1267 /* Discard normal comments. */ 1268 1269 while (stesc > start && stesc[-1] == ' ' && 1270 (stesc == start + 1 || stesc[-2] != '\\')) 1271 stesc--; 1272 *stesc = '\0'; 1273 break; 1274 } 1275 if (stesc == start) 1276 return ROFF_CONT; 1277 stesc--; 1278 1279 /* Notice the end of the input. */ 1280 1281 if (*stesc == '\n') { 1282 *stesc-- = '\0'; 1283 done = 1; 1284 } 1285 1286 expand_count = 0; 1287 while (stesc >= start) { 1288 if (*stesc != newesc) { 1289 1290 /* 1291 * If we have a non-standard escape character, 1292 * escape literal backslashes because all 1293 * processing in subsequent functions uses 1294 * the standard escaping rules. 1295 */ 1296 1297 if (newesc != ASCII_ESC && *stesc == '\\') { 1298 *stesc = '\0'; 1299 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", 1300 buf->buf, stesc + 1) + 1; 1301 start = nbuf + pos; 1302 stesc = nbuf + (stesc - buf->buf); 1303 free(buf->buf); 1304 buf->buf = nbuf; 1305 } 1306 1307 /* Search backwards for the next escape. */ 1308 1309 stesc--; 1310 continue; 1311 } 1312 1313 /* If it is escaped, skip it. 
*/ 1314 1315 for (cp = stesc - 1; cp >= start; cp--) 1316 if (*cp != r->escape) 1317 break; 1318 1319 if ((stesc - cp) % 2 == 0) { 1320 while (stesc > cp) 1321 *stesc-- = '\\'; 1322 continue; 1323 } else if (stesc[1] != '\0') { 1324 *stesc = '\\'; 1325 } else { 1326 *stesc-- = '\0'; 1327 if (done) 1328 continue; 1329 else 1330 return ROFF_IGN | ROFF_APPEND; 1331 } 1332 1333 /* Decide whether to expand or to check only. */ 1334 1335 term = '\0'; 1336 cp = stesc + 1; 1337 if (*cp == 'E') 1338 cp++; 1339 esct = cp; 1340 switch (*esct) { 1341 case '*': 1342 case '$': 1343 res = NULL; 1344 break; 1345 case 'B': 1346 case 'w': 1347 term = cp[1]; 1348 /* FALLTHROUGH */ 1349 case 'n': 1350 sign = cp[1]; 1351 if (sign == '+' || sign == '-') 1352 cp++; 1353 res = ubuf; 1354 break; 1355 default: 1356 err = MANDOCERR_OK; 1357 switch(mandoc_escape(&cp, &stnam, &inaml)) { 1358 case ESCAPE_SPECIAL: 1359 if (mchars_spec2cp(stnam, inaml) >= 0) 1360 break; 1361 /* FALLTHROUGH */ 1362 case ESCAPE_ERROR: 1363 err = MANDOCERR_ESC_BAD; 1364 break; 1365 case ESCAPE_UNDEF: 1366 err = MANDOCERR_ESC_UNDEF; 1367 break; 1368 case ESCAPE_UNSUPP: 1369 err = MANDOCERR_ESC_UNSUPP; 1370 break; 1371 default: 1372 break; 1373 } 1374 if (err != MANDOCERR_OK) 1375 mandoc_msg(err, ln, (int)(stesc - buf->buf), 1376 "%.*s", (int)(cp - stesc), stesc); 1377 stesc--; 1378 continue; 1379 } 1380 1381 if (EXPAND_LIMIT < ++expand_count) { 1382 mandoc_msg(MANDOCERR_ROFFLOOP, 1383 ln, (int)(stesc - buf->buf), NULL); 1384 return ROFF_IGN; 1385 } 1386 1387 /* 1388 * The third character decides the length 1389 * of the name of the string or register. 1390 * Save a pointer to the name. 
1391 */ 1392 1393 if (term == '\0') { 1394 switch (*++cp) { 1395 case '\0': 1396 maxl = 0; 1397 break; 1398 case '(': 1399 cp++; 1400 maxl = 2; 1401 break; 1402 case '[': 1403 cp++; 1404 term = ']'; 1405 maxl = 0; 1406 break; 1407 default: 1408 maxl = 1; 1409 break; 1410 } 1411 } else { 1412 cp += 2; 1413 maxl = 0; 1414 } 1415 stnam = cp; 1416 1417 /* Advance to the end of the name. */ 1418 1419 naml = 0; 1420 arg_complete = 1; 1421 while (maxl == 0 || naml < maxl) { 1422 if (*cp == '\0') { 1423 mandoc_msg(MANDOCERR_ESC_BAD, ln, 1424 (int)(stesc - buf->buf), "%s", stesc); 1425 arg_complete = 0; 1426 break; 1427 } 1428 if (maxl == 0 && *cp == term) { 1429 cp++; 1430 break; 1431 } 1432 if (*cp++ != '\\' || *esct != 'w') { 1433 naml++; 1434 continue; 1435 } 1436 switch (mandoc_escape(&cp, NULL, NULL)) { 1437 case ESCAPE_SPECIAL: 1438 case ESCAPE_UNICODE: 1439 case ESCAPE_NUMBERED: 1440 case ESCAPE_UNDEF: 1441 case ESCAPE_OVERSTRIKE: 1442 naml++; 1443 break; 1444 default: 1445 break; 1446 } 1447 } 1448 1449 /* 1450 * Retrieve the replacement string; if it is 1451 * undefined, resume searching for escapes. 1452 */ 1453 1454 switch (*esct) { 1455 case '*': 1456 if (arg_complete) { 1457 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1458 res = roff_getstrn(r, stnam, naml, &deftype); 1459 1460 /* 1461 * If not overriden, let \*(.T 1462 * through to the formatters. 1463 */ 1464 1465 if (res == NULL && naml == 2 && 1466 stnam[0] == '.' && stnam[1] == 'T') { 1467 roff_setstrn(&r->strtab, 1468 ".T", 2, NULL, 0, 0); 1469 stesc--; 1470 continue; 1471 } 1472 } 1473 break; 1474 case '$': 1475 if (r->mstackpos < 0) { 1476 mandoc_msg(MANDOCERR_ARG_UNDEF, ln, 1477 (int)(stesc - buf->buf), "%.3s", stesc); 1478 break; 1479 } 1480 ctx = r->mstack + r->mstackpos; 1481 npos = esct[1] - '1'; 1482 if (npos >= 0 && npos <= 8) { 1483 res = npos < ctx->argc ? 
1484 ctx->argv[npos] : ""; 1485 break; 1486 } 1487 if (esct[1] == '*') 1488 quote_args = 0; 1489 else if (esct[1] == '@') 1490 quote_args = 1; 1491 else { 1492 mandoc_msg(MANDOCERR_ARG_NONUM, ln, 1493 (int)(stesc - buf->buf), "%.3s", stesc); 1494 break; 1495 } 1496 asz = 0; 1497 for (npos = 0; npos < ctx->argc; npos++) { 1498 if (npos) 1499 asz++; /* blank */ 1500 if (quote_args) 1501 asz += 2; /* quotes */ 1502 asz += strlen(ctx->argv[npos]); 1503 } 1504 if (asz != 3) { 1505 rsz = buf->sz - (stesc - buf->buf) - 3; 1506 if (asz < 3) 1507 memmove(stesc + asz, stesc + 3, rsz); 1508 buf->sz += asz - 3; 1509 nbuf = mandoc_realloc(buf->buf, buf->sz); 1510 start = nbuf + pos; 1511 stesc = nbuf + (stesc - buf->buf); 1512 buf->buf = nbuf; 1513 if (asz > 3) 1514 memmove(stesc + asz, stesc + 3, rsz); 1515 } 1516 for (npos = 0; npos < ctx->argc; npos++) { 1517 if (npos) 1518 *stesc++ = ' '; 1519 if (quote_args) 1520 *stesc++ = '"'; 1521 cp = ctx->argv[npos]; 1522 while (*cp != '\0') 1523 *stesc++ = *cp++; 1524 if (quote_args) 1525 *stesc++ = '"'; 1526 } 1527 continue; 1528 case 'B': 1529 npos = 0; 1530 ubuf[0] = arg_complete && 1531 roff_evalnum(r, ln, stnam, &npos, 1532 NULL, ROFFNUM_SCALE) && 1533 stnam + npos + 1 == cp ? '1' : '0'; 1534 ubuf[1] = '\0'; 1535 break; 1536 case 'n': 1537 if (arg_complete) 1538 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1539 roff_getregn(r, stnam, naml, sign)); 1540 else 1541 ubuf[0] = '\0'; 1542 break; 1543 case 'w': 1544 /* use even incomplete args */ 1545 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1546 24 * (int)naml); 1547 break; 1548 } 1549 1550 if (res == NULL) { 1551 if (*esct == '*') 1552 mandoc_msg(MANDOCERR_STR_UNDEF, 1553 ln, (int)(stesc - buf->buf), 1554 "%.*s", (int)naml, stnam); 1555 res = ""; 1556 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1557 mandoc_msg(MANDOCERR_ROFFLOOP, 1558 ln, (int)(stesc - buf->buf), NULL); 1559 return ROFF_IGN; 1560 } 1561 1562 /* Replace the escape sequence by the string. 
*/ 1563 1564 *stesc = '\0'; 1565 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1566 buf->buf, res, cp) + 1; 1567 1568 /* Prepare for the next replacement. */ 1569 1570 start = nbuf + pos; 1571 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1572 free(buf->buf); 1573 buf->buf = nbuf; 1574 } 1575 return ROFF_CONT; 1576 } 1577 1578 /* 1579 * Parse a quoted or unquoted roff-style request or macro argument. 1580 * Return a pointer to the parsed argument, which is either the original 1581 * pointer or advanced by one byte in case the argument is quoted. 1582 * NUL-terminate the argument in place. 1583 * Collapse pairs of quotes inside quoted arguments. 1584 * Advance the argument pointer to the next argument, 1585 * or to the NUL byte terminating the argument line. 1586 */ 1587 char * 1588 roff_getarg(struct roff *r, char **cpp, int ln, int *pos) 1589 { 1590 struct buf buf; 1591 char *cp, *start; 1592 int newesc, pairs, quoted, white; 1593 1594 /* Quoting can only start with a new word. */ 1595 start = *cpp; 1596 quoted = 0; 1597 if ('"' == *start) { 1598 quoted = 1; 1599 start++; 1600 } 1601 1602 newesc = pairs = white = 0; 1603 for (cp = start; '\0' != *cp; cp++) { 1604 1605 /* 1606 * Move the following text left 1607 * after quoted quotes and after "\\" and "\t". 1608 */ 1609 if (pairs) 1610 cp[-pairs] = cp[0]; 1611 1612 if ('\\' == cp[0]) { 1613 /* 1614 * In copy mode, translate double to single 1615 * backslashes and backslash-t to literal tabs. 1616 */ 1617 switch (cp[1]) { 1618 case 'a': 1619 case 't': 1620 cp[-pairs] = '\t'; 1621 pairs++; 1622 cp++; 1623 break; 1624 case '\\': 1625 newesc = 1; 1626 cp[-pairs] = ASCII_ESC; 1627 pairs++; 1628 cp++; 1629 break; 1630 case ' ': 1631 /* Skip escaped blanks. */ 1632 if (0 == quoted) 1633 cp++; 1634 break; 1635 default: 1636 break; 1637 } 1638 } else if (0 == quoted) { 1639 if (' ' == cp[0]) { 1640 /* Unescaped blanks end unquoted args. 
*/ 1641 white = 1; 1642 break; 1643 } 1644 } else if ('"' == cp[0]) { 1645 if ('"' == cp[1]) { 1646 /* Quoted quotes collapse. */ 1647 pairs++; 1648 cp++; 1649 } else { 1650 /* Unquoted quotes end quoted args. */ 1651 quoted = 2; 1652 break; 1653 } 1654 } 1655 } 1656 1657 /* Quoted argument without a closing quote. */ 1658 if (1 == quoted) 1659 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1660 1661 /* NUL-terminate this argument and move to the next one. */ 1662 if (pairs) 1663 cp[-pairs] = '\0'; 1664 if ('\0' != *cp) { 1665 *cp++ = '\0'; 1666 while (' ' == *cp) 1667 cp++; 1668 } 1669 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1670 *cpp = cp; 1671 1672 if ('\0' == *cp && (white || ' ' == cp[-1])) 1673 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1674 1675 start = mandoc_strdup(start); 1676 if (newesc == 0) 1677 return start; 1678 1679 buf.buf = start; 1680 buf.sz = strlen(start) + 1; 1681 buf.next = NULL; 1682 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { 1683 free(buf.buf); 1684 buf.buf = mandoc_strdup(""); 1685 } 1686 return buf.buf; 1687 } 1688 1689 1690 /* 1691 * Process text streams. 1692 */ 1693 static int 1694 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1695 { 1696 size_t sz; 1697 const char *start; 1698 char *p; 1699 int isz; 1700 enum mandoc_esc esc; 1701 1702 /* Spring the input line trap. */ 1703 1704 if (roffit_lines == 1) { 1705 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1706 free(buf->buf); 1707 buf->buf = p; 1708 buf->sz = isz + 1; 1709 *offs = 0; 1710 free(roffit_macro); 1711 roffit_lines = 0; 1712 return ROFF_REPARSE; 1713 } else if (roffit_lines > 1) 1714 --roffit_lines; 1715 1716 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1717 if (roffce_lines < 1) { 1718 r->man->last = roffce_node; 1719 r->man->next = ROFF_NEXT_SIBLING; 1720 roffce_lines = 0; 1721 roffce_node = NULL; 1722 } else 1723 roffce_lines--; 1724 } 1725 1726 /* Convert all breakable hyphens into ASCII_HYPH. 
*/ 1727 1728 start = p = buf->buf + pos; 1729 1730 while (*p != '\0') { 1731 sz = strcspn(p, "-\\"); 1732 p += sz; 1733 1734 if (*p == '\0') 1735 break; 1736 1737 if (*p == '\\') { 1738 /* Skip over escapes. */ 1739 p++; 1740 esc = mandoc_escape((const char **)&p, NULL, NULL); 1741 if (esc == ESCAPE_ERROR) 1742 break; 1743 while (*p == '-') 1744 p++; 1745 continue; 1746 } else if (p == start) { 1747 p++; 1748 continue; 1749 } 1750 1751 if (isalpha((unsigned char)p[-1]) && 1752 isalpha((unsigned char)p[1])) 1753 *p = ASCII_HYPH; 1754 p++; 1755 } 1756 return ROFF_CONT; 1757 } 1758 1759 int 1760 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) 1761 { 1762 enum roff_tok t; 1763 int e; 1764 int pos; /* parse point */ 1765 int spos; /* saved parse point for messages */ 1766 int ppos; /* original offset in buf->buf */ 1767 int ctl; /* macro line (boolean) */ 1768 1769 ppos = pos = *offs; 1770 1771 /* Handle in-line equation delimiters. */ 1772 1773 if (r->tbl == NULL && 1774 r->last_eqn != NULL && r->last_eqn->delim && 1775 (r->eqn == NULL || r->eqn_inline)) { 1776 e = roff_eqndelim(r, buf, pos); 1777 if (e == ROFF_REPARSE) 1778 return e; 1779 assert(e == ROFF_CONT); 1780 } 1781 1782 /* Expand some escape sequences. */ 1783 1784 e = roff_expand(r, buf, ln, pos, r->escape); 1785 if ((e & ROFF_MASK) == ROFF_IGN) 1786 return e; 1787 assert(e == ROFF_CONT); 1788 1789 ctl = roff_getcontrol(r, buf->buf, &pos); 1790 1791 /* 1792 * First, if a scope is open and we're not a macro, pass the 1793 * text through the macro's filter. 1794 * Equations process all content themselves. 1795 * Tables process almost all content themselves, but we want 1796 * to warn about macros before passing it there. 1797 */ 1798 1799 if (r->last != NULL && ! 
ctl) { 1800 t = r->last->tok; 1801 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1802 if ((e & ROFF_MASK) == ROFF_IGN) 1803 return e; 1804 e &= ~ROFF_MASK; 1805 } else 1806 e = ROFF_IGN; 1807 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1808 eqn_read(r->eqn, buf->buf + ppos); 1809 return e; 1810 } 1811 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1812 tbl_read(r->tbl, ln, buf->buf, ppos); 1813 roff_addtbl(r->man, ln, r->tbl); 1814 return e; 1815 } 1816 if ( ! ctl) 1817 return roff_parsetext(r, buf, pos, offs) | e; 1818 1819 /* Skip empty request lines. */ 1820 1821 if (buf->buf[pos] == '"') { 1822 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL); 1823 return ROFF_IGN; 1824 } else if (buf->buf[pos] == '\0') 1825 return ROFF_IGN; 1826 1827 /* 1828 * If a scope is open, go to the child handler for that macro, 1829 * as it may want to preprocess before doing anything with it. 1830 * Don't do so if an equation is open. 1831 */ 1832 1833 if (r->last) { 1834 t = r->last->tok; 1835 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1836 } 1837 1838 /* No scope is open. This is a new request or macro. */ 1839 1840 spos = pos; 1841 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1842 1843 /* Tables ignore most macros. */ 1844 1845 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS || 1846 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) { 1847 mandoc_msg(MANDOCERR_TBLMACRO, 1848 ln, pos, "%s", buf->buf + spos); 1849 if (t != TOKEN_NONE) 1850 return ROFF_IGN; 1851 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1852 pos++; 1853 while (buf->buf[pos] == ' ') 1854 pos++; 1855 tbl_read(r->tbl, ln, buf->buf, pos); 1856 roff_addtbl(r->man, ln, r->tbl); 1857 return ROFF_IGN; 1858 } 1859 1860 /* For now, let high level macros abort .ce mode. 
*/ 1861 1862 if (ctl && roffce_node != NULL && 1863 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 1864 t == ROFF_TH || t == ROFF_TS)) { 1865 r->man->last = roffce_node; 1866 r->man->next = ROFF_NEXT_SIBLING; 1867 roffce_lines = 0; 1868 roffce_node = NULL; 1869 } 1870 1871 /* 1872 * This is neither a roff request nor a user-defined macro. 1873 * Let the standard macro set parsers handle it. 1874 */ 1875 1876 if (t == TOKEN_NONE) 1877 return ROFF_CONT; 1878 1879 /* Execute a roff request or a user defined macro. */ 1880 1881 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); 1882 } 1883 1884 /* 1885 * Internal interface function to tell the roff parser that execution 1886 * of the current macro ended. This is required because macro 1887 * definitions usually do not end with a .return request. 1888 */ 1889 void 1890 roff_userret(struct roff *r) 1891 { 1892 struct mctx *ctx; 1893 int i; 1894 1895 assert(r->mstackpos >= 0); 1896 ctx = r->mstack + r->mstackpos; 1897 for (i = 0; i < ctx->argc; i++) 1898 free(ctx->argv[i]); 1899 ctx->argc = 0; 1900 r->mstackpos--; 1901 } 1902 1903 void 1904 roff_endparse(struct roff *r) 1905 { 1906 if (r->last != NULL) 1907 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, 1908 r->last->col, "%s", roff_name[r->last->tok]); 1909 1910 if (r->eqn != NULL) { 1911 mandoc_msg(MANDOCERR_BLK_NOEND, 1912 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1913 eqn_parse(r->eqn); 1914 r->eqn = NULL; 1915 } 1916 1917 if (r->tbl != NULL) { 1918 tbl_end(r->tbl, 1); 1919 r->tbl = NULL; 1920 } 1921 } 1922 1923 /* 1924 * Parse a roff node's type from the input buffer. This must be in the 1925 * form of ".foo xxx" in the usual way. 
1926 */ 1927 static enum roff_tok 1928 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1929 { 1930 char *cp; 1931 const char *mac; 1932 size_t maclen; 1933 int deftype; 1934 enum roff_tok t; 1935 1936 cp = buf + *pos; 1937 1938 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1939 return TOKEN_NONE; 1940 1941 mac = cp; 1942 maclen = roff_getname(r, &cp, ln, ppos); 1943 1944 deftype = ROFFDEF_USER | ROFFDEF_REN; 1945 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 1946 switch (deftype) { 1947 case ROFFDEF_USER: 1948 t = ROFF_USERDEF; 1949 break; 1950 case ROFFDEF_REN: 1951 t = ROFF_RENAMED; 1952 break; 1953 default: 1954 t = roffhash_find(r->reqtab, mac, maclen); 1955 break; 1956 } 1957 if (t != TOKEN_NONE) 1958 *pos = cp - buf; 1959 else if (deftype == ROFFDEF_UNDEF) { 1960 /* Using an undefined macro defines it to be empty. */ 1961 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 1962 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 1963 } 1964 return t; 1965 } 1966 1967 /* --- handling of request blocks ----------------------------------------- */ 1968 1969 static int 1970 roff_cblock(ROFF_ARGS) 1971 { 1972 1973 /* 1974 * A block-close `..' should only be invoked as a child of an 1975 * ignore macro, otherwise raise a warning and just ignore it. 1976 */ 1977 1978 if (r->last == NULL) { 1979 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 1980 return ROFF_IGN; 1981 } 1982 1983 switch (r->last->tok) { 1984 case ROFF_am: 1985 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ 1986 case ROFF_ami: 1987 case ROFF_de: 1988 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 1989 case ROFF_dei: 1990 case ROFF_ig: 1991 break; 1992 default: 1993 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 1994 return ROFF_IGN; 1995 } 1996 1997 if (buf->buf[pos] != '\0') 1998 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 1999 ".. 
%s", buf->buf + pos); 2000 2001 roffnode_pop(r); 2002 roffnode_cleanscope(r); 2003 return ROFF_IGN; 2004 2005 } 2006 2007 /* 2008 * Pop all nodes ending at the end of the current input line. 2009 * Return the number of loops ended. 2010 */ 2011 static int 2012 roffnode_cleanscope(struct roff *r) 2013 { 2014 int inloop; 2015 2016 inloop = 0; 2017 while (r->last != NULL) { 2018 if (--r->last->endspan != 0) 2019 break; 2020 inloop += roffnode_pop(r); 2021 } 2022 return inloop; 2023 } 2024 2025 /* 2026 * Handle the closing \} of a conditional block. 2027 * Apart from generating warnings, this only pops nodes. 2028 * Return the number of loops ended. 2029 */ 2030 static int 2031 roff_ccond(struct roff *r, int ln, int ppos) 2032 { 2033 if (NULL == r->last) { 2034 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2035 return 0; 2036 } 2037 2038 switch (r->last->tok) { 2039 case ROFF_el: 2040 case ROFF_ie: 2041 case ROFF_if: 2042 case ROFF_while: 2043 break; 2044 default: 2045 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2046 return 0; 2047 } 2048 2049 if (r->last->endspan > -1) { 2050 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2051 return 0; 2052 } 2053 2054 return roffnode_pop(r) + roffnode_cleanscope(r); 2055 } 2056 2057 static int 2058 roff_block(ROFF_ARGS) 2059 { 2060 const char *name, *value; 2061 char *call, *cp, *iname, *rname; 2062 size_t csz, namesz, rsz; 2063 int deftype; 2064 2065 /* Ignore groff compatibility mode for now. */ 2066 2067 if (tok == ROFF_de1) 2068 tok = ROFF_de; 2069 else if (tok == ROFF_dei1) 2070 tok = ROFF_dei; 2071 else if (tok == ROFF_am1) 2072 tok = ROFF_am; 2073 else if (tok == ROFF_ami1) 2074 tok = ROFF_ami; 2075 2076 /* Parse the macro name argument. */ 2077 2078 cp = buf->buf + pos; 2079 if (tok == ROFF_ig) { 2080 iname = NULL; 2081 namesz = 0; 2082 } else { 2083 iname = cp; 2084 namesz = roff_getname(r, &cp, ln, ppos); 2085 iname[namesz] = '\0'; 2086 } 2087 2088 /* Resolve the macro name argument if it is indirect. 
*/ 2089 2090 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2091 deftype = ROFFDEF_USER; 2092 name = roff_getstrn(r, iname, namesz, &deftype); 2093 if (name == NULL) { 2094 mandoc_msg(MANDOCERR_STR_UNDEF, 2095 ln, (int)(iname - buf->buf), 2096 "%.*s", (int)namesz, iname); 2097 namesz = 0; 2098 } else 2099 namesz = strlen(name); 2100 } else 2101 name = iname; 2102 2103 if (namesz == 0 && tok != ROFF_ig) { 2104 mandoc_msg(MANDOCERR_REQ_EMPTY, 2105 ln, ppos, "%s", roff_name[tok]); 2106 return ROFF_IGN; 2107 } 2108 2109 roffnode_push(r, tok, name, ln, ppos); 2110 2111 /* 2112 * At the beginning of a `de' macro, clear the existing string 2113 * with the same name, if there is one. New content will be 2114 * appended from roff_block_text() in multiline mode. 2115 */ 2116 2117 if (tok == ROFF_de || tok == ROFF_dei) { 2118 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2119 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2120 } else if (tok == ROFF_am || tok == ROFF_ami) { 2121 deftype = ROFFDEF_ANY; 2122 value = roff_getstrn(r, iname, namesz, &deftype); 2123 switch (deftype) { /* Before appending, ... */ 2124 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2125 roff_setstrn(&r->strtab, name, namesz, 2126 value, strlen(value), 0); 2127 break; 2128 case ROFFDEF_REN: /* call original standard macro. */ 2129 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2130 (int)strlen(value), value); 2131 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2132 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2133 free(call); 2134 break; 2135 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2136 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2137 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2138 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2139 (int)rsz, rname); 2140 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2141 free(call); 2142 free(rname); 2143 break; 2144 default: 2145 break; 2146 } 2147 } 2148 2149 if (*cp == '\0') 2150 return ROFF_IGN; 2151 2152 /* Get the custom end marker. */ 2153 2154 iname = cp; 2155 namesz = roff_getname(r, &cp, ln, ppos); 2156 2157 /* Resolve the end marker if it is indirect. */ 2158 2159 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2160 deftype = ROFFDEF_USER; 2161 name = roff_getstrn(r, iname, namesz, &deftype); 2162 if (name == NULL) { 2163 mandoc_msg(MANDOCERR_STR_UNDEF, 2164 ln, (int)(iname - buf->buf), 2165 "%.*s", (int)namesz, iname); 2166 namesz = 0; 2167 } else 2168 namesz = strlen(name); 2169 } else 2170 name = iname; 2171 2172 if (namesz) 2173 r->last->end = mandoc_strndup(name, namesz); 2174 2175 if (*cp != '\0') 2176 mandoc_msg(MANDOCERR_ARG_EXCESS, 2177 ln, pos, ".%s ... %s", roff_name[tok], cp); 2178 2179 return ROFF_IGN; 2180 } 2181 2182 static int 2183 roff_block_sub(ROFF_ARGS) 2184 { 2185 enum roff_tok t; 2186 int i, j; 2187 2188 /* 2189 * First check whether a custom macro exists at this level. If 2190 * it does, then check against it. This is some of groff's 2191 * stranger behaviours. If we encountered a custom end-scope 2192 * tag and that tag also happens to be a "real" macro, then we 2193 * need to try interpreting it again as a real macro. If it's 2194 * not, then return ignore. Else continue. 
2195 */ 2196 2197 if (r->last->end) { 2198 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2199 if (buf->buf[i] != r->last->end[j]) 2200 break; 2201 2202 if (r->last->end[j] == '\0' && 2203 (buf->buf[i] == '\0' || 2204 buf->buf[i] == ' ' || 2205 buf->buf[i] == '\t')) { 2206 roffnode_pop(r); 2207 roffnode_cleanscope(r); 2208 2209 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2210 i++; 2211 2212 pos = i; 2213 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2214 TOKEN_NONE) 2215 return ROFF_RERUN; 2216 return ROFF_IGN; 2217 } 2218 } 2219 2220 /* 2221 * If we have no custom end-query or lookup failed, then try 2222 * pulling it out of the hashtable. 2223 */ 2224 2225 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2226 2227 if (t != ROFF_cblock) { 2228 if (tok != ROFF_ig) 2229 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2230 return ROFF_IGN; 2231 } 2232 2233 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2234 } 2235 2236 static int 2237 roff_block_text(ROFF_ARGS) 2238 { 2239 2240 if (tok != ROFF_ig) 2241 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2242 2243 return ROFF_IGN; 2244 } 2245 2246 static int 2247 roff_cond_sub(ROFF_ARGS) 2248 { 2249 struct roffnode *bl; 2250 char *ep; 2251 int endloop, irc, rr; 2252 enum roff_tok t; 2253 2254 irc = ROFF_IGN; 2255 rr = r->last->rule; 2256 endloop = tok != ROFF_while ? ROFF_IGN : 2257 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2258 if (roffnode_cleanscope(r)) 2259 irc |= endloop; 2260 2261 /* 2262 * If `\}' occurs on a macro line without a preceding macro, 2263 * drop the line completely. 2264 */ 2265 2266 ep = buf->buf + pos; 2267 if (ep[0] == '\\' && ep[1] == '}') 2268 rr = 0; 2269 2270 /* 2271 * The closing delimiter `\}' rewinds the conditional scope 2272 * but is otherwise ignored when interpreting the line. 
2273 */ 2274 2275 while ((ep = strchr(ep, '\\')) != NULL) { 2276 switch (ep[1]) { 2277 case '}': 2278 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2279 if (roff_ccond(r, ln, ep - buf->buf)) 2280 irc |= endloop; 2281 break; 2282 case '\0': 2283 ++ep; 2284 break; 2285 default: 2286 ep += 2; 2287 break; 2288 } 2289 } 2290 2291 /* 2292 * Fully handle known macros when they are structurally 2293 * required or when the conditional evaluated to true. 2294 */ 2295 2296 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2297 if (t == ROFF_break) { 2298 if (irc & ROFF_LOOPMASK) 2299 irc = ROFF_IGN | ROFF_LOOPEXIT; 2300 else if (rr) { 2301 for (bl = r->last; bl != NULL; bl = bl->parent) { 2302 bl->rule = 0; 2303 if (bl->tok == ROFF_while) 2304 break; 2305 } 2306 } 2307 } else if (t != TOKEN_NONE && 2308 (rr || roffs[t].flags & ROFFMAC_STRUCT)) 2309 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2310 else 2311 irc |= rr ? ROFF_CONT : ROFF_IGN; 2312 return irc; 2313 } 2314 2315 static int 2316 roff_cond_text(ROFF_ARGS) 2317 { 2318 char *ep; 2319 int endloop, irc, rr; 2320 2321 irc = ROFF_IGN; 2322 rr = r->last->rule; 2323 endloop = tok != ROFF_while ? ROFF_IGN : 2324 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2325 if (roffnode_cleanscope(r)) 2326 irc |= endloop; 2327 2328 /* 2329 * If `\}' occurs on a text line with neither preceding 2330 * nor following characters, drop the line completely. 2331 */ 2332 2333 ep = buf->buf + pos; 2334 if (strcmp(ep, "\\}") == 0) 2335 rr = 0; 2336 2337 /* 2338 * The closing delimiter `\}' rewinds the conditional scope 2339 * but is otherwise ignored when interpreting the line. 
2340 */ 2341 2342 while ((ep = strchr(ep, '\\')) != NULL) { 2343 switch (ep[1]) { 2344 case '}': 2345 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2346 if (roff_ccond(r, ln, ep - buf->buf)) 2347 irc |= endloop; 2348 break; 2349 case '\0': 2350 ++ep; 2351 break; 2352 default: 2353 ep += 2; 2354 break; 2355 } 2356 } 2357 if (rr) 2358 irc |= ROFF_CONT; 2359 return irc; 2360 } 2361 2362 /* --- handling of numeric and conditional expressions -------------------- */ 2363 2364 /* 2365 * Parse a single signed integer number. Stop at the first non-digit. 2366 * If there is at least one digit, return success and advance the 2367 * parse point, else return failure and let the parse point unchanged. 2368 * Ignore overflows, treat them just like the C language. 2369 */ 2370 static int 2371 roff_getnum(const char *v, int *pos, int *res, int flags) 2372 { 2373 int myres, scaled, n, p; 2374 2375 if (NULL == res) 2376 res = &myres; 2377 2378 p = *pos; 2379 n = v[p] == '-'; 2380 if (n || v[p] == '+') 2381 p++; 2382 2383 if (flags & ROFFNUM_WHITE) 2384 while (isspace((unsigned char)v[p])) 2385 p++; 2386 2387 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2388 *res = 10 * *res + v[p] - '0'; 2389 if (p == *pos + n) 2390 return 0; 2391 2392 if (n) 2393 *res = -*res; 2394 2395 /* Each number may be followed by one optional scaling unit. 
/*
 * Evaluate a string comparison condition.
 * The character at the cursor is the delimiter.
 * Succeed if the text between its first and second occurrence
 * equals the text between its second and third occurrence.
 * Leave the cursor just after the third occurrence or,
 * lacking that, at the end of the line.
 * Return 1 for a match and 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs, *rhs;
	char		 delim;
	int		 match;

	match = 0;
	delim = v[*pos];
	lhs = v + *pos + 1;		/* start of the first string */
	rhs = strchr(lhs, delim);	/* the middle delimiter, if any */

	if (rhs != NULL) {
		/* Compare both strings character by character. */
		for (rhs++; *rhs != '\0'; lhs++, rhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {
				/* Both strings ended simultaneously. */
				match = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return match;
}
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition.
 *
 * v is the input line and *pos the cursor into it; the cursor is
 * advanced past whatever part of the condition was consumed.
 * r and ln are passed on to the name, register, string and number
 * helpers.  Return 1 if the condition holds, 0 otherwise.
 */
static int
roff_evalcond(struct roff *r, int ln, char *v, int *pos)
{
	const char	*start, *end;
	char		*cp, *name;
	size_t		 sz;
	int		 deftype, len, number, savepos, istrue, wanttrue;

	/* A leading exclamation mark negates the condition. */

	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case '\0':
		/* Nothing to evaluate: false, even when negated. */
		return 0;
	case 'n':
	case 'o':
		/* These single-letter conditions always hold. */
		(*pos)++;
		return wanttrue;
	case 'e':
	case 't':
	case 'v':
		/* These single-letter conditions never hold. */
		(*pos)++;
		return !wanttrue;
	case 'c':
		/* Character test: skip the letter and following blanks. */
		do {
			(*pos)++;
		} while (v[*pos] == ' ');

		/*
		 * Quirk for groff compatibility:
		 * The horizontal tab is neither available nor unavailable.
		 */

		if (v[*pos] == '\t') {
			(*pos)++;
			return 0;
		}

		/* Printable ASCII characters are available. */

		if (v[*pos] != '\\') {
			(*pos)++;
			return wanttrue;
		}

		/*
		 * Otherwise, parse the escape sequence after the
		 * backslash and ask the character tables about it.
		 */

		end = v + ++*pos;
		switch (mandoc_escape(&end, &start, &len)) {
		case ESCAPE_SPECIAL:
			istrue = mchars_spec2cp(start, len) != -1;
			break;
		case ESCAPE_UNICODE:
			istrue = 1;
			break;
		case ESCAPE_NUMBERED:
			istrue = mchars_num2char(start, len) != -1;
			break;
		default:
			/* Forces the comparison below to fail. */
			istrue = !wanttrue;
			break;
		}
		*pos = end - v;
		return istrue == wanttrue;
	case 'd':
	case 'r':
		/* Definition or register test: isolate the name. */
		cp = v + *pos + 1;
		while (*cp == ' ')
			cp++;
		name = cp;
		sz = roff_getname(r, &cp, ln, cp - v);
		if (sz == 0)
			istrue = 0;
		else if (v[*pos] == 'r')
			istrue = roff_hasregn(r, name, sz);
		else {
			/*
			 * Only the deftype reported back by the lookup
			 * matters here: nonzero counts as defined.
			 */
			deftype = ROFFDEF_ANY;
			roff_getstrn(r, name, sz, &deftype);
			istrue = !!deftype;
		}
		/* Leave the cursor directly after the name. */
		*pos = (name + sz) - v;
		return istrue == wanttrue;
	default:
		break;
	}

	/*
	 * Try a numeric expression first; it holds if positive.
	 * If that fails without consuming any input, try a string
	 * comparison instead.  A numeric expression that fails
	 * half-way through makes the condition false.
	 */

	savepos = *pos;
	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
		return (number > 0) == wanttrue;
	else if (*pos == savepos)
		return roff_evalstrcond(v, pos) == wanttrue;
	else
		return 0;
}
*/ 2656 2657 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2658 r->last->endspan = -1; 2659 pos += 2; 2660 while (buf->buf[pos] == ' ') 2661 pos++; 2662 goto out; 2663 } 2664 2665 /* 2666 * Anything else following the conditional causes 2667 * single-line scope. Warn if the scope contains 2668 * nothing but trailing whitespace. 2669 */ 2670 2671 if (buf->buf[pos] == '\0') 2672 mandoc_msg(MANDOCERR_COND_EMPTY, 2673 ln, ppos, "%s", roff_name[tok]); 2674 2675 r->last->endspan = 1; 2676 2677 out: 2678 *offs = pos; 2679 irc = ROFF_RERUN; 2680 if (tok == ROFF_while) 2681 irc |= ROFF_WHILE; 2682 return irc; 2683 } 2684 2685 static int 2686 roff_ds(ROFF_ARGS) 2687 { 2688 char *string; 2689 const char *name; 2690 size_t namesz; 2691 2692 /* Ignore groff compatibility mode for now. */ 2693 2694 if (tok == ROFF_ds1) 2695 tok = ROFF_ds; 2696 else if (tok == ROFF_as1) 2697 tok = ROFF_as; 2698 2699 /* 2700 * The first word is the name of the string. 2701 * If it is empty or terminated by an escape sequence, 2702 * abort the `ds' request without defining anything. 2703 */ 2704 2705 name = string = buf->buf + pos; 2706 if (*name == '\0') 2707 return ROFF_IGN; 2708 2709 namesz = roff_getname(r, &string, ln, pos); 2710 switch (name[namesz]) { 2711 case '\\': 2712 return ROFF_IGN; 2713 case '\t': 2714 string = buf->buf + pos + namesz; 2715 break; 2716 default: 2717 break; 2718 } 2719 2720 /* Read past the initial double-quote, if any. */ 2721 if (*string == '"') 2722 string++; 2723 2724 /* The rest is the value. */ 2725 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2726 ROFF_as == tok); 2727 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2728 return ROFF_IGN; 2729 } 2730 2731 /* 2732 * Parse a single operator, one or two characters long. 2733 * If the operator is recognized, return success and advance the 2734 * parse point, else return failure and let the parse point unchanged. 
/*
 * Parse a single operator, one or two characters long.
 * On success, store its one-character code in *res, advance the
 * parse point past it and return that code (which is non-zero).
 * On failure, return 0 and leave the parse point unchanged;
 * *res then merely holds the offending character.
 * Two-character operators are reported as:
 * <= 'l', <> '!', <? 'i', >= 'g', >? 'a', == '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	/* First character, second character, resulting code. */
	static const char	 twochar[][3] = {
		{ '<', '=', 'l' },
		{ '<', '>', '!' },
		{ '<', '?', 'i' },
		{ '>', '=', 'g' },
		{ '>', '?', 'a' },
		{ '=', '=', '=' },
	};
	const char	*cp;
	size_t		 i;

	cp = v + *pos;
	*res = *cp;

	switch (*cp) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		/* These are always a single character long. */
		*pos += 1;
		return *res;
	case '<':
	case '>':
	case '=':
		/* These may have a second character. */
		break;
	default:
		return 0;
	}

	for (i = 0; i < sizeof(twochar) / sizeof(twochar[0]); i++) {
		if (cp[0] == twochar[i][0] && cp[1] == twochar[i][1]) {
			*res = twochar[i][2];
			*pos += 2;
			return *res;
		}
	}

	/* Plain '<', '>', or '='. */

	*pos += 1;
	return *res;
}
2828 */ 2829 static int 2830 roff_evalnum(struct roff *r, int ln, const char *v, 2831 int *pos, int *res, int flags) 2832 { 2833 int mypos, operand2; 2834 char operator; 2835 2836 if (NULL == pos) { 2837 mypos = 0; 2838 pos = &mypos; 2839 } 2840 2841 if (flags & ROFFNUM_WHITE) 2842 while (isspace((unsigned char)v[*pos])) 2843 (*pos)++; 2844 2845 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2846 return 0; 2847 2848 while (1) { 2849 if (flags & ROFFNUM_WHITE) 2850 while (isspace((unsigned char)v[*pos])) 2851 (*pos)++; 2852 2853 if ( ! roff_getop(v, pos, &operator)) 2854 break; 2855 2856 if (flags & ROFFNUM_WHITE) 2857 while (isspace((unsigned char)v[*pos])) 2858 (*pos)++; 2859 2860 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2861 return 0; 2862 2863 if (flags & ROFFNUM_WHITE) 2864 while (isspace((unsigned char)v[*pos])) 2865 (*pos)++; 2866 2867 if (NULL == res) 2868 continue; 2869 2870 switch (operator) { 2871 case '+': 2872 *res += operand2; 2873 break; 2874 case '-': 2875 *res -= operand2; 2876 break; 2877 case '*': 2878 *res *= operand2; 2879 break; 2880 case '/': 2881 if (operand2 == 0) { 2882 mandoc_msg(MANDOCERR_DIVZERO, 2883 ln, *pos, "%s", v); 2884 *res = 0; 2885 break; 2886 } 2887 *res /= operand2; 2888 break; 2889 case '%': 2890 if (operand2 == 0) { 2891 mandoc_msg(MANDOCERR_DIVZERO, 2892 ln, *pos, "%s", v); 2893 *res = 0; 2894 break; 2895 } 2896 *res %= operand2; 2897 break; 2898 case '<': 2899 *res = *res < operand2; 2900 break; 2901 case '>': 2902 *res = *res > operand2; 2903 break; 2904 case 'l': 2905 *res = *res <= operand2; 2906 break; 2907 case 'g': 2908 *res = *res >= operand2; 2909 break; 2910 case '=': 2911 *res = *res == operand2; 2912 break; 2913 case '!': 2914 *res = *res != operand2; 2915 break; 2916 case '&': 2917 *res = *res && operand2; 2918 break; 2919 case ':': 2920 *res = *res || operand2; 2921 break; 2922 case 'i': 2923 if (operand2 < *res) 2924 *res = operand2; 2925 break; 2926 case 'a': 2927 if (operand2 > *res) 2928 *res 
= operand2; 2929 break; 2930 default: 2931 abort(); 2932 } 2933 } 2934 return 1; 2935 } 2936 2937 /* --- register management ------------------------------------------------ */ 2938 2939 void 2940 roff_setreg(struct roff *r, const char *name, int val, char sign) 2941 { 2942 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 2943 } 2944 2945 static void 2946 roff_setregn(struct roff *r, const char *name, size_t len, 2947 int val, char sign, int step) 2948 { 2949 struct roffreg *reg; 2950 2951 /* Search for an existing register with the same name. */ 2952 reg = r->regtab; 2953 2954 while (reg != NULL && (reg->key.sz != len || 2955 strncmp(reg->key.p, name, len) != 0)) 2956 reg = reg->next; 2957 2958 if (NULL == reg) { 2959 /* Create a new register. */ 2960 reg = mandoc_malloc(sizeof(struct roffreg)); 2961 reg->key.p = mandoc_strndup(name, len); 2962 reg->key.sz = len; 2963 reg->val = 0; 2964 reg->step = 0; 2965 reg->next = r->regtab; 2966 r->regtab = reg; 2967 } 2968 2969 if ('+' == sign) 2970 reg->val += val; 2971 else if ('-' == sign) 2972 reg->val -= val; 2973 else 2974 reg->val = val; 2975 if (step != INT_MIN) 2976 reg->step = step; 2977 } 2978 2979 /* 2980 * Handle some predefined read-only number registers. 2981 * For now, return -1 if the requested register is not predefined; 2982 * in case a predefined read-only register having the value -1 2983 * were to turn up, another special value would have to be chosen. 2984 */ 2985 static int 2986 roff_getregro(const struct roff *r, const char *name) 2987 { 2988 2989 switch (*name) { 2990 case '$': /* Number of arguments of the last macro evaluated. */ 2991 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 2992 case 'A': /* ASCII approximation mode is always off. */ 2993 return 0; 2994 case 'g': /* Groff compatibility mode is always on. */ 2995 return 1; 2996 case 'H': /* Fixed horizontal resolution. */ 2997 return 24; 2998 case 'j': /* Always adjust left margin only. 
*/ 2999 return 0; 3000 case 'T': /* Some output device is always defined. */ 3001 return 1; 3002 case 'V': /* Fixed vertical resolution. */ 3003 return 40; 3004 default: 3005 return -1; 3006 } 3007 } 3008 3009 int 3010 roff_getreg(struct roff *r, const char *name) 3011 { 3012 return roff_getregn(r, name, strlen(name), '\0'); 3013 } 3014 3015 static int 3016 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3017 { 3018 struct roffreg *reg; 3019 int val; 3020 3021 if ('.' == name[0] && 2 == len) { 3022 val = roff_getregro(r, name + 1); 3023 if (-1 != val) 3024 return val; 3025 } 3026 3027 for (reg = r->regtab; reg; reg = reg->next) { 3028 if (len == reg->key.sz && 3029 0 == strncmp(name, reg->key.p, len)) { 3030 switch (sign) { 3031 case '+': 3032 reg->val += reg->step; 3033 break; 3034 case '-': 3035 reg->val -= reg->step; 3036 break; 3037 default: 3038 break; 3039 } 3040 return reg->val; 3041 } 3042 } 3043 3044 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3045 return 0; 3046 } 3047 3048 static int 3049 roff_hasregn(const struct roff *r, const char *name, size_t len) 3050 { 3051 struct roffreg *reg; 3052 int val; 3053 3054 if ('.' 
== name[0] && 2 == len) { 3055 val = roff_getregro(r, name + 1); 3056 if (-1 != val) 3057 return 1; 3058 } 3059 3060 for (reg = r->regtab; reg; reg = reg->next) 3061 if (len == reg->key.sz && 3062 0 == strncmp(name, reg->key.p, len)) 3063 return 1; 3064 3065 return 0; 3066 } 3067 3068 static void 3069 roff_freereg(struct roffreg *reg) 3070 { 3071 struct roffreg *old_reg; 3072 3073 while (NULL != reg) { 3074 free(reg->key.p); 3075 old_reg = reg; 3076 reg = reg->next; 3077 free(old_reg); 3078 } 3079 } 3080 3081 static int 3082 roff_nr(ROFF_ARGS) 3083 { 3084 char *key, *val, *step; 3085 size_t keysz; 3086 int iv, is, len; 3087 char sign; 3088 3089 key = val = buf->buf + pos; 3090 if (*key == '\0') 3091 return ROFF_IGN; 3092 3093 keysz = roff_getname(r, &val, ln, pos); 3094 if (key[keysz] == '\\' || key[keysz] == '\t') 3095 return ROFF_IGN; 3096 3097 sign = *val; 3098 if (sign == '+' || sign == '-') 3099 val++; 3100 3101 len = 0; 3102 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3103 return ROFF_IGN; 3104 3105 step = val + len; 3106 while (isspace((unsigned char)*step)) 3107 step++; 3108 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3109 is = INT_MIN; 3110 3111 roff_setregn(r, key, keysz, iv, sign, is); 3112 return ROFF_IGN; 3113 } 3114 3115 static int 3116 roff_rr(ROFF_ARGS) 3117 { 3118 struct roffreg *reg, **prev; 3119 char *name, *cp; 3120 size_t namesz; 3121 3122 name = cp = buf->buf + pos; 3123 if (*name == '\0') 3124 return ROFF_IGN; 3125 namesz = roff_getname(r, &cp, ln, pos); 3126 name[namesz] = '\0'; 3127 3128 prev = &r->regtab; 3129 while (1) { 3130 reg = *prev; 3131 if (reg == NULL || !strcmp(name, reg->key.p)) 3132 break; 3133 prev = ®->next; 3134 } 3135 if (reg != NULL) { 3136 *prev = reg->next; 3137 free(reg->key.p); 3138 free(reg); 3139 } 3140 return ROFF_IGN; 3141 } 3142 3143 /* --- handler functions for roff requests -------------------------------- */ 3144 3145 static int 3146 roff_rm(ROFF_ARGS) 3147 { 3148 const char *name; 3149 
char *cp; 3150 size_t namesz; 3151 3152 cp = buf->buf + pos; 3153 while (*cp != '\0') { 3154 name = cp; 3155 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3156 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3157 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3158 if (name[namesz] == '\\' || name[namesz] == '\t') 3159 break; 3160 } 3161 return ROFF_IGN; 3162 } 3163 3164 static int 3165 roff_it(ROFF_ARGS) 3166 { 3167 int iv; 3168 3169 /* Parse the number of lines. */ 3170 3171 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3172 mandoc_msg(MANDOCERR_IT_NONUM, 3173 ln, ppos, "%s", buf->buf + 1); 3174 return ROFF_IGN; 3175 } 3176 3177 while (isspace((unsigned char)buf->buf[pos])) 3178 pos++; 3179 3180 /* 3181 * Arm the input line trap. 3182 * Special-casing "an-trap" is an ugly workaround to cope 3183 * with DocBook stupidly fiddling with man(7) internals. 3184 */ 3185 3186 roffit_lines = iv; 3187 roffit_macro = mandoc_strdup(iv != 1 || 3188 strcmp(buf->buf + pos, "an-trap") ? 
3189 buf->buf + pos : "br"); 3190 return ROFF_IGN; 3191 } 3192 3193 static int 3194 roff_Dd(ROFF_ARGS) 3195 { 3196 int mask; 3197 enum roff_tok t, te; 3198 3199 switch (tok) { 3200 case ROFF_Dd: 3201 tok = MDOC_Dd; 3202 te = MDOC_MAX; 3203 if (r->format == 0) 3204 r->format = MPARSE_MDOC; 3205 mask = MPARSE_MDOC | MPARSE_QUICK; 3206 break; 3207 case ROFF_TH: 3208 tok = MAN_TH; 3209 te = MAN_MAX; 3210 if (r->format == 0) 3211 r->format = MPARSE_MAN; 3212 mask = MPARSE_QUICK; 3213 break; 3214 default: 3215 abort(); 3216 } 3217 if ((r->options & mask) == 0) 3218 for (t = tok; t < te; t++) 3219 roff_setstr(r, roff_name[t], NULL, 0); 3220 return ROFF_CONT; 3221 } 3222 3223 static int 3224 roff_TE(ROFF_ARGS) 3225 { 3226 r->man->flags &= ~ROFF_NONOFILL; 3227 if (r->tbl == NULL) { 3228 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3229 return ROFF_IGN; 3230 } 3231 if (tbl_end(r->tbl, 0) == 0) { 3232 r->tbl = NULL; 3233 free(buf->buf); 3234 buf->buf = mandoc_strdup(".sp"); 3235 buf->sz = 4; 3236 *offs = 0; 3237 return ROFF_REPARSE; 3238 } 3239 r->tbl = NULL; 3240 return ROFF_IGN; 3241 } 3242 3243 static int 3244 roff_T_(ROFF_ARGS) 3245 { 3246 3247 if (NULL == r->tbl) 3248 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3249 else 3250 tbl_restart(ln, ppos, r->tbl); 3251 3252 return ROFF_IGN; 3253 } 3254 3255 /* 3256 * Handle in-line equation delimiters. 3257 */ 3258 static int 3259 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3260 { 3261 char *cp1, *cp2; 3262 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3263 3264 /* 3265 * Outside equations, look for an opening delimiter. 3266 * If we are inside an equation, we already know it is 3267 * in-line, or this function wouldn't have been called; 3268 * so look for a closing delimiter. 3269 */ 3270 3271 cp1 = buf->buf + pos; 3272 cp2 = strchr(cp1, r->eqn == NULL ? 
3273 r->last_eqn->odelim : r->last_eqn->cdelim); 3274 if (cp2 == NULL) 3275 return ROFF_CONT; 3276 3277 *cp2++ = '\0'; 3278 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3279 3280 /* Handle preceding text, protecting whitespace. */ 3281 3282 if (*buf->buf != '\0') { 3283 if (r->eqn == NULL) 3284 bef_pr = "\\&"; 3285 bef_nl = "\n"; 3286 } 3287 3288 /* 3289 * Prepare replacing the delimiter with an equation macro 3290 * and drop leading white space from the equation. 3291 */ 3292 3293 if (r->eqn == NULL) { 3294 while (*cp2 == ' ') 3295 cp2++; 3296 mac = ".EQ"; 3297 } else 3298 mac = ".EN"; 3299 3300 /* Handle following text, protecting whitespace. */ 3301 3302 if (*cp2 != '\0') { 3303 aft_nl = "\n"; 3304 if (r->eqn != NULL) 3305 aft_pr = "\\&"; 3306 } 3307 3308 /* Do the actual replacement. */ 3309 3310 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3311 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3312 free(buf->buf); 3313 buf->buf = cp1; 3314 3315 /* Toggle the in-line state of the eqn subsystem. 
*/ 3316 3317 r->eqn_inline = r->eqn == NULL; 3318 return ROFF_REPARSE; 3319 } 3320 3321 static int 3322 roff_EQ(ROFF_ARGS) 3323 { 3324 struct roff_node *n; 3325 3326 if (r->man->meta.macroset == MACROSET_MAN) 3327 man_breakscope(r->man, ROFF_EQ); 3328 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3329 if (ln > r->man->last->line) 3330 n->flags |= NODE_LINE; 3331 n->eqn = eqn_box_new(); 3332 roff_node_append(r->man, n); 3333 r->man->next = ROFF_NEXT_SIBLING; 3334 3335 assert(r->eqn == NULL); 3336 if (r->last_eqn == NULL) 3337 r->last_eqn = eqn_alloc(); 3338 else 3339 eqn_reset(r->last_eqn); 3340 r->eqn = r->last_eqn; 3341 r->eqn->node = n; 3342 3343 if (buf->buf[pos] != '\0') 3344 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3345 ".EQ %s", buf->buf + pos); 3346 3347 return ROFF_IGN; 3348 } 3349 3350 static int 3351 roff_EN(ROFF_ARGS) 3352 { 3353 if (r->eqn != NULL) { 3354 eqn_parse(r->eqn); 3355 r->eqn = NULL; 3356 } else 3357 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3358 if (buf->buf[pos] != '\0') 3359 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3360 "EN %s", buf->buf + pos); 3361 return ROFF_IGN; 3362 } 3363 3364 static int 3365 roff_TS(ROFF_ARGS) 3366 { 3367 if (r->tbl != NULL) { 3368 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3369 tbl_end(r->tbl, 0); 3370 } 3371 r->man->flags |= ROFF_NONOFILL; 3372 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3373 if (r->last_tbl == NULL) 3374 r->first_tbl = r->tbl; 3375 r->last_tbl = r->tbl; 3376 return ROFF_IGN; 3377 } 3378 3379 static int 3380 roff_noarg(ROFF_ARGS) 3381 { 3382 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3383 man_breakscope(r->man, tok); 3384 if (tok == ROFF_brp) 3385 tok = ROFF_br; 3386 roff_elem_alloc(r->man, ln, ppos, tok); 3387 if (buf->buf[pos] != '\0') 3388 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3389 "%s %s", roff_name[tok], buf->buf + pos); 3390 if (tok == ROFF_nf) 3391 r->man->flags |= ROFF_NOFILL; 3392 else if (tok == ROFF_fi) 3393 r->man->flags &= ~ROFF_NOFILL; 3394 
r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3395 r->man->next = ROFF_NEXT_SIBLING; 3396 return ROFF_IGN; 3397 } 3398 3399 static int 3400 roff_onearg(ROFF_ARGS) 3401 { 3402 struct roff_node *n; 3403 char *cp; 3404 int npos; 3405 3406 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3407 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3408 tok == ROFF_ti)) 3409 man_breakscope(r->man, tok); 3410 3411 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3412 r->man->last = roffce_node; 3413 r->man->next = ROFF_NEXT_SIBLING; 3414 } 3415 3416 roff_elem_alloc(r->man, ln, ppos, tok); 3417 n = r->man->last; 3418 3419 cp = buf->buf + pos; 3420 if (*cp != '\0') { 3421 while (*cp != '\0' && *cp != ' ') 3422 cp++; 3423 while (*cp == ' ') 3424 *cp++ = '\0'; 3425 if (*cp != '\0') 3426 mandoc_msg(MANDOCERR_ARG_EXCESS, 3427 ln, (int)(cp - buf->buf), 3428 "%s ... %s", roff_name[tok], cp); 3429 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3430 } 3431 3432 if (tok == ROFF_ce || tok == ROFF_rj) { 3433 if (r->man->last->type == ROFFT_ELEM) { 3434 roff_word_alloc(r->man, ln, pos, "1"); 3435 r->man->last->flags |= NODE_NOSRC; 3436 } 3437 npos = 0; 3438 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3439 &roffce_lines, 0) == 0) { 3440 mandoc_msg(MANDOCERR_CE_NONUM, 3441 ln, pos, "ce %s", buf->buf + pos); 3442 roffce_lines = 1; 3443 } 3444 if (roffce_lines < 1) { 3445 r->man->last = r->man->last->parent; 3446 roffce_node = NULL; 3447 roffce_lines = 0; 3448 } else 3449 roffce_node = r->man->last->parent; 3450 } else { 3451 n->flags |= NODE_VALID | NODE_ENDED; 3452 r->man->last = n; 3453 } 3454 n->flags |= NODE_LINE; 3455 r->man->next = ROFF_NEXT_SIBLING; 3456 return ROFF_IGN; 3457 } 3458 3459 static int 3460 roff_manyarg(ROFF_ARGS) 3461 { 3462 struct roff_node *n; 3463 char *sp, *ep; 3464 3465 roff_elem_alloc(r->man, ln, ppos, tok); 3466 n = r->man->last; 3467 3468 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3469 while (*ep != '\0' && 
*ep != ' ') 3470 ep++; 3471 while (*ep == ' ') 3472 *ep++ = '\0'; 3473 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3474 } 3475 3476 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3477 r->man->last = n; 3478 r->man->next = ROFF_NEXT_SIBLING; 3479 return ROFF_IGN; 3480 } 3481 3482 static int 3483 roff_als(ROFF_ARGS) 3484 { 3485 char *oldn, *newn, *end, *value; 3486 size_t oldsz, newsz, valsz; 3487 3488 newn = oldn = buf->buf + pos; 3489 if (*newn == '\0') 3490 return ROFF_IGN; 3491 3492 newsz = roff_getname(r, &oldn, ln, pos); 3493 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3494 return ROFF_IGN; 3495 3496 end = oldn; 3497 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3498 if (oldsz == 0) 3499 return ROFF_IGN; 3500 3501 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3502 (int)oldsz, oldn); 3503 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3504 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3505 free(value); 3506 return ROFF_IGN; 3507 } 3508 3509 /* 3510 * The .break request only makes sense inside conditionals, 3511 * and that case is already handled in roff_cond_sub(). 3512 */ 3513 static int 3514 roff_break(ROFF_ARGS) 3515 { 3516 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3517 return ROFF_IGN; 3518 } 3519 3520 static int 3521 roff_cc(ROFF_ARGS) 3522 { 3523 const char *p; 3524 3525 p = buf->buf + pos; 3526 3527 if (*p == '\0' || (r->control = *p++) == '.') 3528 r->control = '\0'; 3529 3530 if (*p != '\0') 3531 mandoc_msg(MANDOCERR_ARG_EXCESS, 3532 ln, p - buf->buf, "cc ... %s", p); 3533 3534 return ROFF_IGN; 3535 } 3536 3537 static int 3538 roff_char(ROFF_ARGS) 3539 { 3540 const char *p, *kp, *vp; 3541 size_t ksz, vsz; 3542 int font; 3543 3544 /* Parse the character to be replaced. 
*/ 3545 3546 kp = buf->buf + pos; 3547 p = kp + 1; 3548 if (*kp == '\0' || (*kp == '\\' && 3549 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3550 (*p != ' ' && *p != '\0')) { 3551 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3552 return ROFF_IGN; 3553 } 3554 ksz = p - kp; 3555 while (*p == ' ') 3556 p++; 3557 3558 /* 3559 * If the replacement string contains a font escape sequence, 3560 * we have to restore the font at the end. 3561 */ 3562 3563 vp = p; 3564 vsz = strlen(p); 3565 font = 0; 3566 while (*p != '\0') { 3567 if (*p++ != '\\') 3568 continue; 3569 switch (mandoc_escape(&p, NULL, NULL)) { 3570 case ESCAPE_FONT: 3571 case ESCAPE_FONTROMAN: 3572 case ESCAPE_FONTITALIC: 3573 case ESCAPE_FONTBOLD: 3574 case ESCAPE_FONTBI: 3575 case ESCAPE_FONTCW: 3576 case ESCAPE_FONTPREV: 3577 font++; 3578 break; 3579 default: 3580 break; 3581 } 3582 } 3583 if (font > 1) 3584 mandoc_msg(MANDOCERR_CHAR_FONT, 3585 ln, (int)(vp - buf->buf), "%s", vp); 3586 3587 /* 3588 * Approximate the effect of .char using the .tr tables. 3589 * XXX In groff, .char and .tr interact differently. 3590 */ 3591 3592 if (ksz == 1) { 3593 if (r->xtab == NULL) 3594 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3595 assert((unsigned int)*kp < 128); 3596 free(r->xtab[(int)*kp].p); 3597 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3598 "%s%s", vp, font ? "\fP" : ""); 3599 } else { 3600 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3601 if (font) 3602 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3603 } 3604 return ROFF_IGN; 3605 } 3606 3607 static int 3608 roff_ec(ROFF_ARGS) 3609 { 3610 const char *p; 3611 3612 p = buf->buf + pos; 3613 if (*p == '\0') 3614 r->escape = '\\'; 3615 else { 3616 r->escape = *p; 3617 if (*++p != '\0') 3618 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3619 (int)(p - buf->buf), "ec ... 
%s", p); 3620 } 3621 return ROFF_IGN; 3622 } 3623 3624 static int 3625 roff_eo(ROFF_ARGS) 3626 { 3627 r->escape = '\0'; 3628 if (buf->buf[pos] != '\0') 3629 mandoc_msg(MANDOCERR_ARG_SKIP, 3630 ln, pos, "eo %s", buf->buf + pos); 3631 return ROFF_IGN; 3632 } 3633 3634 static int 3635 roff_nop(ROFF_ARGS) 3636 { 3637 while (buf->buf[pos] == ' ') 3638 pos++; 3639 *offs = pos; 3640 return ROFF_RERUN; 3641 } 3642 3643 static int 3644 roff_tr(ROFF_ARGS) 3645 { 3646 const char *p, *first, *second; 3647 size_t fsz, ssz; 3648 enum mandoc_esc esc; 3649 3650 p = buf->buf + pos; 3651 3652 if (*p == '\0') { 3653 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3654 return ROFF_IGN; 3655 } 3656 3657 while (*p != '\0') { 3658 fsz = ssz = 1; 3659 3660 first = p++; 3661 if (*first == '\\') { 3662 esc = mandoc_escape(&p, NULL, NULL); 3663 if (esc == ESCAPE_ERROR) { 3664 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3665 (int)(p - buf->buf), "%s", first); 3666 return ROFF_IGN; 3667 } 3668 fsz = (size_t)(p - first); 3669 } 3670 3671 second = p++; 3672 if (*second == '\\') { 3673 esc = mandoc_escape(&p, NULL, NULL); 3674 if (esc == ESCAPE_ERROR) { 3675 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3676 (int)(p - buf->buf), "%s", second); 3677 return ROFF_IGN; 3678 } 3679 ssz = (size_t)(p - second); 3680 } else if (*second == '\0') { 3681 mandoc_msg(MANDOCERR_TR_ODD, ln, 3682 (int)(first - buf->buf), "tr %s", first); 3683 second = " "; 3684 p--; 3685 } 3686 3687 if (fsz > 1) { 3688 roff_setstrn(&r->xmbtab, first, fsz, 3689 second, ssz, 0); 3690 continue; 3691 } 3692 3693 if (r->xtab == NULL) 3694 r->xtab = mandoc_calloc(128, 3695 sizeof(struct roffstr)); 3696 3697 free(r->xtab[(int)*first].p); 3698 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3699 r->xtab[(int)*first].sz = ssz; 3700 } 3701 3702 return ROFF_IGN; 3703 } 3704 3705 /* 3706 * Implementation of the .return request. 3707 * There is no need to call roff_userret() from here. 
3708 * The read module will call that after rewinding the reader stack 3709 * to the place from where the current macro was called. 3710 */ 3711 static int 3712 roff_return(ROFF_ARGS) 3713 { 3714 if (r->mstackpos >= 0) 3715 return ROFF_IGN | ROFF_USERRET; 3716 3717 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3718 return ROFF_IGN; 3719 } 3720 3721 static int 3722 roff_rn(ROFF_ARGS) 3723 { 3724 const char *value; 3725 char *oldn, *newn, *end; 3726 size_t oldsz, newsz; 3727 int deftype; 3728 3729 oldn = newn = buf->buf + pos; 3730 if (*oldn == '\0') 3731 return ROFF_IGN; 3732 3733 oldsz = roff_getname(r, &newn, ln, pos); 3734 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3735 return ROFF_IGN; 3736 3737 end = newn; 3738 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3739 if (newsz == 0) 3740 return ROFF_IGN; 3741 3742 deftype = ROFFDEF_ANY; 3743 value = roff_getstrn(r, oldn, oldsz, &deftype); 3744 switch (deftype) { 3745 case ROFFDEF_USER: 3746 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3747 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3748 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3749 break; 3750 case ROFFDEF_PRE: 3751 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3752 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3753 break; 3754 case ROFFDEF_REN: 3755 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3756 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3757 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3758 break; 3759 case ROFFDEF_STD: 3760 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3761 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3762 break; 3763 default: 3764 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3765 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3766 break; 3767 } 3768 return ROFF_IGN; 3769 } 3770 3771 static int 3772 roff_shift(ROFF_ARGS) 3773 { 3774 struct mctx *ctx; 3775 int levels, i; 3776 3777 levels = 1; 
3778 if (buf->buf[pos] != '\0' && 3779 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3780 mandoc_msg(MANDOCERR_CE_NONUM, 3781 ln, pos, "shift %s", buf->buf + pos); 3782 levels = 1; 3783 } 3784 if (r->mstackpos < 0) { 3785 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3786 return ROFF_IGN; 3787 } 3788 ctx = r->mstack + r->mstackpos; 3789 if (levels > ctx->argc) { 3790 mandoc_msg(MANDOCERR_SHIFT, 3791 ln, pos, "%d, but max is %d", levels, ctx->argc); 3792 levels = ctx->argc; 3793 } 3794 if (levels == 0) 3795 return ROFF_IGN; 3796 for (i = 0; i < levels; i++) 3797 free(ctx->argv[i]); 3798 ctx->argc -= levels; 3799 for (i = 0; i < ctx->argc; i++) 3800 ctx->argv[i] = ctx->argv[i + levels]; 3801 return ROFF_IGN; 3802 } 3803 3804 static int 3805 roff_so(ROFF_ARGS) 3806 { 3807 char *name, *cp; 3808 3809 name = buf->buf + pos; 3810 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3811 3812 /* 3813 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3814 * opening anything that's not in our cwd or anything beneath 3815 * it. Thus, explicitly disallow traversing up the file-system 3816 * or using absolute paths. 3817 */ 3818 3819 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3820 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3821 buf->sz = mandoc_asprintf(&cp, 3822 ".sp\nSee the file %s.\n.sp", name) + 1; 3823 free(buf->buf); 3824 buf->buf = cp; 3825 *offs = 0; 3826 return ROFF_REPARSE; 3827 } 3828 3829 *offs = pos; 3830 return ROFF_SO; 3831 } 3832 3833 /* --- user defined strings and macros ------------------------------------ */ 3834 3835 static int 3836 roff_userdef(ROFF_ARGS) 3837 { 3838 struct mctx *ctx; 3839 char *arg, *ap, *dst, *src; 3840 size_t sz; 3841 3842 /* If the macro is empty, ignore it altogether. */ 3843 3844 if (*r->current_string == '\0') 3845 return ROFF_IGN; 3846 3847 /* Initialize a new macro stack context. 
*/ 3848 3849 if (++r->mstackpos == r->mstacksz) { 3850 r->mstack = mandoc_recallocarray(r->mstack, 3851 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3852 r->mstacksz += 8; 3853 } 3854 ctx = r->mstack + r->mstackpos; 3855 ctx->argsz = 0; 3856 ctx->argc = 0; 3857 ctx->argv = NULL; 3858 3859 /* 3860 * Collect pointers to macro argument strings, 3861 * NUL-terminating them and escaping quotes. 3862 */ 3863 3864 src = buf->buf + pos; 3865 while (*src != '\0') { 3866 if (ctx->argc == ctx->argsz) { 3867 ctx->argsz += 8; 3868 ctx->argv = mandoc_reallocarray(ctx->argv, 3869 ctx->argsz, sizeof(*ctx->argv)); 3870 } 3871 arg = roff_getarg(r, &src, ln, &pos); 3872 sz = 1; /* For the terminating NUL. */ 3873 for (ap = arg; *ap != '\0'; ap++) 3874 sz += *ap == '"' ? 4 : 1; 3875 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3876 for (ap = arg; *ap != '\0'; ap++) { 3877 if (*ap == '"') { 3878 memcpy(dst, "\\(dq", 4); 3879 dst += 4; 3880 } else 3881 *dst++ = *ap; 3882 } 3883 *dst = '\0'; 3884 free(arg); 3885 } 3886 3887 /* Replace the macro invocation by the macro definition. */ 3888 3889 free(buf->buf); 3890 buf->buf = mandoc_strdup(r->current_string); 3891 buf->sz = strlen(buf->buf) + 1; 3892 *offs = 0; 3893 3894 return buf->buf[buf->sz - 2] == '\n' ? 3895 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3896 } 3897 3898 /* 3899 * Calling a high-level macro that was renamed with .rn. 3900 * r->current_string has already been set up by roff_parse(). 3901 */ 3902 static int 3903 roff_renamed(ROFF_ARGS) 3904 { 3905 char *nbuf; 3906 3907 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3908 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 3909 free(buf->buf); 3910 buf->buf = nbuf; 3911 *offs = 0; 3912 return ROFF_CONT; 3913 } 3914 3915 /* 3916 * Measure the length in bytes of the roff identifier at *cpp 3917 * and advance the pointer to the next word. 
3918 */ 3919 static size_t 3920 roff_getname(struct roff *r, char **cpp, int ln, int pos) 3921 { 3922 char *name, *cp; 3923 size_t namesz; 3924 3925 name = *cpp; 3926 if (*name == '\0') 3927 return 0; 3928 3929 /* Advance cp to the byte after the end of the name. */ 3930 3931 for (cp = name; 1; cp++) { 3932 namesz = cp - name; 3933 if (*cp == '\0') 3934 break; 3935 if (*cp == ' ' || *cp == '\t') { 3936 cp++; 3937 break; 3938 } 3939 if (*cp != '\\') 3940 continue; 3941 if (cp[1] == '{' || cp[1] == '}') 3942 break; 3943 if (*++cp == '\\') 3944 continue; 3945 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 3946 "%.*s", (int)(cp - name + 1), name); 3947 mandoc_escape((const char **)&cp, NULL, NULL); 3948 break; 3949 } 3950 3951 /* Read past spaces. */ 3952 3953 while (*cp == ' ') 3954 cp++; 3955 3956 *cpp = cp; 3957 return namesz; 3958 } 3959 3960 /* 3961 * Store *string into the user-defined string called *name. 3962 * To clear an existing entry, call with (*r, *name, NULL, 0). 3963 * append == 0: replace mode 3964 * append == 1: single-line append mode 3965 * append == 2: multiline append mode, append '\n' after each call 3966 */ 3967 static void 3968 roff_setstr(struct roff *r, const char *name, const char *string, 3969 int append) 3970 { 3971 size_t namesz; 3972 3973 namesz = strlen(name); 3974 roff_setstrn(&r->strtab, name, namesz, string, 3975 string ? strlen(string) : 0, append); 3976 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3977 } 3978 3979 static void 3980 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 3981 const char *string, size_t stringsz, int append) 3982 { 3983 struct roffkv *n; 3984 char *c; 3985 int i; 3986 size_t oldch, newch; 3987 3988 /* Search for an existing string with the same name. */ 3989 n = *r; 3990 3991 while (n && (namesz != n->key.sz || 3992 strncmp(n->key.p, name, namesz))) 3993 n = n->next; 3994 3995 if (NULL == n) { 3996 /* Create a new string table entry. 
*/ 3997 n = mandoc_malloc(sizeof(struct roffkv)); 3998 n->key.p = mandoc_strndup(name, namesz); 3999 n->key.sz = namesz; 4000 n->val.p = NULL; 4001 n->val.sz = 0; 4002 n->next = *r; 4003 *r = n; 4004 } else if (0 == append) { 4005 free(n->val.p); 4006 n->val.p = NULL; 4007 n->val.sz = 0; 4008 } 4009 4010 if (NULL == string) 4011 return; 4012 4013 /* 4014 * One additional byte for the '\n' in multiline mode, 4015 * and one for the terminating '\0'. 4016 */ 4017 newch = stringsz + (1 < append ? 2u : 1u); 4018 4019 if (NULL == n->val.p) { 4020 n->val.p = mandoc_malloc(newch); 4021 *n->val.p = '\0'; 4022 oldch = 0; 4023 } else { 4024 oldch = n->val.sz; 4025 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4026 } 4027 4028 /* Skip existing content in the destination buffer. */ 4029 c = n->val.p + (int)oldch; 4030 4031 /* Append new content to the destination buffer. */ 4032 i = 0; 4033 while (i < (int)stringsz) { 4034 /* 4035 * Rudimentary roff copy mode: 4036 * Handle escaped backslashes. 4037 */ 4038 if ('\\' == string[i] && '\\' == string[i + 1]) 4039 i++; 4040 *c++ = string[i++]; 4041 } 4042 4043 /* Append terminating bytes. 
*/ 4044 if (1 < append) 4045 *c++ = '\n'; 4046 4047 *c = '\0'; 4048 n->val.sz = (int)(c - n->val.p); 4049 } 4050 4051 static const char * 4052 roff_getstrn(struct roff *r, const char *name, size_t len, 4053 int *deftype) 4054 { 4055 const struct roffkv *n; 4056 int found, i; 4057 enum roff_tok tok; 4058 4059 found = 0; 4060 for (n = r->strtab; n != NULL; n = n->next) { 4061 if (strncmp(name, n->key.p, len) != 0 || 4062 n->key.p[len] != '\0' || n->val.p == NULL) 4063 continue; 4064 if (*deftype & ROFFDEF_USER) { 4065 *deftype = ROFFDEF_USER; 4066 return n->val.p; 4067 } else { 4068 found = 1; 4069 break; 4070 } 4071 } 4072 for (n = r->rentab; n != NULL; n = n->next) { 4073 if (strncmp(name, n->key.p, len) != 0 || 4074 n->key.p[len] != '\0' || n->val.p == NULL) 4075 continue; 4076 if (*deftype & ROFFDEF_REN) { 4077 *deftype = ROFFDEF_REN; 4078 return n->val.p; 4079 } else { 4080 found = 1; 4081 break; 4082 } 4083 } 4084 for (i = 0; i < PREDEFS_MAX; i++) { 4085 if (strncmp(name, predefs[i].name, len) != 0 || 4086 predefs[i].name[len] != '\0') 4087 continue; 4088 if (*deftype & ROFFDEF_PRE) { 4089 *deftype = ROFFDEF_PRE; 4090 return predefs[i].str; 4091 } else { 4092 found = 1; 4093 break; 4094 } 4095 } 4096 if (r->man->meta.macroset != MACROSET_MAN) { 4097 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4098 if (strncmp(name, roff_name[tok], len) != 0 || 4099 roff_name[tok][len] != '\0') 4100 continue; 4101 if (*deftype & ROFFDEF_STD) { 4102 *deftype = ROFFDEF_STD; 4103 return NULL; 4104 } else { 4105 found = 1; 4106 break; 4107 } 4108 } 4109 } 4110 if (r->man->meta.macroset != MACROSET_MDOC) { 4111 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4112 if (strncmp(name, roff_name[tok], len) != 0 || 4113 roff_name[tok][len] != '\0') 4114 continue; 4115 if (*deftype & ROFFDEF_STD) { 4116 *deftype = ROFFDEF_STD; 4117 return NULL; 4118 } else { 4119 found = 1; 4120 break; 4121 } 4122 } 4123 } 4124 4125 if (found == 0 && *deftype != ROFFDEF_ANY) { 4126 if (*deftype & ROFFDEF_REN) { 
4127 /* 4128 * This might still be a request, 4129 * so do not treat it as undefined yet. 4130 */ 4131 *deftype = ROFFDEF_UNDEF; 4132 return NULL; 4133 } 4134 4135 /* Using an undefined string defines it to be empty. */ 4136 4137 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4138 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4139 } 4140 4141 *deftype = 0; 4142 return NULL; 4143 } 4144 4145 static void 4146 roff_freestr(struct roffkv *r) 4147 { 4148 struct roffkv *n, *nn; 4149 4150 for (n = r; n; n = nn) { 4151 free(n->key.p); 4152 free(n->val.p); 4153 nn = n->next; 4154 free(n); 4155 } 4156 } 4157 4158 /* --- accessors and utility functions ------------------------------------ */ 4159 4160 /* 4161 * Duplicate an input string, making the appropriate character 4162 * conversations (as stipulated by `tr') along the way. 4163 * Returns a heap-allocated string with all the replacements made. 4164 */ 4165 char * 4166 roff_strdup(const struct roff *r, const char *p) 4167 { 4168 const struct roffkv *cp; 4169 char *res; 4170 const char *pp; 4171 size_t ssz, sz; 4172 enum mandoc_esc esc; 4173 4174 if (NULL == r->xmbtab && NULL == r->xtab) 4175 return mandoc_strdup(p); 4176 else if ('\0' == *p) 4177 return mandoc_strdup(""); 4178 4179 /* 4180 * Step through each character looking for term matches 4181 * (remember that a `tr' can be invoked with an escape, which is 4182 * a glyph but the escape is multi-character). 4183 * We only do this if the character hash has been initialised 4184 * and the string is >0 length. 
4185 */ 4186 4187 res = NULL; 4188 ssz = 0; 4189 4190 while ('\0' != *p) { 4191 assert((unsigned int)*p < 128); 4192 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4193 sz = r->xtab[(int)*p].sz; 4194 res = mandoc_realloc(res, ssz + sz + 1); 4195 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4196 ssz += sz; 4197 p++; 4198 continue; 4199 } else if ('\\' != *p) { 4200 res = mandoc_realloc(res, ssz + 2); 4201 res[ssz++] = *p++; 4202 continue; 4203 } 4204 4205 /* Search for term matches. */ 4206 for (cp = r->xmbtab; cp; cp = cp->next) 4207 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4208 break; 4209 4210 if (NULL != cp) { 4211 /* 4212 * A match has been found. 4213 * Append the match to the array and move 4214 * forward by its keysize. 4215 */ 4216 res = mandoc_realloc(res, 4217 ssz + cp->val.sz + 1); 4218 memcpy(res + ssz, cp->val.p, cp->val.sz); 4219 ssz += cp->val.sz; 4220 p += (int)cp->key.sz; 4221 continue; 4222 } 4223 4224 /* 4225 * Handle escapes carefully: we need to copy 4226 * over just the escape itself, or else we might 4227 * do replacements within the escape itself. 4228 * Make sure to pass along the bogus string. 4229 */ 4230 pp = p++; 4231 esc = mandoc_escape(&p, NULL, NULL); 4232 if (ESCAPE_ERROR == esc) { 4233 sz = strlen(pp); 4234 res = mandoc_realloc(res, ssz + sz + 1); 4235 memcpy(res + ssz, pp, sz); 4236 break; 4237 } 4238 /* 4239 * We bail out on bad escapes. 4240 * No need to warn: we already did so when 4241 * roff_expand() was called. 4242 */ 4243 sz = (int)(p - pp); 4244 res = mandoc_realloc(res, ssz + sz + 1); 4245 memcpy(res + ssz, pp, sz); 4246 ssz += sz; 4247 } 4248 4249 res[(int)ssz] = '\0'; 4250 return res; 4251 } 4252 4253 int 4254 roff_getformat(const struct roff *r) 4255 { 4256 4257 return r->format; 4258 } 4259 4260 /* 4261 * Find out whether a line is a macro line or not. 4262 * If it is, adjust the current position and return one; if it isn't, 4263 * return zero and don't change the current position. 
4264 * If the control character has been set with `.cc', then let that grain 4265 * precedence. 4266 * This is slighly contrary to groff, where using the non-breaking 4267 * control character when `cc' has been invoked will cause the 4268 * non-breaking macro contents to be printed verbatim. 4269 */ 4270 int 4271 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4272 { 4273 int pos; 4274 4275 pos = *ppos; 4276 4277 if (r->control != '\0' && cp[pos] == r->control) 4278 pos++; 4279 else if (r->control != '\0') 4280 return 0; 4281 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4282 pos += 2; 4283 else if ('.' == cp[pos] || '\'' == cp[pos]) 4284 pos++; 4285 else 4286 return 0; 4287 4288 while (' ' == cp[pos] || '\t' == cp[pos]) 4289 pos++; 4290 4291 *ppos = pos; 4292 return 1; 4293 } 4294