1 /* $OpenBSD: roff.c,v 1.268 2022/12/26 19:16:02 jmc Exp $ */ 2 /* 3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the roff(7) parser for mandoc(1). 19 */ 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <limits.h> 25 #include <stddef.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc_ohash.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mandoc_parse.h" 36 #include "libmandoc.h" 37 #include "roff_int.h" 38 #include "tbl_parse.h" 39 #include "eqn_parse.h" 40 41 /* Maximum number of string expansions per line, to break infinite loops. */ 42 #define EXPAND_LIMIT 1000 43 44 /* Types of definitions of macros and strings. */ 45 #define ROFFDEF_USER (1 << 1) /* User-defined. */ 46 #define ROFFDEF_PRE (1 << 2) /* Predefined. */ 47 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 48 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 49 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 50 ROFFDEF_REN | ROFFDEF_STD) 51 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 52 53 /* --- data types --------------------------------------------------------- */ 54 55 /* 56 * An incredibly-simple string buffer. 57 */ 58 struct roffstr { 59 char *p; /* nil-terminated buffer */ 60 size_t sz; /* saved strlen(p) */ 61 }; 62 63 /* 64 * A key-value roffstr pair as part of a singly-linked list. 65 */ 66 struct roffkv { 67 struct roffstr key; 68 struct roffstr val; 69 struct roffkv *next; /* next in list */ 70 }; 71 72 /* 73 * A single number register as part of a singly-linked list. 74 */ 75 struct roffreg { 76 struct roffstr key; 77 int val; 78 int step; 79 struct roffreg *next; 80 }; 81 82 /* 83 * Association of request and macro names with token IDs. 84 */ 85 struct roffreq { 86 enum roff_tok tok; 87 char name[]; 88 }; 89 90 /* 91 * A macro processing context. 92 * More than one is needed when macro calls are nested. 93 */ 94 struct mctx { 95 char **argv; 96 int argc; 97 int argsz; 98 }; 99 100 struct roff { 101 struct roff_man *man; /* mdoc or man parser */ 102 struct roffnode *last; /* leaf of stack */ 103 struct mctx *mstack; /* stack of macro contexts */ 104 int *rstack; /* stack of inverted `ie' values */ 105 struct ohash *reqtab; /* request lookup table */ 106 struct roffreg *regtab; /* number registers */ 107 struct roffkv *strtab; /* user-defined strings & macros */ 108 struct roffkv *rentab; /* renamed strings & macros */ 109 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 110 struct roffstr *xtab; /* single-byte trans table (`tr') */ 111 const char *current_string; /* value of last called user macro */ 112 struct tbl_node *first_tbl; /* first table parsed */ 113 struct tbl_node *last_tbl; /* last table parsed */ 114 struct tbl_node *tbl; /* current table being parsed */ 115 struct eqn_node *last_eqn; /* equation parser */ 116 struct eqn_node *eqn; /* active equation parser */ 117 int eqn_inline; /* current equation is inline */ 118 int options; /* parse options */ 119 int mstacksz; /* current size of mstack */ 120 int mstackpos; /* position in mstack */ 121 int rstacksz; /* current size limit of rstack */ 122 int rstackpos; /* position in rstack */ 123 int format; /* current file in mdoc or man format */ 124 char control; /* control character */ 125 char escape; /* escape character */ 126 }; 127 128 /* 129 * A macro definition, condition, or ignored block. 130 */ 131 struct roffnode { 132 enum roff_tok tok; /* type of node */ 133 struct roffnode *parent; /* up one in stack */ 134 int line; /* parse line */ 135 int col; /* parse col */ 136 char *name; /* node name, e.g. macro name */ 137 char *end; /* custom end macro of the block */ 138 int endspan; /* scope to: 1=eol 2=next line -1=\} */ 139 int rule; /* content is: 1=evaluated 0=skipped */ 140 }; 141 142 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 143 enum roff_tok tok, /* tok of macro */ \ 144 struct buf *buf, /* input buffer */ \ 145 int ln, /* parse line */ \ 146 int ppos, /* original pos in buffer */ \ 147 int pos, /* current pos in buffer */ \ 148 int *offs /* reset offset of buffer data */ 149 150 typedef int (*roffproc)(ROFF_ARGS); 151 152 struct roffmac { 153 roffproc proc; /* process new macro */ 154 roffproc text; /* process as child text of macro */ 155 roffproc sub; /* process as child of macro */ 156 int flags; 157 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 158 }; 159 160 struct predef { 161 const char *name; /* predefined input name */ 162 const char *str; /* replacement symbol */ 163 }; 164 165 #define PREDEF(__name, __str) \ 166 { (__name), (__str) }, 167 168 /* --- function prototypes ------------------------------------------------ */ 169 170 static int roffnode_cleanscope(struct roff *); 171 static int roffnode_pop(struct roff *); 172 static void roffnode_push(struct roff *, enum roff_tok, 173 const char *, int, int); 174 static void roff_addtbl(struct roff_man *, int, struct tbl_node *); 175 static int roff_als(ROFF_ARGS); 176 static int roff_block(ROFF_ARGS); 177 static int roff_block_text(ROFF_ARGS); 178 static int roff_block_sub(ROFF_ARGS); 179 static int roff_break(ROFF_ARGS); 180 static int roff_cblock(ROFF_ARGS); 181 static int roff_cc(ROFF_ARGS); 182 static int roff_ccond(struct roff *, int, int); 183 static int roff_char(ROFF_ARGS); 184 static int roff_cond(ROFF_ARGS); 185 static int roff_cond_checkend(ROFF_ARGS); 186 static int roff_cond_text(ROFF_ARGS); 187 static int roff_cond_sub(ROFF_ARGS); 188 static int roff_ds(ROFF_ARGS); 189 static int roff_ec(ROFF_ARGS); 190 static int roff_eo(ROFF_ARGS); 191 static int roff_eqndelim(struct roff *, struct buf *, int); 192 static int roff_evalcond(struct roff *, int, char *, int *); 193 static int roff_evalnum(struct roff *, int, 194 const char *, int *, int *, int); 195 static int roff_evalpar(struct roff *, int, 196 const char *, int *, int *, int); 197 static int roff_evalstrcond(const char *, int *); 198 static int roff_expand(struct roff *, struct buf *, 199 int, int, char); 200 static void roff_expand_patch(struct buf *, int, 201 const char *, int); 202 static void roff_free1(struct roff *); 203 static void roff_freereg(struct roffreg *); 204 static void roff_freestr(struct roffkv *); 205 static size_t roff_getname(struct roff *, char **, int, int); 206 static int roff_getnum(const char *, int *, int *, int); 207 static int roff_getop(const char *, int *, char *); 208 static int roff_getregn(struct roff *, 209 const char *, size_t, char); 210 static int roff_getregro(const struct roff *, 211 const char *name); 212 static const char *roff_getstrn(struct roff *, 213 const char *, size_t, int *); 214 static int roff_hasregn(const struct roff *, 215 const char *, size_t); 216 static int roff_insec(ROFF_ARGS); 217 static int roff_it(ROFF_ARGS); 218 static int roff_line_ignore(ROFF_ARGS); 219 static void roff_man_alloc1(struct roff_man *); 220 static void roff_man_free1(struct roff_man *); 221 static int roff_manyarg(ROFF_ARGS); 222 static int roff_mc(ROFF_ARGS); 223 static int roff_noarg(ROFF_ARGS); 224 static int roff_nop(ROFF_ARGS); 225 static int roff_nr(ROFF_ARGS); 226 static int roff_onearg(ROFF_ARGS); 227 static enum roff_tok roff_parse(struct roff *, char *, int *, 228 int, int); 229 static int roff_parse_comment(struct roff *, struct buf *, 230 int, int, char); 231 static int roff_parsetext(struct roff *, struct buf *, 232 int, int *); 233 static int roff_renamed(ROFF_ARGS); 234 static int roff_req_or_macro(ROFF_ARGS); 235 static int roff_return(ROFF_ARGS); 236 static int roff_rm(ROFF_ARGS); 237 static int roff_rn(ROFF_ARGS); 238 static int roff_rr(ROFF_ARGS); 239 static void roff_setregn(struct roff *, const char *, 240 size_t, int, char, int); 241 static void roff_setstr(struct roff *, 242 const char *, const char *, int); 243 static void roff_setstrn(struct roffkv **, const char *, 244 size_t, const char *, size_t, int); 245 static int roff_shift(ROFF_ARGS); 246 static int roff_so(ROFF_ARGS); 247 static int roff_tr(ROFF_ARGS); 248 static int roff_Dd(ROFF_ARGS); 249 static int roff_TE(ROFF_ARGS); 250 static int roff_TS(ROFF_ARGS); 251 static int roff_EQ(ROFF_ARGS); 252 static int roff_EN(ROFF_ARGS); 253 static int roff_T_(ROFF_ARGS); 254 static int roff_unsupp(ROFF_ARGS); 255 static int roff_userdef(ROFF_ARGS); 256 257 /* --- constant data ------------------------------------------------------ */ 258 259 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 260 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 261 262 const char *__roff_name[MAN_MAX + 1] = { 263 "br", "ce", "fi", "ft", 264 "ll", "mc", "nf", 265 "po", "rj", "sp", 266 "ta", "ti", NULL, 267 "ab", "ad", "af", "aln", 268 "als", "am", "am1", "ami", 269 "ami1", "as", "as1", "asciify", 270 "backtrace", "bd", "bleedat", "blm", 271 "box", "boxa", "bp", "BP", 272 "break", "breakchar", "brnl", "brp", 273 "brpnl", "c2", "cc", 274 "cf", "cflags", "ch", "char", 275 "chop", "class", "close", "CL", 276 "color", "composite", "continue", "cp", 277 "cropat", "cs", "cu", "da", 278 "dch", "Dd", "de", "de1", 279 "defcolor", "dei", "dei1", "device", 280 "devicem", "di", "do", "ds", 281 "ds1", "dwh", "dt", "ec", 282 "ecr", "ecs", "el", "em", 283 "EN", "eo", "EP", "EQ", 284 "errprint", "ev", "evc", "ex", 285 "fallback", "fam", "fc", "fchar", 286 "fcolor", "fdeferlig", "feature", "fkern", 287 "fl", "flig", "fp", "fps", 288 "fschar", "fspacewidth", "fspecial", "ftr", 289 "fzoom", "gcolor", "hc", "hcode", 290 "hidechar", "hla", "hlm", "hpf", 291 "hpfa", "hpfcode", "hw", "hy", 292 "hylang", "hylen", "hym", "hypp", 293 "hys", "ie", "if", "ig", 294 "index", "it", "itc", "IX", 295 "kern", "kernafter", "kernbefore", "kernpair", 296 "lc", "lc_ctype", "lds", "length", 297 "letadj", "lf", "lg", "lhang", 298 "linetabs", "lnr", "lnrf", "lpfx", 299 "ls", "lsm", "lt", 300 "mediasize", "minss", "mk", "mso", 301 "na", "ne", "nh", "nhychar", 302 "nm", "nn", "nop", "nr", 303 "nrf", "nroff", "ns", "nx", 304 "open", "opena", "os", "output", 305 "padj", "papersize", "pc", "pev", 306 "pi", "PI", "pl", "pm", 307 "pn", "pnr", "ps", 308 "psbb", "pshape", "pso", "ptr", 309 "pvs", "rchar", "rd", "recursionlimit", 310 "return", "rfschar", "rhang", 311 "rm", "rn", "rnn", "rr", 312 "rs", "rt", "schar", "sentchar", 313 "shc", "shift", "sizes", "so", 314 "spacewidth", "special", "spreadwarn", "ss", 315 "sty", "substring", "sv", "sy", 316 "T&", "tc", "TE", 317 "TH", "tkf", "tl", 318 "tm", "tm1", "tmc", "tr", 319 "track", "transchar", "trf", "trimat", 320 "trin", "trnt", "troff", "TS", 321 "uf", "ul", "unformat", "unwatch", 322 "unwatchn", "vpt", "vs", "warn", 323 "warnscale", "watch", "watchlength", "watchn", 324 "wh", "while", "write", "writec", 325 "writem", "xflag", ".", NULL, 326 NULL, "text", 327 "Dd", "Dt", "Os", "Sh", 328 "Ss", "Pp", "D1", "Dl", 329 "Bd", "Ed", "Bl", "El", 330 "It", "Ad", "An", "Ap", 331 "Ar", "Cd", "Cm", "Dv", 332 "Er", "Ev", "Ex", "Fa", 333 "Fd", "Fl", "Fn", "Ft", 334 "Ic", "In", "Li", "Nd", 335 "Nm", "Op", "Ot", "Pa", 336 "Rv", "St", "Va", "Vt", 337 "Xr", "%A", "%B", "%D", 338 "%I", "%J", "%N", "%O", 339 "%P", "%R", "%T", "%V", 340 "Ac", "Ao", "Aq", "At", 341 "Bc", "Bf", "Bo", "Bq", 342 "Bsx", "Bx", "Db", "Dc", 343 "Do", "Dq", "Ec", "Ef", 344 "Em", "Eo", "Fx", "Ms", 345 "No", "Ns", "Nx", "Ox", 346 "Pc", "Pf", "Po", "Pq", 347 "Qc", "Ql", "Qo", "Qq", 348 "Re", "Rs", "Sc", "So", 349 "Sq", "Sm", "Sx", "Sy", 350 "Tn", "Ux", "Xc", "Xo", 351 "Fo", "Fc", "Oo", "Oc", 352 "Bk", "Ek", "Bt", "Hf", 353 "Fr", "Ud", "Lb", "Lp", 354 "Lk", "Mt", "Brq", "Bro", 355 "Brc", "%C", "Es", "En", 356 "Dx", "%Q", "%U", "Ta", 357 "Tg", NULL, 358 "TH", "SH", "SS", "TP", 359 "TQ", 360 "LP", "PP", "P", "IP", 361 "HP", "SM", "SB", "BI", 362 "IB", "BR", "RB", "R", 363 "B", "I", "IR", "RI", 364 "RE", "RS", "DT", "UC", 365 "PD", "AT", "in", 366 "SY", "YS", "OP", 367 "EX", "EE", "UR", 368 "UE", "MT", "ME", NULL 369 }; 370 const char *const *roff_name = __roff_name; 371 372 static struct roffmac roffs[TOKEN_NONE] = { 373 { roff_noarg, NULL, NULL, 0 }, /* br */ 374 { roff_onearg, NULL, NULL, 0 }, /* ce */ 375 { roff_noarg, NULL, NULL, 0 }, /* fi */ 376 { roff_onearg, NULL, NULL, 0 }, /* ft */ 377 { roff_onearg, NULL, NULL, 0 }, /* ll */ 378 { roff_mc, NULL, NULL, 0 }, /* mc */ 379 { roff_noarg, NULL, NULL, 0 }, /* nf */ 380 { roff_onearg, NULL, NULL, 0 }, /* po */ 381 { roff_onearg, NULL, NULL, 0 }, /* rj */ 382 { roff_onearg, NULL, NULL, 0 }, /* sp */ 383 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 384 { roff_onearg, NULL, NULL, 0 }, /* ti */ 385 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 386 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 387 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 388 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 389 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 390 { roff_als, NULL, NULL, 0 }, /* als */ 391 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 392 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 393 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 394 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 395 { roff_ds, NULL, NULL, 0 }, /* as */ 396 { roff_ds, NULL, NULL, 0 }, /* as1 */ 397 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 398 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 399 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 400 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 401 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 402 { roff_unsupp, NULL, NULL, 0 }, /* box */ 403 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 404 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 405 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 406 { roff_break, NULL, NULL, 0 }, /* break */ 407 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 408 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 409 { roff_noarg, NULL, NULL, 0 }, /* brp */ 410 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 411 { roff_unsupp, NULL, NULL, 0 }, /* c2 */ 412 { roff_cc, NULL, NULL, 0 }, /* cc */ 413 { roff_insec, NULL, NULL, 0 }, /* cf */ 414 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 415 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 416 { roff_char, NULL, NULL, 0 }, /* char */ 417 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 418 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 419 { roff_insec, NULL, NULL, 0 }, /* close */ 420 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 421 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 422 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 423 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 424 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 425 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 426 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 427 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 428 { roff_unsupp, NULL, NULL, 0 }, /* da */ 429 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 430 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 431 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 432 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 433 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 434 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 435 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 436 { roff_unsupp, NULL, NULL, 0 }, /* device */ 437 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 438 { roff_unsupp, NULL, NULL, 0 }, /* di */ 439 { roff_unsupp, NULL, NULL, 0 }, /* do */ 440 { roff_ds, NULL, NULL, 0 }, /* ds */ 441 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 442 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 443 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 444 { roff_ec, NULL, NULL, 0 }, /* ec */ 445 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 446 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 447 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 448 { roff_unsupp, NULL, NULL, 0 }, /* em */ 449 { roff_EN, NULL, NULL, 0 }, /* EN */ 450 { roff_eo, NULL, NULL, 0 }, /* eo */ 451 { roff_unsupp, NULL, NULL, 0 }, /* EP */ 452 { roff_EQ, NULL, NULL, 0 }, /* EQ */ 453 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 454 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 455 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 456 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 457 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 458 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 459 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 460 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 461 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 462 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 463 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 464 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 465 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 466 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 467 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 468 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 469 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 470 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 471 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 472 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 473 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 474 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 475 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 476 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 477 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 479 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 484 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 485 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 486 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 487 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 488 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ 489 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 490 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 491 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 492 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 493 { roff_unsupp, NULL, NULL, 0 }, /* index */ 494 { roff_it, NULL, NULL, 0 }, /* it */ 495 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 496 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 497 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 498 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 499 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 500 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 501 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 502 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 503 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 504 { roff_unsupp, NULL, NULL, 0 }, /* length */ 505 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 506 { roff_insec, NULL, NULL, 0 }, /* lf */ 507 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 508 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 509 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 510 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 511 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 512 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 513 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 514 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 515 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 516 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 517 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 518 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 519 { roff_insec, NULL, NULL, 0 }, /* mso */ 520 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 521 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 522 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 523 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 524 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 525 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 526 { roff_nop, NULL, NULL, 0 }, /* nop */ 527 { roff_nr, NULL, NULL, 0 }, /* nr */ 528 { roff_unsupp, NULL, NULL, 0 }, /* nrf */ 529 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ 530 { roff_line_ignore, NULL, NULL, 0 }, /* ns */ 531 { roff_insec, NULL, NULL, 0 }, /* nx */ 532 { roff_insec, NULL, NULL, 0 }, /* open */ 533 { roff_insec, NULL, NULL, 0 }, /* opena */ 534 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 535 { roff_unsupp, NULL, NULL, 0 }, /* output */ 536 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 537 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 538 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 539 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 540 { roff_insec, NULL, NULL, 0 }, /* pi */ 541 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 542 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 543 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 544 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 545 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 546 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 547 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 548 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 549 { roff_insec, NULL, NULL, 0 }, /* pso */ 550 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 551 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 552 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 553 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 554 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 555 { roff_return, NULL, NULL, 0 }, /* return */ 556 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 557 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 558 { roff_rm, NULL, NULL, 0 }, /* rm */ 559 { roff_rn, NULL, NULL, 0 }, /* rn */ 560 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 561 { roff_rr, NULL, NULL, 0 }, /* rr */ 562 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 563 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 564 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 565 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 566 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 567 { roff_shift, NULL, NULL, 0 }, /* shift */ 568 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ 569 { roff_so, NULL, NULL, 0 }, /* so */ 570 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ 571 { roff_line_ignore, NULL, NULL, 0 }, /* special */ 572 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 573 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 574 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 575 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 576 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 577 { roff_insec, NULL, NULL, 0 }, /* sy */ 578 { roff_T_, NULL, NULL, 0 }, /* T& */ 579 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 580 { roff_TE, NULL, NULL, 0 }, /* TE */ 581 { roff_Dd, NULL, NULL, 0 }, /* TH */ 582 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 583 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 584 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 585 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 586 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 587 { roff_tr, NULL, NULL, 0 }, /* tr */ 588 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 589 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 590 { roff_insec, NULL, NULL, 0 }, /* trf */ 591 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 592 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 593 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 594 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 595 { roff_TS, NULL, NULL, 0 }, /* TS */ 596 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 597 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 598 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 599 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 600 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 601 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 602 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 603 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 604 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 605 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 606 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 607 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 608 { roff_unsupp, NULL, NULL, 0 }, /* wh */ 609 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 610 { roff_insec, NULL, NULL, 0 }, /* write */ 611 { roff_insec, NULL, NULL, 0 }, /* writec */ 612 { roff_insec, NULL, NULL, 0 }, /* writem */ 613 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 614 { roff_cblock, NULL, NULL, 0 }, /* . */ 615 { roff_renamed, NULL, NULL, 0 }, 616 { roff_userdef, NULL, NULL, 0 } 617 }; 618 619 /* Array of injected predefined strings. */ 620 #define PREDEFS_MAX 38 621 static const struct predef predefs[PREDEFS_MAX] = { 622 #include "predefs.in" 623 }; 624 625 static int roffce_lines; /* number of input lines to center */ 626 static struct roff_node *roffce_node; /* active request */ 627 static int roffit_lines; /* number of lines to delay */ 628 static char *roffit_macro; /* nil-terminated macro line */ 629 630 631 /* --- request table ------------------------------------------------------ */ 632 633 struct ohash * 634 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 635 { 636 struct ohash *htab; 637 struct roffreq *req; 638 enum roff_tok tok; 639 size_t sz; 640 unsigned int slot; 641 642 htab = mandoc_malloc(sizeof(*htab)); 643 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 644 645 for (tok = mintok; tok < maxtok; tok++) { 646 if (roff_name[tok] == NULL) 647 continue; 648 sz = strlen(roff_name[tok]); 649 req = mandoc_malloc(sizeof(*req) + sz + 1); 650 req->tok = tok; 651 memcpy(req->name, roff_name[tok], sz + 1); 652 slot = ohash_qlookup(htab, req->name); 653 ohash_insert(htab, slot, req); 654 } 655 return htab; 656 } 657 658 void 659 roffhash_free(struct ohash *htab) 660 { 661 struct roffreq *req; 662 unsigned int slot; 663 664 if (htab == NULL) 665 return; 666 for (req = ohash_first(htab, &slot); req != NULL; 667 req = ohash_next(htab, &slot)) 668 free(req); 669 ohash_delete(htab); 670 free(htab); 671 } 672 673 enum roff_tok 674 roffhash_find(struct ohash *htab, const char *name, size_t sz) 675 { 676 struct roffreq *req; 677 const char *end; 678 679 if (sz) { 680 end = name + sz; 681 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 682 } else 683 req = ohash_find(htab, ohash_qlookup(htab, name)); 684 return req == NULL ? TOKEN_NONE : req->tok; 685 } 686 687 /* --- stack of request blocks -------------------------------------------- */ 688 689 /* 690 * Pop the current node off of the stack of roff instructions currently 691 * pending. Return 1 if it is a loop or 0 otherwise. 692 */ 693 static int 694 roffnode_pop(struct roff *r) 695 { 696 struct roffnode *p; 697 int inloop; 698 699 p = r->last; 700 inloop = p->tok == ROFF_while; 701 r->last = p->parent; 702 free(p->name); 703 free(p->end); 704 free(p); 705 return inloop; 706 } 707 708 /* 709 * Push a roff node onto the instruction stack. This must later be 710 * removed with roffnode_pop(). 711 */ 712 static void 713 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 714 int line, int col) 715 { 716 struct roffnode *p; 717 718 p = mandoc_calloc(1, sizeof(struct roffnode)); 719 p->tok = tok; 720 if (name) 721 p->name = mandoc_strdup(name); 722 p->parent = r->last; 723 p->line = line; 724 p->col = col; 725 p->rule = p->parent ? p->parent->rule : 0; 726 727 r->last = p; 728 } 729 730 /* --- roff parser state data management ---------------------------------- */ 731 732 static void 733 roff_free1(struct roff *r) 734 { 735 int i; 736 737 tbl_free(r->first_tbl); 738 r->first_tbl = r->last_tbl = r->tbl = NULL; 739 740 eqn_free(r->last_eqn); 741 r->last_eqn = r->eqn = NULL; 742 743 while (r->mstackpos >= 0) 744 roff_userret(r); 745 746 while (r->last) 747 roffnode_pop(r); 748 749 free (r->rstack); 750 r->rstack = NULL; 751 r->rstacksz = 0; 752 r->rstackpos = -1; 753 754 roff_freereg(r->regtab); 755 r->regtab = NULL; 756 757 roff_freestr(r->strtab); 758 roff_freestr(r->rentab); 759 roff_freestr(r->xmbtab); 760 r->strtab = r->rentab = r->xmbtab = NULL; 761 762 if (r->xtab) 763 for (i = 0; i < 128; i++) 764 free(r->xtab[i].p); 765 free(r->xtab); 766 r->xtab = NULL; 767 } 768 769 void 770 roff_reset(struct roff *r) 771 { 772 roff_free1(r); 773 r->options |= MPARSE_COMMENT; 774 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 775 r->control = '\0'; 776 r->escape = '\\'; 777 roffce_lines = 0; 778 roffce_node = NULL; 779 roffit_lines = 0; 780 roffit_macro = NULL; 781 } 782 783 void 784 roff_free(struct roff *r) 785 { 786 int i; 787 788 roff_free1(r); 789 for (i = 0; i < r->mstacksz; i++) 790 free(r->mstack[i].argv); 791 free(r->mstack); 792 roffhash_free(r->reqtab); 793 free(r); 794 } 795 796 struct roff * 797 roff_alloc(int options) 798 { 799 struct roff *r; 800 801 r = mandoc_calloc(1, sizeof(struct roff)); 802 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 803 r->options = options | MPARSE_COMMENT; 804 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 805 r->mstackpos = -1; 806 r->rstackpos = -1; 807 r->escape = '\\'; 808 return r; 809 } 810 811 /* --- syntax tree state data management ---------------------------------- */ 812 813 static void 814 roff_man_free1(struct roff_man *man) 815 { 816 if (man->meta.first != NULL) 817 roff_node_delete(man, man->meta.first); 818 free(man->meta.msec); 819 free(man->meta.vol); 820 free(man->meta.os); 821 free(man->meta.arch); 822 free(man->meta.title); 823 free(man->meta.name); 824 free(man->meta.date); 825 free(man->meta.sodest); 826 } 827 828 void 829 roff_state_reset(struct roff_man *man) 830 { 831 man->last = man->meta.first; 832 man->last_es = NULL; 833 man->flags = 0; 834 man->lastsec = man->lastnamed = SEC_NONE; 835 man->next = ROFF_NEXT_CHILD; 836 roff_setreg(man->roff, "nS", 0, '='); 837 } 838 839 static void 840 roff_man_alloc1(struct roff_man *man) 841 { 842 memset(&man->meta, 0, sizeof(man->meta)); 843 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 844 man->meta.first->type = ROFFT_ROOT; 845 man->meta.macroset = MACROSET_NONE; 846 roff_state_reset(man); 847 } 848 849 void 850 roff_man_reset(struct roff_man *man) 851 { 852 roff_man_free1(man); 853 roff_man_alloc1(man); 854 } 855 856 void 857 roff_man_free(struct roff_man *man) 858 { 859 roff_man_free1(man); 860 free(man->os_r); 861 free(man); 862 } 863 864 struct roff_man * 865 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 866 { 867 struct roff_man *man; 868 869 man = mandoc_calloc(1, sizeof(*man)); 870 man->roff = roff; 871 man->os_s = os_s; 872 man->quick = quick; 873 roff_man_alloc1(man); 874 roff->man = man; 875 return man; 876 } 877 878 /* --- syntax tree handling ----------------------------------------------- */ 879 880 struct roff_node * 881 roff_node_alloc(struct roff_man *man, int line, int pos, 882 enum roff_type type, int tok) 883 { 884 struct roff_node *n; 885 886 n = mandoc_calloc(1, sizeof(*n)); 887 n->line = line; 888 n->pos = pos; 889 n->tok = tok; 890 n->type = type; 891 n->sec = man->lastsec; 892 893 if (man->flags & MDOC_SYNOPSIS) 894 n->flags |= NODE_SYNPRETTY; 895 else 896 n->flags &= ~NODE_SYNPRETTY; 897 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 898 n->flags |= NODE_NOFILL; 899 else 900 n->flags &= ~NODE_NOFILL; 901 if (man->flags & MDOC_NEWLINE) 902 n->flags |= NODE_LINE; 903 man->flags &= ~MDOC_NEWLINE; 904 905 return n; 906 } 907 908 void 909 roff_node_append(struct roff_man *man, struct roff_node *n) 910 { 911 912 switch (man->next) { 913 case ROFF_NEXT_SIBLING: 914 if (man->last->next != NULL) { 915 n->next = man->last->next; 916 man->last->next->prev = n; 917 } else 918 man->last->parent->last = n; 919 man->last->next = n; 920 n->prev = man->last; 921 n->parent = man->last->parent; 922 break; 923 case ROFF_NEXT_CHILD: 924 if (man->last->child != NULL) { 925 n->next = man->last->child; 926 man->last->child->prev = n; 927 } else 928 man->last->last = n; 929 man->last->child = n; 930 n->parent = man->last; 931 break; 932 default: 933 abort(); 934 } 935 man->last = n; 936 937 switch (n->type) { 938 case ROFFT_HEAD: 939 n->parent->head = n; 940 break; 941 case ROFFT_BODY: 942 if (n->end != ENDBODY_NOT) 943 return; 944 n->parent->body = n; 945 break; 946 case ROFFT_TAIL: 947 n->parent->tail = n; 948 break; 949 default: 950 return; 951 } 952 953 /* 954 * Copy over the normalised-data pointer of our parent. Not 955 * everybody has one, but copying a null pointer is fine. 956 */ 957 958 n->norm = n->parent->norm; 959 assert(n->parent->type == ROFFT_BLOCK); 960 } 961 962 void 963 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 964 { 965 struct roff_node *n; 966 967 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 968 n->string = roff_strdup(man->roff, word); 969 roff_node_append(man, n); 970 n->flags |= NODE_VALID | NODE_ENDED; 971 man->next = ROFF_NEXT_SIBLING; 972 } 973 974 void 975 roff_word_append(struct roff_man *man, const char *word) 976 { 977 struct roff_node *n; 978 char *addstr, *newstr; 979 980 n = man->last; 981 addstr = roff_strdup(man->roff, word); 982 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 983 free(addstr); 984 free(n->string); 985 n->string = newstr; 986 man->next = ROFF_NEXT_SIBLING; 987 } 988 989 void 990 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 991 { 992 struct roff_node *n; 993 994 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 995 roff_node_append(man, n); 996 man->next = ROFF_NEXT_CHILD; 997 } 998 999 struct roff_node * 1000 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 1001 { 1002 struct roff_node *n; 1003 1004 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1005 roff_node_append(man, n); 1006 man->next = ROFF_NEXT_CHILD; 1007 return n; 1008 } 1009 1010 struct roff_node * 1011 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1012 { 1013 struct roff_node *n; 1014 1015 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1016 roff_node_append(man, n); 1017 man->next = ROFF_NEXT_CHILD; 1018 return n; 1019 } 1020 1021 struct roff_node * 1022 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1023 { 1024 struct roff_node *n; 1025 1026 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1027 roff_node_append(man, n); 1028 man->next = ROFF_NEXT_CHILD; 1029 return n; 1030 } 1031 1032 static void 1033 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1034 { 1035 struct roff_node *n; 1036 struct tbl_span *span; 1037 1038 if (man->meta.macroset == MACROSET_MAN) 1039 man_breakscope(man, ROFF_TS); 1040 while ((span = tbl_span(tbl)) != NULL) { 1041 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1042 n->span = span; 1043 roff_node_append(man, n); 1044 n->flags |= NODE_VALID | NODE_ENDED; 1045 man->next = ROFF_NEXT_SIBLING; 1046 } 1047 } 1048 1049 void 1050 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1051 { 1052 1053 /* Adjust siblings. */ 1054 1055 if (n->prev) 1056 n->prev->next = n->next; 1057 if (n->next) 1058 n->next->prev = n->prev; 1059 1060 /* Adjust parent. */ 1061 1062 if (n->parent != NULL) { 1063 if (n->parent->child == n) 1064 n->parent->child = n->next; 1065 if (n->parent->last == n) 1066 n->parent->last = n->prev; 1067 } 1068 1069 /* Adjust parse point. */ 1070 1071 if (man == NULL) 1072 return; 1073 if (man->last == n) { 1074 if (n->prev == NULL) { 1075 man->last = n->parent; 1076 man->next = ROFF_NEXT_CHILD; 1077 } else { 1078 man->last = n->prev; 1079 man->next = ROFF_NEXT_SIBLING; 1080 } 1081 } 1082 if (man->meta.first == n) 1083 man->meta.first = NULL; 1084 } 1085 1086 void 1087 roff_node_relink(struct roff_man *man, struct roff_node *n) 1088 { 1089 roff_node_unlink(man, n); 1090 n->prev = n->next = NULL; 1091 roff_node_append(man, n); 1092 } 1093 1094 void 1095 roff_node_free(struct roff_node *n) 1096 { 1097 1098 if (n->args != NULL) 1099 mdoc_argv_free(n->args); 1100 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1101 free(n->norm); 1102 eqn_box_free(n->eqn); 1103 free(n->string); 1104 free(n->tag); 1105 free(n); 1106 } 1107 1108 void 1109 roff_node_delete(struct roff_man *man, struct roff_node *n) 1110 { 1111 1112 while (n->child != NULL) 1113 roff_node_delete(man, n->child); 1114 roff_node_unlink(man, n); 1115 roff_node_free(n); 1116 } 1117 1118 int 1119 roff_node_transparent(struct roff_node *n) 1120 { 1121 if (n == NULL) 1122 return 0; 1123 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) 1124 return 1; 1125 return roff_tok_transparent(n->tok); 1126 } 1127 1128 int 1129 roff_tok_transparent(enum roff_tok tok) 1130 { 1131 switch (tok) { 1132 case ROFF_ft: 1133 case ROFF_ll: 1134 case ROFF_mc: 1135 case ROFF_po: 1136 case ROFF_ta: 1137 case MDOC_Db: 1138 case MDOC_Es: 1139 case MDOC_Sm: 1140 case MDOC_Tg: 1141 case MAN_DT: 1142 case MAN_UC: 1143 case MAN_PD: 1144 case MAN_AT: 1145 return 1; 1146 default: 1147 return 0; 1148 } 1149 } 1150 1151 struct roff_node * 1152 roff_node_child(struct roff_node *n) 1153 { 1154 for (n = n->child; roff_node_transparent(n); n = n->next) 1155 continue; 1156 return n; 1157 } 1158 1159 struct roff_node * 1160 roff_node_prev(struct roff_node *n) 1161 { 1162 do { 1163 n = n->prev; 1164 } while (roff_node_transparent(n)); 1165 return n; 1166 } 1167 1168 struct roff_node * 1169 roff_node_next(struct roff_node *n) 1170 { 1171 do { 1172 n = n->next; 1173 } while (roff_node_transparent(n)); 1174 return n; 1175 } 1176 1177 void 1178 deroff(char **dest, const struct roff_node *n) 1179 { 1180 char *cp; 1181 size_t sz; 1182 1183 if (n->string == NULL) { 1184 for (n = n->child; n != NULL; n = n->next) 1185 deroff(dest, n); 1186 return; 1187 } 1188 1189 /* Skip leading whitespace. */ 1190 1191 for (cp = n->string; *cp != '\0'; cp++) { 1192 if (cp[0] == '\\' && cp[1] != '\0' && 1193 strchr(" %&0^|~", cp[1]) != NULL) 1194 cp++; 1195 else if ( ! isspace((unsigned char)*cp)) 1196 break; 1197 } 1198 1199 /* Skip trailing backslash. */ 1200 1201 sz = strlen(cp); 1202 if (sz > 0 && cp[sz - 1] == '\\') 1203 sz--; 1204 1205 /* Skip trailing whitespace. */ 1206 1207 for (; sz; sz--) 1208 if ( ! isspace((unsigned char)cp[sz-1])) 1209 break; 1210 1211 /* Skip empty strings. */ 1212 1213 if (sz == 0) 1214 return; 1215 1216 if (*dest == NULL) { 1217 *dest = mandoc_strndup(cp, sz); 1218 return; 1219 } 1220 1221 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1222 free(*dest); 1223 *dest = cp; 1224 } 1225 1226 /* --- main functions of the roff parser ---------------------------------- */ 1227 1228 /* 1229 * Save comments preceding the title macro, for example in order to 1230 * preserve Copyright and license headers in HTML output, 1231 * provide diagnostics about RCS ids and trailing whitespace in comments, 1232 * then discard comments including preceding whitespace. 1233 * This function also handles input line continuation. 1234 */ 1235 static int 1236 roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec) 1237 { 1238 struct roff_node *n; /* used for header comments */ 1239 const char *start; /* start of the string to process */ 1240 const char *cp; /* for RCS id parsing */ 1241 char *stesc; /* start of an escape sequence ('\\') */ 1242 char *ep; /* end of comment string */ 1243 int rcsid; /* kind of RCS id seen */ 1244 1245 for (start = stesc = buf->buf + pos;; stesc++) { 1246 /* 1247 * XXX Ugly hack: Remove the newline character that 1248 * mparse_buf_r() appended to mark the end of input 1249 * if it is not preceded by an escape character. 1250 */ 1251 if (stesc[0] == '\n') { 1252 assert(stesc[1] == '\0'); 1253 stesc[0] = '\0'; 1254 } 1255 1256 /* The line ends without continuation or comment. */ 1257 if (stesc[0] == '\0') 1258 return ROFF_CONT; 1259 1260 /* Unescaped byte: skip it. */ 1261 if (stesc[0] != ec) 1262 continue; 1263 1264 /* 1265 * XXX Ugly hack: Do not attempt to append another line 1266 * if the function mparse_buf_r() appended a newline 1267 * character to indicate the end of input. 1268 */ 1269 if (stesc[1] == '\n') { 1270 assert(stesc[2] == '\0'); 1271 stesc[0] = '\0'; 1272 return ROFF_CONT; 1273 } 1274 1275 /* 1276 * An escape character at the end of an input line 1277 * requests line continuation. 1278 */ 1279 if (stesc[1] == '\0') { 1280 stesc[0] = '\0'; 1281 return ROFF_IGN | ROFF_APPEND; 1282 } 1283 1284 /* Found a comment: process it. */ 1285 if (stesc[1] == '"' || stesc[1] == '#') 1286 break; 1287 1288 /* Escaped escape character: skip them both. */ 1289 if (stesc[1] == ec) 1290 stesc++; 1291 } 1292 1293 /* Look for an RCS id in the comment. */ 1294 1295 rcsid = 0; 1296 if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) { 1297 rcsid = 1 << MANDOC_OS_OPENBSD; 1298 cp += 8; 1299 } else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) { 1300 rcsid = 1 << MANDOC_OS_NETBSD; 1301 cp += 7; 1302 } 1303 if (cp != NULL && isalnum((unsigned char)*cp) == 0 && 1304 strchr(cp, '$') != NULL) { 1305 if (r->man->meta.rcsids & rcsid) 1306 mandoc_msg(MANDOCERR_RCS_REP, ln, 1307 (int)(stesc - buf->buf) + 2, "%s", stesc + 1); 1308 r->man->meta.rcsids |= rcsid; 1309 } 1310 1311 /* Warn about trailing whitespace at the end of the comment. */ 1312 1313 ep = strchr(stesc + 2, '\0') - 1; 1314 if (*ep == '\n') 1315 *ep-- = '\0'; 1316 if (*ep == ' ' || *ep == '\t') 1317 mandoc_msg(MANDOCERR_SPACE_EOL, 1318 ln, (int)(ep - buf->buf), NULL); 1319 1320 /* Save comments preceding the title macro in the syntax tree. */ 1321 1322 if (r->options & MPARSE_COMMENT) { 1323 while (*ep == ' ' || *ep == '\t') 1324 ep--; 1325 ep[1] = '\0'; 1326 n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf, 1327 ROFFT_COMMENT, TOKEN_NONE); 1328 n->string = mandoc_strdup(stesc + 2); 1329 roff_node_append(r->man, n); 1330 n->flags |= NODE_VALID | NODE_ENDED; 1331 r->man->next = ROFF_NEXT_SIBLING; 1332 } 1333 1334 /* The comment requests line continuation. */ 1335 1336 if (stesc[1] == '#') { 1337 *stesc = '\0'; 1338 return ROFF_IGN | ROFF_APPEND; 1339 } 1340 1341 /* Discard the comment including preceding whitespace. */ 1342 1343 while (stesc > start && stesc[-1] == ' ' && 1344 (stesc == start + 1 || stesc[-2] != '\\')) 1345 stesc--; 1346 *stesc = '\0'; 1347 return ROFF_CONT; 1348 } 1349 1350 /* 1351 * In the current line, expand escape sequences that produce parsable 1352 * input text. Also check the syntax of the remaining escape sequences, 1353 * which typically produce output glyphs or change formatter state. 1354 */ 1355 static int 1356 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec) 1357 { 1358 char ubuf[24]; /* buffer to print a number */ 1359 struct mctx *ctx; /* current macro call context */ 1360 const char *res; /* the string to be pasted */ 1361 const char *src; /* source for copying */ 1362 char *dst; /* destination for copying */ 1363 int iesc; /* index of leading escape char */ 1364 int inam; /* index of the escape name */ 1365 int iarg; /* index beginning the argument */ 1366 int iendarg; /* index right after the argument */ 1367 int iend; /* index right after the sequence */ 1368 int isrc, idst; /* to reduce \\ and \. in names */ 1369 int deftype; /* type of definition to paste */ 1370 int argi; /* macro argument index */ 1371 int quote_args; /* true for \\$@, false for \\$* */ 1372 int asz; /* length of the replacement */ 1373 int rsz; /* length of the rest of the string */ 1374 int npos; /* position in numeric expression */ 1375 int expand_count; /* to avoid infinite loops */ 1376 1377 expand_count = 0; 1378 while (buf->buf[pos] != '\0') { 1379 1380 /* 1381 * Skip plain ASCII characters. 1382 * If we have a non-standard escape character, 1383 * escape literal backslashes because all processing in 1384 * subsequent functions uses the standard escaping rules. 1385 */ 1386 1387 if (buf->buf[pos] != ec) { 1388 if (ec != ASCII_ESC && buf->buf[pos] == '\\') { 1389 roff_expand_patch(buf, pos, "\\e", pos + 1); 1390 pos++; 1391 } 1392 pos++; 1393 continue; 1394 } 1395 1396 /* 1397 * Parse escape sequences, 1398 * issue diagnostic messages when appropriate, 1399 * and skip sequences that do not need expansion. 1400 * If we have a non-standard escape character, translate 1401 * it to backslashes and translate backslashes to \e. 1402 */ 1403 1404 if (roff_escape(buf->buf, ln, pos, &iesc, &inam, 1405 &iarg, &iendarg, &iend) != ESCAPE_EXPAND) { 1406 while (pos < iend) { 1407 if (buf->buf[pos] == ec) { 1408 buf->buf[pos] = '\\'; 1409 if (pos + 1 < iend) 1410 pos++; 1411 } else if (buf->buf[pos] == '\\') { 1412 roff_expand_patch(buf, 1413 pos, "\\e", pos + 1); 1414 pos++; 1415 iend++; 1416 } 1417 pos++; 1418 } 1419 continue; 1420 } 1421 1422 /* Reduce \\ and \. in names. */ 1423 1424 if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') { 1425 isrc = idst = iarg; 1426 while (isrc < iendarg) { 1427 if (isrc + 1 < iendarg && 1428 buf->buf[isrc] == '\\' && 1429 (buf->buf[isrc + 1] == '\\' || 1430 buf->buf[isrc + 1] == '.')) 1431 isrc++; 1432 buf->buf[idst++] = buf->buf[isrc++]; 1433 } 1434 iendarg -= isrc - idst; 1435 } 1436 1437 /* Handle expansion. */ 1438 1439 res = NULL; 1440 switch (buf->buf[inam]) { 1441 case '*': 1442 if (iendarg == iarg) 1443 break; 1444 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1445 if ((res = roff_getstrn(r, buf->buf + iarg, 1446 iendarg - iarg, &deftype)) != NULL) 1447 break; 1448 1449 /* 1450 * If not overridden, 1451 * let \*(.T through to the formatters. 1452 */ 1453 1454 if (iendarg - iarg == 2 && 1455 buf->buf[iarg] == '.' && 1456 buf->buf[iarg + 1] == 'T') { 1457 roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0); 1458 pos = iend; 1459 continue; 1460 } 1461 1462 mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc, 1463 "%.*s", iendarg - iarg, buf->buf + iarg); 1464 break; 1465 1466 case '$': 1467 if (r->mstackpos < 0) { 1468 mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc, 1469 "%.*s", iend - iesc, buf->buf + iesc); 1470 break; 1471 } 1472 ctx = r->mstack + r->mstackpos; 1473 argi = buf->buf[iarg] - '1'; 1474 if (argi >= 0 && argi <= 8) { 1475 if (argi < ctx->argc) 1476 res = ctx->argv[argi]; 1477 break; 1478 } 1479 if (buf->buf[iarg] == '*') 1480 quote_args = 0; 1481 else if (buf->buf[iarg] == '@') 1482 quote_args = 1; 1483 else { 1484 mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc, 1485 "%.*s", iend - iesc, buf->buf + iesc); 1486 break; 1487 } 1488 asz = 0; 1489 for (argi = 0; argi < ctx->argc; argi++) { 1490 if (argi) 1491 asz++; /* blank */ 1492 if (quote_args) 1493 asz += 2; /* quotes */ 1494 asz += strlen(ctx->argv[argi]); 1495 } 1496 if (asz != iend - iesc) { 1497 rsz = buf->sz - iend; 1498 if (asz < iend - iesc) 1499 memmove(buf->buf + iesc + asz, 1500 buf->buf + iend, rsz); 1501 buf->sz = iesc + asz + rsz; 1502 buf->buf = mandoc_realloc(buf->buf, buf->sz); 1503 if (asz > iend - iesc) 1504 memmove(buf->buf + iesc + asz, 1505 buf->buf + iend, rsz); 1506 } 1507 dst = buf->buf + iesc; 1508 for (argi = 0; argi < ctx->argc; argi++) { 1509 if (argi) 1510 *dst++ = ' '; 1511 if (quote_args) 1512 *dst++ = '"'; 1513 src = ctx->argv[argi]; 1514 while (*src != '\0') 1515 *dst++ = *src++; 1516 if (quote_args) 1517 *dst++ = '"'; 1518 } 1519 continue; 1520 case 'A': 1521 ubuf[0] = iendarg > iarg ? '1' : '0'; 1522 ubuf[1] = '\0'; 1523 res = ubuf; 1524 break; 1525 case 'B': 1526 npos = 0; 1527 ubuf[0] = iendarg > iarg && iend > iendarg && 1528 roff_evalnum(r, ln, buf->buf + iarg, &npos, 1529 NULL, ROFFNUM_SCALE) && 1530 npos == iendarg - iarg ? '1' : '0'; 1531 ubuf[1] = '\0'; 1532 res = ubuf; 1533 break; 1534 case 'V': 1535 mandoc_msg(MANDOCERR_UNSUPP, ln, iesc, 1536 "%.*s", iend - iesc, buf->buf + iesc); 1537 roff_expand_patch(buf, iendarg, "}", iend); 1538 roff_expand_patch(buf, iesc, "${", iarg); 1539 continue; 1540 case 'g': 1541 break; 1542 case 'n': 1543 if (iendarg > iarg) 1544 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1545 roff_getregn(r, buf->buf + iarg, 1546 iendarg - iarg, buf->buf[inam + 1])); 1547 else 1548 ubuf[0] = '\0'; 1549 res = ubuf; 1550 break; 1551 case 'w': 1552 (void)snprintf(ubuf, sizeof(ubuf), 1553 "%d", (iendarg - iarg) * 24); 1554 res = ubuf; 1555 break; 1556 default: 1557 break; 1558 } 1559 if (res == NULL) 1560 res = ""; 1561 if (++expand_count > EXPAND_LIMIT || 1562 buf->sz + strlen(res) > SHRT_MAX) { 1563 mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL); 1564 return ROFF_IGN; 1565 } 1566 roff_expand_patch(buf, iesc, res, iend); 1567 } 1568 return ROFF_CONT; 1569 } 1570 1571 /* 1572 * Replace the substring from the start position (inclusive) 1573 * to end position (exclusive) with the repl(acement) string. 1574 */ 1575 static void 1576 roff_expand_patch(struct buf *buf, int start, const char *repl, int end) 1577 { 1578 char *nbuf; 1579 1580 buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf, 1581 repl, buf->buf + end) + 1; 1582 free(buf->buf); 1583 buf->buf = nbuf; 1584 } 1585 1586 /* 1587 * Parse a quoted or unquoted roff-style request or macro argument. 1588 * Return a pointer to the parsed argument, which is either the original 1589 * pointer or advanced by one byte in case the argument is quoted. 1590 * NUL-terminate the argument in place. 1591 * Collapse pairs of quotes inside quoted arguments. 1592 * Advance the argument pointer to the next argument, 1593 * or to the NUL byte terminating the argument line. 1594 */ 1595 char * 1596 roff_getarg(struct roff *r, char **cpp, int ln, int *pos) 1597 { 1598 struct buf buf; 1599 char *cp, *start; 1600 int newesc, pairs, quoted, white; 1601 1602 /* Quoting can only start with a new word. */ 1603 start = *cpp; 1604 quoted = 0; 1605 if ('"' == *start) { 1606 quoted = 1; 1607 start++; 1608 } 1609 1610 newesc = pairs = white = 0; 1611 for (cp = start; '\0' != *cp; cp++) { 1612 1613 /* 1614 * Move the following text left 1615 * after quoted quotes and after "\\" and "\t". 1616 */ 1617 if (pairs) 1618 cp[-pairs] = cp[0]; 1619 1620 if ('\\' == cp[0]) { 1621 /* 1622 * In copy mode, translate double to single 1623 * backslashes and backslash-t to literal tabs. 1624 */ 1625 switch (cp[1]) { 1626 case 'a': 1627 case 't': 1628 cp[-pairs] = '\t'; 1629 pairs++; 1630 cp++; 1631 break; 1632 case '\\': 1633 /* 1634 * Signal to roff_expand() that an escape 1635 * sequence resulted from copy-in processing 1636 * and needs to be checked or interpolated. 1637 */ 1638 cp[-pairs] = ASCII_ESC; 1639 newesc = 1; 1640 pairs++; 1641 cp++; 1642 break; 1643 case ' ': 1644 /* Skip escaped blanks. */ 1645 if (0 == quoted) 1646 cp++; 1647 break; 1648 default: 1649 break; 1650 } 1651 } else if (0 == quoted) { 1652 if (' ' == cp[0]) { 1653 /* Unescaped blanks end unquoted args. */ 1654 white = 1; 1655 break; 1656 } 1657 } else if ('"' == cp[0]) { 1658 if ('"' == cp[1]) { 1659 /* Quoted quotes collapse. */ 1660 pairs++; 1661 cp++; 1662 } else { 1663 /* Unquoted quotes end quoted args. */ 1664 quoted = 2; 1665 break; 1666 } 1667 } 1668 } 1669 1670 /* Quoted argument without a closing quote. */ 1671 if (1 == quoted) 1672 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1673 1674 /* NUL-terminate this argument and move to the next one. */ 1675 if (pairs) 1676 cp[-pairs] = '\0'; 1677 if ('\0' != *cp) { 1678 *cp++ = '\0'; 1679 while (' ' == *cp) 1680 cp++; 1681 } 1682 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1683 *cpp = cp; 1684 1685 if ('\0' == *cp && (white || ' ' == cp[-1])) 1686 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1687 1688 start = mandoc_strdup(start); 1689 if (newesc == 0) 1690 return start; 1691 1692 buf.buf = start; 1693 buf.sz = strlen(start) + 1; 1694 buf.next = NULL; 1695 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { 1696 free(buf.buf); 1697 buf.buf = mandoc_strdup(""); 1698 } 1699 return buf.buf; 1700 } 1701 1702 1703 /* 1704 * Process text streams. 1705 */ 1706 static int 1707 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1708 { 1709 size_t sz; 1710 const char *start; 1711 char *p; 1712 int isz; 1713 enum mandoc_esc esc; 1714 1715 /* Spring the input line trap. */ 1716 1717 if (roffit_lines == 1) { 1718 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1719 free(buf->buf); 1720 buf->buf = p; 1721 buf->sz = isz + 1; 1722 *offs = 0; 1723 free(roffit_macro); 1724 roffit_lines = 0; 1725 return ROFF_REPARSE; 1726 } else if (roffit_lines > 1) 1727 --roffit_lines; 1728 1729 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1730 if (roffce_lines < 1) { 1731 r->man->last = roffce_node; 1732 r->man->next = ROFF_NEXT_SIBLING; 1733 roffce_lines = 0; 1734 roffce_node = NULL; 1735 } else 1736 roffce_lines--; 1737 } 1738 1739 /* Convert all breakable hyphens into ASCII_HYPH. */ 1740 1741 start = p = buf->buf + pos; 1742 1743 while (*p != '\0') { 1744 sz = strcspn(p, "-\\"); 1745 p += sz; 1746 1747 if (*p == '\0') 1748 break; 1749 1750 if (*p == '\\') { 1751 /* Skip over escapes. */ 1752 p++; 1753 esc = mandoc_escape((const char **)&p, NULL, NULL); 1754 if (esc == ESCAPE_ERROR) 1755 break; 1756 while (*p == '-') 1757 p++; 1758 continue; 1759 } else if (p == start) { 1760 p++; 1761 continue; 1762 } 1763 1764 if (isalpha((unsigned char)p[-1]) && 1765 isalpha((unsigned char)p[1])) 1766 *p = ASCII_HYPH; 1767 p++; 1768 } 1769 return ROFF_CONT; 1770 } 1771 1772 int 1773 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len) 1774 { 1775 enum roff_tok t; 1776 int e; 1777 int pos; /* parse point */ 1778 int spos; /* saved parse point for messages */ 1779 int ppos; /* original offset in buf->buf */ 1780 int ctl; /* macro line (boolean) */ 1781 1782 ppos = pos = *offs; 1783 1784 if (len > 80 && r->tbl == NULL && r->eqn == NULL && 1785 (r->man->flags & ROFF_NOFILL) == 0 && 1786 strchr(" .\\", buf->buf[pos]) == NULL && 1787 buf->buf[pos] != r->control && 1788 strcspn(buf->buf, " ") < 80) 1789 mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1, 1790 "%.20s...", buf->buf + pos); 1791 1792 /* Handle in-line equation delimiters. */ 1793 1794 if (r->tbl == NULL && 1795 r->last_eqn != NULL && r->last_eqn->delim && 1796 (r->eqn == NULL || r->eqn_inline)) { 1797 e = roff_eqndelim(r, buf, pos); 1798 if (e == ROFF_REPARSE) 1799 return e; 1800 assert(e == ROFF_CONT); 1801 } 1802 1803 /* Handle comments and escape sequences. */ 1804 1805 e = roff_parse_comment(r, buf, ln, pos, r->escape); 1806 if ((e & ROFF_MASK) == ROFF_IGN) 1807 return e; 1808 assert(e == ROFF_CONT); 1809 1810 e = roff_expand(r, buf, ln, pos, r->escape); 1811 if ((e & ROFF_MASK) == ROFF_IGN) 1812 return e; 1813 assert(e == ROFF_CONT); 1814 1815 ctl = roff_getcontrol(r, buf->buf, &pos); 1816 1817 /* 1818 * First, if a scope is open and we're not a macro, pass the 1819 * text through the macro's filter. 1820 * Equations process all content themselves. 1821 * Tables process almost all content themselves, but we want 1822 * to warn about macros before passing it there. 1823 */ 1824 1825 if (r->last != NULL && ! ctl) { 1826 t = r->last->tok; 1827 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1828 if ((e & ROFF_MASK) == ROFF_IGN) 1829 return e; 1830 e &= ~ROFF_MASK; 1831 } else 1832 e = ROFF_IGN; 1833 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1834 eqn_read(r->eqn, buf->buf + ppos); 1835 return e; 1836 } 1837 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1838 tbl_read(r->tbl, ln, buf->buf, ppos); 1839 roff_addtbl(r->man, ln, r->tbl); 1840 return e; 1841 } 1842 if ( ! ctl) { 1843 r->options &= ~MPARSE_COMMENT; 1844 return roff_parsetext(r, buf, pos, offs) | e; 1845 } 1846 1847 /* Skip empty request lines. */ 1848 1849 if (buf->buf[pos] == '"') { 1850 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL); 1851 return ROFF_IGN; 1852 } else if (buf->buf[pos] == '\0') 1853 return ROFF_IGN; 1854 1855 /* 1856 * If a scope is open, go to the child handler for that macro, 1857 * as it may want to preprocess before doing anything with it. 1858 */ 1859 1860 if (r->last) { 1861 t = r->last->tok; 1862 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1863 } 1864 1865 r->options &= ~MPARSE_COMMENT; 1866 spos = pos; 1867 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1868 return roff_req_or_macro(r, t, buf, ln, spos, pos, offs); 1869 } 1870 1871 /* 1872 * Handle a new request or macro. 1873 * May be called outside any scope or from inside a conditional scope. 1874 */ 1875 static int 1876 roff_req_or_macro(ROFF_ARGS) { 1877 1878 /* For now, tables ignore most macros and some request. */ 1879 1880 if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS || 1881 tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj || 1882 tok == ROFF_sp)) { 1883 mandoc_msg(MANDOCERR_TBLMACRO, 1884 ln, ppos, "%s", buf->buf + ppos); 1885 if (tok != TOKEN_NONE) 1886 return ROFF_IGN; 1887 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1888 pos++; 1889 while (buf->buf[pos] == ' ') 1890 pos++; 1891 tbl_read(r->tbl, ln, buf->buf, pos); 1892 roff_addtbl(r->man, ln, r->tbl); 1893 return ROFF_IGN; 1894 } 1895 1896 /* For now, let high level macros abort .ce mode. */ 1897 1898 if (roffce_node != NULL && 1899 (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ || 1900 tok == ROFF_TH || tok == ROFF_TS)) { 1901 r->man->last = roffce_node; 1902 r->man->next = ROFF_NEXT_SIBLING; 1903 roffce_lines = 0; 1904 roffce_node = NULL; 1905 } 1906 1907 /* 1908 * This is neither a roff request nor a user-defined macro. 1909 * Let the standard macro set parsers handle it. 1910 */ 1911 1912 if (tok == TOKEN_NONE) 1913 return ROFF_CONT; 1914 1915 /* Execute a roff request or a user-defined macro. */ 1916 1917 return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs); 1918 } 1919 1920 /* 1921 * Internal interface function to tell the roff parser that execution 1922 * of the current macro ended. This is required because macro 1923 * definitions usually do not end with a .return request. 1924 */ 1925 void 1926 roff_userret(struct roff *r) 1927 { 1928 struct mctx *ctx; 1929 int i; 1930 1931 assert(r->mstackpos >= 0); 1932 ctx = r->mstack + r->mstackpos; 1933 for (i = 0; i < ctx->argc; i++) 1934 free(ctx->argv[i]); 1935 ctx->argc = 0; 1936 r->mstackpos--; 1937 } 1938 1939 void 1940 roff_endparse(struct roff *r) 1941 { 1942 if (r->last != NULL) 1943 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, 1944 r->last->col, "%s", roff_name[r->last->tok]); 1945 1946 if (r->eqn != NULL) { 1947 mandoc_msg(MANDOCERR_BLK_NOEND, 1948 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1949 eqn_parse(r->eqn); 1950 r->eqn = NULL; 1951 } 1952 1953 if (r->tbl != NULL) { 1954 tbl_end(r->tbl, 1); 1955 r->tbl = NULL; 1956 } 1957 } 1958 1959 /* 1960 * Parse the request or macro name at buf[*pos]. 1961 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value. 1962 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE. 1963 * As a side effect, set r->current_string to the definition or to NULL. 1964 */ 1965 static enum roff_tok 1966 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1967 { 1968 char *cp; 1969 const char *mac; 1970 size_t maclen; 1971 int deftype; 1972 enum roff_tok t; 1973 1974 cp = buf + *pos; 1975 1976 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1977 return TOKEN_NONE; 1978 1979 mac = cp; 1980 maclen = roff_getname(r, &cp, ln, ppos); 1981 1982 deftype = ROFFDEF_USER | ROFFDEF_REN; 1983 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 1984 switch (deftype) { 1985 case ROFFDEF_USER: 1986 t = ROFF_USERDEF; 1987 break; 1988 case ROFFDEF_REN: 1989 t = ROFF_RENAMED; 1990 break; 1991 default: 1992 t = roffhash_find(r->reqtab, mac, maclen); 1993 break; 1994 } 1995 if (t != TOKEN_NONE) 1996 *pos = cp - buf; 1997 else if (deftype == ROFFDEF_UNDEF) { 1998 /* Using an undefined macro defines it to be empty. */ 1999 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 2000 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 2001 } 2002 return t; 2003 } 2004 2005 /* --- handling of request blocks ----------------------------------------- */ 2006 2007 /* 2008 * Close a macro definition block or an "ignore" block. 2009 */ 2010 static int 2011 roff_cblock(ROFF_ARGS) 2012 { 2013 int rr; 2014 2015 if (r->last == NULL) { 2016 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2017 return ROFF_IGN; 2018 } 2019 2020 switch (r->last->tok) { 2021 case ROFF_am: 2022 case ROFF_ami: 2023 case ROFF_de: 2024 case ROFF_dei: 2025 case ROFF_ig: 2026 break; 2027 case ROFF_am1: 2028 case ROFF_de1: 2029 /* Remapped in roff_block(). */ 2030 abort(); 2031 default: 2032 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2033 return ROFF_IGN; 2034 } 2035 2036 roffnode_pop(r); 2037 roffnode_cleanscope(r); 2038 2039 /* 2040 * If a conditional block with braces is still open, 2041 * check for "\}" block end markers. 2042 */ 2043 2044 if (r->last != NULL && r->last->endspan < 0) { 2045 rr = 1; /* If arguments follow "\}", warn about them. */ 2046 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2047 } 2048 2049 if (buf->buf[pos] != '\0') 2050 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 2051 ".. %s", buf->buf + pos); 2052 2053 return ROFF_IGN; 2054 } 2055 2056 /* 2057 * Pop all nodes ending at the end of the current input line. 2058 * Return the number of loops ended. 2059 */ 2060 static int 2061 roffnode_cleanscope(struct roff *r) 2062 { 2063 int inloop; 2064 2065 inloop = 0; 2066 while (r->last != NULL && r->last->endspan > 0) { 2067 if (--r->last->endspan != 0) 2068 break; 2069 inloop += roffnode_pop(r); 2070 } 2071 return inloop; 2072 } 2073 2074 /* 2075 * Handle the closing "\}" of a conditional block. 2076 * Apart from generating warnings, this only pops nodes. 2077 * Return the number of loops ended. 2078 */ 2079 static int 2080 roff_ccond(struct roff *r, int ln, int ppos) 2081 { 2082 if (NULL == r->last) { 2083 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2084 return 0; 2085 } 2086 2087 switch (r->last->tok) { 2088 case ROFF_el: 2089 case ROFF_ie: 2090 case ROFF_if: 2091 case ROFF_while: 2092 break; 2093 default: 2094 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2095 return 0; 2096 } 2097 2098 if (r->last->endspan > -1) { 2099 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2100 return 0; 2101 } 2102 2103 return roffnode_pop(r) + roffnode_cleanscope(r); 2104 } 2105 2106 static int 2107 roff_block(ROFF_ARGS) 2108 { 2109 const char *name, *value; 2110 char *call, *cp, *iname, *rname; 2111 size_t csz, namesz, rsz; 2112 int deftype; 2113 2114 /* Ignore groff compatibility mode for now. */ 2115 2116 if (tok == ROFF_de1) 2117 tok = ROFF_de; 2118 else if (tok == ROFF_dei1) 2119 tok = ROFF_dei; 2120 else if (tok == ROFF_am1) 2121 tok = ROFF_am; 2122 else if (tok == ROFF_ami1) 2123 tok = ROFF_ami; 2124 2125 /* Parse the macro name argument. */ 2126 2127 cp = buf->buf + pos; 2128 if (tok == ROFF_ig) { 2129 iname = NULL; 2130 namesz = 0; 2131 } else { 2132 iname = cp; 2133 namesz = roff_getname(r, &cp, ln, ppos); 2134 iname[namesz] = '\0'; 2135 } 2136 2137 /* Resolve the macro name argument if it is indirect. */ 2138 2139 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2140 deftype = ROFFDEF_USER; 2141 name = roff_getstrn(r, iname, namesz, &deftype); 2142 if (name == NULL) { 2143 mandoc_msg(MANDOCERR_STR_UNDEF, 2144 ln, (int)(iname - buf->buf), 2145 "%.*s", (int)namesz, iname); 2146 namesz = 0; 2147 } else 2148 namesz = strlen(name); 2149 } else 2150 name = iname; 2151 2152 if (namesz == 0 && tok != ROFF_ig) { 2153 mandoc_msg(MANDOCERR_REQ_EMPTY, 2154 ln, ppos, "%s", roff_name[tok]); 2155 return ROFF_IGN; 2156 } 2157 2158 roffnode_push(r, tok, name, ln, ppos); 2159 2160 /* 2161 * At the beginning of a `de' macro, clear the existing string 2162 * with the same name, if there is one. New content will be 2163 * appended from roff_block_text() in multiline mode. 2164 */ 2165 2166 if (tok == ROFF_de || tok == ROFF_dei) { 2167 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2168 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2169 } else if (tok == ROFF_am || tok == ROFF_ami) { 2170 deftype = ROFFDEF_ANY; 2171 value = roff_getstrn(r, iname, namesz, &deftype); 2172 switch (deftype) { /* Before appending, ... */ 2173 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2174 roff_setstrn(&r->strtab, name, namesz, 2175 value, strlen(value), 0); 2176 break; 2177 case ROFFDEF_REN: /* call original standard macro. */ 2178 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2179 (int)strlen(value), value); 2180 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2181 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2182 free(call); 2183 break; 2184 case ROFFDEF_STD: /* rename and call standard macro. */ 2185 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2186 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2187 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2188 (int)rsz, rname); 2189 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2190 free(call); 2191 free(rname); 2192 break; 2193 default: 2194 break; 2195 } 2196 } 2197 2198 if (*cp == '\0') 2199 return ROFF_IGN; 2200 2201 /* Get the custom end marker. */ 2202 2203 iname = cp; 2204 namesz = roff_getname(r, &cp, ln, ppos); 2205 2206 /* Resolve the end marker if it is indirect. */ 2207 2208 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2209 deftype = ROFFDEF_USER; 2210 name = roff_getstrn(r, iname, namesz, &deftype); 2211 if (name == NULL) { 2212 mandoc_msg(MANDOCERR_STR_UNDEF, 2213 ln, (int)(iname - buf->buf), 2214 "%.*s", (int)namesz, iname); 2215 namesz = 0; 2216 } else 2217 namesz = strlen(name); 2218 } else 2219 name = iname; 2220 2221 if (namesz) 2222 r->last->end = mandoc_strndup(name, namesz); 2223 2224 if (*cp != '\0') 2225 mandoc_msg(MANDOCERR_ARG_EXCESS, 2226 ln, pos, ".%s ... %s", roff_name[tok], cp); 2227 2228 return ROFF_IGN; 2229 } 2230 2231 static int 2232 roff_block_sub(ROFF_ARGS) 2233 { 2234 enum roff_tok t; 2235 int i, j; 2236 2237 /* 2238 * If a custom end marker is a user-defined or predefined macro 2239 * or a request, interpret it. 2240 */ 2241 2242 if (r->last->end) { 2243 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2244 if (buf->buf[i] != r->last->end[j]) 2245 break; 2246 2247 if (r->last->end[j] == '\0' && 2248 (buf->buf[i] == '\0' || 2249 buf->buf[i] == ' ' || 2250 buf->buf[i] == '\t')) { 2251 roffnode_pop(r); 2252 roffnode_cleanscope(r); 2253 2254 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2255 i++; 2256 2257 pos = i; 2258 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2259 TOKEN_NONE) 2260 return ROFF_RERUN; 2261 return ROFF_IGN; 2262 } 2263 } 2264 2265 /* Handle the standard end marker. */ 2266 2267 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2268 if (t == ROFF_cblock) 2269 return roff_cblock(r, t, buf, ln, ppos, pos, offs); 2270 2271 /* Not an end marker, so append the line to the block. */ 2272 2273 if (tok != ROFF_ig) 2274 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2275 return ROFF_IGN; 2276 } 2277 2278 static int 2279 roff_block_text(ROFF_ARGS) 2280 { 2281 2282 if (tok != ROFF_ig) 2283 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2284 2285 return ROFF_IGN; 2286 } 2287 2288 /* 2289 * Check for a closing "\}" and handle it. 2290 * In this function, the final "int *offs" argument is used for 2291 * different purposes than elsewhere: 2292 * Input: *offs == 0: caller wants to discard arguments following \} 2293 * *offs == 1: caller wants to preserve text following \} 2294 * Output: *offs = 0: tell caller to discard input line 2295 * *offs = 1: tell caller to use input line 2296 */ 2297 static int 2298 roff_cond_checkend(ROFF_ARGS) 2299 { 2300 char *ep; 2301 int endloop, irc, rr; 2302 2303 irc = ROFF_IGN; 2304 rr = r->last->rule; 2305 endloop = tok != ROFF_while ? ROFF_IGN : 2306 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2307 if (roffnode_cleanscope(r)) 2308 irc |= endloop; 2309 2310 /* 2311 * If "\}" occurs on a macro line without a preceding macro or 2312 * a text line contains nothing else, drop the line completely. 2313 */ 2314 2315 ep = buf->buf + pos; 2316 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0)) 2317 rr = 0; 2318 2319 /* 2320 * The closing delimiter "\}" rewinds the conditional scope 2321 * but is otherwise ignored when interpreting the line. 2322 */ 2323 2324 while ((ep = strchr(ep, '\\')) != NULL) { 2325 switch (ep[1]) { 2326 case '}': 2327 if (ep[2] == '\0') 2328 ep[0] = '\0'; 2329 else if (rr) 2330 ep[1] = '&'; 2331 else 2332 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2333 if (roff_ccond(r, ln, ep - buf->buf)) 2334 irc |= endloop; 2335 break; 2336 case '\0': 2337 ++ep; 2338 break; 2339 default: 2340 ep += 2; 2341 break; 2342 } 2343 } 2344 *offs = rr; 2345 return irc; 2346 } 2347 2348 /* 2349 * Parse and process a request or macro line in conditional scope. 2350 */ 2351 static int 2352 roff_cond_sub(ROFF_ARGS) 2353 { 2354 struct roffnode *bl; 2355 int irc, rr, spos; 2356 enum roff_tok t; 2357 2358 rr = 0; /* If arguments follow "\}", skip them. */ 2359 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2360 spos = pos; 2361 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2362 2363 /* 2364 * Handle requests and macros if the conditional evaluated 2365 * to true or if they are structurally required. 2366 * The .break request is always handled specially. 2367 */ 2368 2369 if (t == ROFF_break) { 2370 if (irc & ROFF_LOOPMASK) 2371 irc = ROFF_IGN | ROFF_LOOPEXIT; 2372 else if (rr) { 2373 for (bl = r->last; bl != NULL; bl = bl->parent) { 2374 bl->rule = 0; 2375 if (bl->tok == ROFF_while) 2376 break; 2377 } 2378 } 2379 } else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) { 2380 irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs); 2381 if (irc & ROFF_WHILE) 2382 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT); 2383 } 2384 return irc; 2385 } 2386 2387 /* 2388 * Parse and process a text line in conditional scope. 2389 */ 2390 static int 2391 roff_cond_text(ROFF_ARGS) 2392 { 2393 int irc, rr; 2394 2395 rr = 1; /* If arguments follow "\}", preserve them. */ 2396 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2397 if (rr) 2398 irc |= ROFF_CONT; 2399 return irc; 2400 } 2401 2402 /* --- handling of numeric and conditional expressions -------------------- */ 2403 2404 /* 2405 * Parse a single signed integer number. Stop at the first non-digit. 2406 * If there is at least one digit, return success and advance the 2407 * parse point, else return failure and let the parse point unchanged. 2408 * Ignore overflows, treat them just like the C language. 2409 */ 2410 static int 2411 roff_getnum(const char *v, int *pos, int *res, int flags) 2412 { 2413 int myres, scaled, n, p; 2414 2415 if (NULL == res) 2416 res = &myres; 2417 2418 p = *pos; 2419 n = v[p] == '-'; 2420 if (n || v[p] == '+') 2421 p++; 2422 2423 if (flags & ROFFNUM_WHITE) 2424 while (isspace((unsigned char)v[p])) 2425 p++; 2426 2427 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2428 *res = 10 * *res + v[p] - '0'; 2429 if (p == *pos + n) 2430 return 0; 2431 2432 if (n) 2433 *res = -*res; 2434 2435 /* Each number may be followed by one optional scaling unit. */ 2436 2437 switch (v[p]) { 2438 case 'f': 2439 scaled = *res * 65536; 2440 break; 2441 case 'i': 2442 scaled = *res * 240; 2443 break; 2444 case 'c': 2445 scaled = *res * 240 / 2.54; 2446 break; 2447 case 'v': 2448 case 'P': 2449 scaled = *res * 40; 2450 break; 2451 case 'm': 2452 case 'n': 2453 scaled = *res * 24; 2454 break; 2455 case 'p': 2456 scaled = *res * 10 / 3; 2457 break; 2458 case 'u': 2459 scaled = *res; 2460 break; 2461 case 'M': 2462 scaled = *res * 6 / 25; 2463 break; 2464 default: 2465 scaled = *res; 2466 p--; 2467 break; 2468 } 2469 if (flags & ROFFNUM_SCALE) 2470 *res = scaled; 2471 2472 *pos = p + 1; 2473 return 1; 2474 } 2475 2476 /* 2477 * Evaluate a string comparison condition. 2478 * The first character is the delimiter. 2479 * Succeed if the string up to its second occurrence 2480 * matches the string up to its third occurrence. 2481 * Advance the cursor after the third occurrence 2482 * or lacking that, to the end of the line. 2483 */ 2484 static int 2485 roff_evalstrcond(const char *v, int *pos) 2486 { 2487 const char *s1, *s2, *s3; 2488 int match; 2489 2490 match = 0; 2491 s1 = v + *pos; /* initial delimiter */ 2492 s2 = s1 + 1; /* for scanning the first string */ 2493 s3 = strchr(s2, *s1); /* for scanning the second string */ 2494 2495 if (NULL == s3) /* found no middle delimiter */ 2496 goto out; 2497 2498 while ('\0' != *++s3) { 2499 if (*s2 != *s3) { /* mismatch */ 2500 s3 = strchr(s3, *s1); 2501 break; 2502 } 2503 if (*s3 == *s1) { /* found the final delimiter */ 2504 match = 1; 2505 break; 2506 } 2507 s2++; 2508 } 2509 2510 out: 2511 if (NULL == s3) 2512 s3 = strchr(s2, '\0'); 2513 else if (*s3 != '\0') 2514 s3++; 2515 *pos = s3 - v; 2516 return match; 2517 } 2518 2519 /* 2520 * Evaluate an optionally negated single character, numerical, 2521 * or string condition. 2522 */ 2523 static int 2524 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2525 { 2526 const char *start, *end; 2527 char *cp, *name; 2528 size_t sz; 2529 int deftype, len, number, savepos, istrue, wanttrue; 2530 2531 if ('!' == v[*pos]) { 2532 wanttrue = 0; 2533 (*pos)++; 2534 } else 2535 wanttrue = 1; 2536 2537 switch (v[*pos]) { 2538 case '\0': 2539 return 0; 2540 case 'n': 2541 case 'o': 2542 (*pos)++; 2543 return wanttrue; 2544 case 'e': 2545 case 't': 2546 case 'v': 2547 (*pos)++; 2548 return !wanttrue; 2549 case 'c': 2550 do { 2551 (*pos)++; 2552 } while (v[*pos] == ' '); 2553 2554 /* 2555 * Quirk for groff compatibility: 2556 * The horizontal tab is neither available nor unavailable. 2557 */ 2558 2559 if (v[*pos] == '\t') { 2560 (*pos)++; 2561 return 0; 2562 } 2563 2564 /* Printable ASCII characters are available. */ 2565 2566 if (v[*pos] != '\\') { 2567 (*pos)++; 2568 return wanttrue; 2569 } 2570 2571 end = v + ++*pos; 2572 switch (mandoc_escape(&end, &start, &len)) { 2573 case ESCAPE_SPECIAL: 2574 istrue = mchars_spec2cp(start, len) != -1; 2575 break; 2576 case ESCAPE_UNICODE: 2577 istrue = 1; 2578 break; 2579 case ESCAPE_NUMBERED: 2580 istrue = mchars_num2char(start, len) != -1; 2581 break; 2582 default: 2583 istrue = !wanttrue; 2584 break; 2585 } 2586 *pos = end - v; 2587 return istrue == wanttrue; 2588 case 'd': 2589 case 'r': 2590 cp = v + *pos + 1; 2591 while (*cp == ' ') 2592 cp++; 2593 name = cp; 2594 sz = roff_getname(r, &cp, ln, cp - v); 2595 if (sz == 0) 2596 istrue = 0; 2597 else if (v[*pos] == 'r') 2598 istrue = roff_hasregn(r, name, sz); 2599 else { 2600 deftype = ROFFDEF_ANY; 2601 roff_getstrn(r, name, sz, &deftype); 2602 istrue = !!deftype; 2603 } 2604 *pos = (name + sz) - v; 2605 return istrue == wanttrue; 2606 default: 2607 break; 2608 } 2609 2610 savepos = *pos; 2611 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2612 return (number > 0) == wanttrue; 2613 else if (*pos == savepos) 2614 return roff_evalstrcond(v, pos) == wanttrue; 2615 else 2616 return 0; 2617 } 2618 2619 static int 2620 roff_line_ignore(ROFF_ARGS) 2621 { 2622 2623 return ROFF_IGN; 2624 } 2625 2626 static int 2627 roff_insec(ROFF_ARGS) 2628 { 2629 2630 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2631 return ROFF_IGN; 2632 } 2633 2634 static int 2635 roff_unsupp(ROFF_ARGS) 2636 { 2637 2638 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2639 return ROFF_IGN; 2640 } 2641 2642 static int 2643 roff_cond(ROFF_ARGS) 2644 { 2645 int irc; 2646 2647 roffnode_push(r, tok, NULL, ln, ppos); 2648 2649 /* 2650 * An `.el' has no conditional body: it will consume the value 2651 * of the current rstack entry set in prior `ie' calls or 2652 * defaults to DENY. 2653 * 2654 * If we're not an `el', however, then evaluate the conditional. 2655 */ 2656 2657 r->last->rule = tok == ROFF_el ? 2658 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2659 roff_evalcond(r, ln, buf->buf, &pos); 2660 2661 /* 2662 * An if-else will put the NEGATION of the current evaluated 2663 * conditional into the stack of rules. 2664 */ 2665 2666 if (tok == ROFF_ie) { 2667 if (r->rstackpos + 1 == r->rstacksz) { 2668 r->rstacksz += 16; 2669 r->rstack = mandoc_reallocarray(r->rstack, 2670 r->rstacksz, sizeof(int)); 2671 } 2672 r->rstack[++r->rstackpos] = !r->last->rule; 2673 } 2674 2675 /* If the parent has false as its rule, then so do we. */ 2676 2677 if (r->last->parent && !r->last->parent->rule) 2678 r->last->rule = 0; 2679 2680 /* 2681 * Determine scope. 2682 * If there is nothing on the line after the conditional, 2683 * not even whitespace, use next-line scope. 2684 * Except that .while does not support next-line scope. 2685 */ 2686 2687 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2688 r->last->endspan = 2; 2689 goto out; 2690 } 2691 2692 while (buf->buf[pos] == ' ') 2693 pos++; 2694 2695 /* An opening brace requests multiline scope. */ 2696 2697 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2698 r->last->endspan = -1; 2699 pos += 2; 2700 while (buf->buf[pos] == ' ') 2701 pos++; 2702 goto out; 2703 } 2704 2705 /* 2706 * Anything else following the conditional causes 2707 * single-line scope. Warn if the scope contains 2708 * nothing but trailing whitespace. 2709 */ 2710 2711 if (buf->buf[pos] == '\0') 2712 mandoc_msg(MANDOCERR_COND_EMPTY, 2713 ln, ppos, "%s", roff_name[tok]); 2714 2715 r->last->endspan = 1; 2716 2717 out: 2718 *offs = pos; 2719 irc = ROFF_RERUN; 2720 if (tok == ROFF_while) 2721 irc |= ROFF_WHILE; 2722 return irc; 2723 } 2724 2725 static int 2726 roff_ds(ROFF_ARGS) 2727 { 2728 char *string; 2729 const char *name; 2730 size_t namesz; 2731 2732 /* Ignore groff compatibility mode for now. */ 2733 2734 if (tok == ROFF_ds1) 2735 tok = ROFF_ds; 2736 else if (tok == ROFF_as1) 2737 tok = ROFF_as; 2738 2739 /* 2740 * The first word is the name of the string. 2741 * If it is empty or terminated by an escape sequence, 2742 * abort the `ds' request without defining anything. 2743 */ 2744 2745 name = string = buf->buf + pos; 2746 if (*name == '\0') 2747 return ROFF_IGN; 2748 2749 namesz = roff_getname(r, &string, ln, pos); 2750 switch (name[namesz]) { 2751 case '\\': 2752 return ROFF_IGN; 2753 case '\t': 2754 string = buf->buf + pos + namesz; 2755 break; 2756 default: 2757 break; 2758 } 2759 2760 /* Read past the initial double-quote, if any. */ 2761 if (*string == '"') 2762 string++; 2763 2764 /* The rest is the value. */ 2765 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2766 ROFF_as == tok); 2767 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2768 return ROFF_IGN; 2769 } 2770 2771 /* 2772 * Parse a single operator, one or two characters long. 2773 * If the operator is recognized, return success and advance the 2774 * parse point, else return failure and let the parse point unchanged. 2775 */ 2776 static int 2777 roff_getop(const char *v, int *pos, char *res) 2778 { 2779 2780 *res = v[*pos]; 2781 2782 switch (*res) { 2783 case '+': 2784 case '-': 2785 case '*': 2786 case '/': 2787 case '%': 2788 case '&': 2789 case ':': 2790 break; 2791 case '<': 2792 switch (v[*pos + 1]) { 2793 case '=': 2794 *res = 'l'; 2795 (*pos)++; 2796 break; 2797 case '>': 2798 *res = '!'; 2799 (*pos)++; 2800 break; 2801 case '?': 2802 *res = 'i'; 2803 (*pos)++; 2804 break; 2805 default: 2806 break; 2807 } 2808 break; 2809 case '>': 2810 switch (v[*pos + 1]) { 2811 case '=': 2812 *res = 'g'; 2813 (*pos)++; 2814 break; 2815 case '?': 2816 *res = 'a'; 2817 (*pos)++; 2818 break; 2819 default: 2820 break; 2821 } 2822 break; 2823 case '=': 2824 if ('=' == v[*pos + 1]) 2825 (*pos)++; 2826 break; 2827 default: 2828 return 0; 2829 } 2830 (*pos)++; 2831 2832 return *res; 2833 } 2834 2835 /* 2836 * Evaluate either a parenthesized numeric expression 2837 * or a single signed integer number. 2838 */ 2839 static int 2840 roff_evalpar(struct roff *r, int ln, 2841 const char *v, int *pos, int *res, int flags) 2842 { 2843 2844 if ('(' != v[*pos]) 2845 return roff_getnum(v, pos, res, flags); 2846 2847 (*pos)++; 2848 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2849 return 0; 2850 2851 /* 2852 * Omission of the closing parenthesis 2853 * is an error in validation mode, 2854 * but ignored in evaluation mode. 2855 */ 2856 2857 if (')' == v[*pos]) 2858 (*pos)++; 2859 else if (NULL == res) 2860 return 0; 2861 2862 return 1; 2863 } 2864 2865 /* 2866 * Evaluate a complete numeric expression. 2867 * Proceed left to right, there is no concept of precedence. 2868 */ 2869 static int 2870 roff_evalnum(struct roff *r, int ln, const char *v, 2871 int *pos, int *res, int flags) 2872 { 2873 int mypos, operand2; 2874 char operator; 2875 2876 if (NULL == pos) { 2877 mypos = 0; 2878 pos = &mypos; 2879 } 2880 2881 if (flags & ROFFNUM_WHITE) 2882 while (isspace((unsigned char)v[*pos])) 2883 (*pos)++; 2884 2885 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2886 return 0; 2887 2888 while (1) { 2889 if (flags & ROFFNUM_WHITE) 2890 while (isspace((unsigned char)v[*pos])) 2891 (*pos)++; 2892 2893 if ( ! roff_getop(v, pos, &operator)) 2894 break; 2895 2896 if (flags & ROFFNUM_WHITE) 2897 while (isspace((unsigned char)v[*pos])) 2898 (*pos)++; 2899 2900 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2901 return 0; 2902 2903 if (flags & ROFFNUM_WHITE) 2904 while (isspace((unsigned char)v[*pos])) 2905 (*pos)++; 2906 2907 if (NULL == res) 2908 continue; 2909 2910 switch (operator) { 2911 case '+': 2912 *res += operand2; 2913 break; 2914 case '-': 2915 *res -= operand2; 2916 break; 2917 case '*': 2918 *res *= operand2; 2919 break; 2920 case '/': 2921 if (operand2 == 0) { 2922 mandoc_msg(MANDOCERR_DIVZERO, 2923 ln, *pos, "%s", v); 2924 *res = 0; 2925 break; 2926 } 2927 *res /= operand2; 2928 break; 2929 case '%': 2930 if (operand2 == 0) { 2931 mandoc_msg(MANDOCERR_DIVZERO, 2932 ln, *pos, "%s", v); 2933 *res = 0; 2934 break; 2935 } 2936 *res %= operand2; 2937 break; 2938 case '<': 2939 *res = *res < operand2; 2940 break; 2941 case '>': 2942 *res = *res > operand2; 2943 break; 2944 case 'l': 2945 *res = *res <= operand2; 2946 break; 2947 case 'g': 2948 *res = *res >= operand2; 2949 break; 2950 case '=': 2951 *res = *res == operand2; 2952 break; 2953 case '!': 2954 *res = *res != operand2; 2955 break; 2956 case '&': 2957 *res = *res && operand2; 2958 break; 2959 case ':': 2960 *res = *res || operand2; 2961 break; 2962 case 'i': 2963 if (operand2 < *res) 2964 *res = operand2; 2965 break; 2966 case 'a': 2967 if (operand2 > *res) 2968 *res = operand2; 2969 break; 2970 default: 2971 abort(); 2972 } 2973 } 2974 return 1; 2975 } 2976 2977 /* --- register management ------------------------------------------------ */ 2978 2979 void 2980 roff_setreg(struct roff *r, const char *name, int val, char sign) 2981 { 2982 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 2983 } 2984 2985 static void 2986 roff_setregn(struct roff *r, const char *name, size_t len, 2987 int val, char sign, int step) 2988 { 2989 struct roffreg *reg; 2990 2991 /* Search for an existing register with the same name. */ 2992 reg = r->regtab; 2993 2994 while (reg != NULL && (reg->key.sz != len || 2995 strncmp(reg->key.p, name, len) != 0)) 2996 reg = reg->next; 2997 2998 if (NULL == reg) { 2999 /* Create a new register. */ 3000 reg = mandoc_malloc(sizeof(struct roffreg)); 3001 reg->key.p = mandoc_strndup(name, len); 3002 reg->key.sz = len; 3003 reg->val = 0; 3004 reg->step = 0; 3005 reg->next = r->regtab; 3006 r->regtab = reg; 3007 } 3008 3009 if ('+' == sign) 3010 reg->val += val; 3011 else if ('-' == sign) 3012 reg->val -= val; 3013 else 3014 reg->val = val; 3015 if (step != INT_MIN) 3016 reg->step = step; 3017 } 3018 3019 /* 3020 * Handle some predefined read-only number registers. 3021 * For now, return -1 if the requested register is not predefined; 3022 * in case a predefined read-only register having the value -1 3023 * were to turn up, another special value would have to be chosen. 3024 */ 3025 static int 3026 roff_getregro(const struct roff *r, const char *name) 3027 { 3028 3029 switch (*name) { 3030 case '$': /* Number of arguments of the last macro evaluated. */ 3031 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 3032 case 'A': /* ASCII approximation mode is always off. */ 3033 return 0; 3034 case 'g': /* Groff compatibility mode is always on. */ 3035 return 1; 3036 case 'H': /* Fixed horizontal resolution. */ 3037 return 24; 3038 case 'j': /* Always adjust left margin only. */ 3039 return 0; 3040 case 'T': /* Some output device is always defined. */ 3041 return 1; 3042 case 'V': /* Fixed vertical resolution. */ 3043 return 40; 3044 default: 3045 return -1; 3046 } 3047 } 3048 3049 int 3050 roff_getreg(struct roff *r, const char *name) 3051 { 3052 return roff_getregn(r, name, strlen(name), '\0'); 3053 } 3054 3055 static int 3056 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3057 { 3058 struct roffreg *reg; 3059 int val; 3060 3061 if ('.' == name[0] && 2 == len) { 3062 val = roff_getregro(r, name + 1); 3063 if (-1 != val) 3064 return val; 3065 } 3066 3067 for (reg = r->regtab; reg; reg = reg->next) { 3068 if (len == reg->key.sz && 3069 0 == strncmp(name, reg->key.p, len)) { 3070 switch (sign) { 3071 case '+': 3072 reg->val += reg->step; 3073 break; 3074 case '-': 3075 reg->val -= reg->step; 3076 break; 3077 default: 3078 break; 3079 } 3080 return reg->val; 3081 } 3082 } 3083 3084 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3085 return 0; 3086 } 3087 3088 static int 3089 roff_hasregn(const struct roff *r, const char *name, size_t len) 3090 { 3091 struct roffreg *reg; 3092 int val; 3093 3094 if ('.' == name[0] && 2 == len) { 3095 val = roff_getregro(r, name + 1); 3096 if (-1 != val) 3097 return 1; 3098 } 3099 3100 for (reg = r->regtab; reg; reg = reg->next) 3101 if (len == reg->key.sz && 3102 0 == strncmp(name, reg->key.p, len)) 3103 return 1; 3104 3105 return 0; 3106 } 3107 3108 static void 3109 roff_freereg(struct roffreg *reg) 3110 { 3111 struct roffreg *old_reg; 3112 3113 while (NULL != reg) { 3114 free(reg->key.p); 3115 old_reg = reg; 3116 reg = reg->next; 3117 free(old_reg); 3118 } 3119 } 3120 3121 static int 3122 roff_nr(ROFF_ARGS) 3123 { 3124 char *key, *val, *step; 3125 size_t keysz; 3126 int iv, is, len; 3127 char sign; 3128 3129 key = val = buf->buf + pos; 3130 if (*key == '\0') 3131 return ROFF_IGN; 3132 3133 keysz = roff_getname(r, &val, ln, pos); 3134 if (key[keysz] == '\\' || key[keysz] == '\t') 3135 return ROFF_IGN; 3136 3137 sign = *val; 3138 if (sign == '+' || sign == '-') 3139 val++; 3140 3141 len = 0; 3142 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3143 return ROFF_IGN; 3144 3145 step = val + len; 3146 while (isspace((unsigned char)*step)) 3147 step++; 3148 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3149 is = INT_MIN; 3150 3151 roff_setregn(r, key, keysz, iv, sign, is); 3152 return ROFF_IGN; 3153 } 3154 3155 static int 3156 roff_rr(ROFF_ARGS) 3157 { 3158 struct roffreg *reg, **prev; 3159 char *name, *cp; 3160 size_t namesz; 3161 3162 name = cp = buf->buf + pos; 3163 if (*name == '\0') 3164 return ROFF_IGN; 3165 namesz = roff_getname(r, &cp, ln, pos); 3166 name[namesz] = '\0'; 3167 3168 prev = &r->regtab; 3169 while (1) { 3170 reg = *prev; 3171 if (reg == NULL || !strcmp(name, reg->key.p)) 3172 break; 3173 prev = ®->next; 3174 } 3175 if (reg != NULL) { 3176 *prev = reg->next; 3177 free(reg->key.p); 3178 free(reg); 3179 } 3180 return ROFF_IGN; 3181 } 3182 3183 /* --- handler functions for roff requests -------------------------------- */ 3184 3185 static int 3186 roff_rm(ROFF_ARGS) 3187 { 3188 const char *name; 3189 char *cp; 3190 size_t namesz; 3191 3192 cp = buf->buf + pos; 3193 while (*cp != '\0') { 3194 name = cp; 3195 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3196 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3197 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3198 if (name[namesz] == '\\' || name[namesz] == '\t') 3199 break; 3200 } 3201 return ROFF_IGN; 3202 } 3203 3204 static int 3205 roff_it(ROFF_ARGS) 3206 { 3207 int iv; 3208 3209 /* Parse the number of lines. */ 3210 3211 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3212 mandoc_msg(MANDOCERR_IT_NONUM, 3213 ln, ppos, "%s", buf->buf + 1); 3214 return ROFF_IGN; 3215 } 3216 3217 while (isspace((unsigned char)buf->buf[pos])) 3218 pos++; 3219 3220 /* 3221 * Arm the input line trap. 3222 * Special-casing "an-trap" is an ugly workaround to cope 3223 * with DocBook stupidly fiddling with man(7) internals. 3224 */ 3225 3226 roffit_lines = iv; 3227 roffit_macro = mandoc_strdup(iv != 1 || 3228 strcmp(buf->buf + pos, "an-trap") ? 3229 buf->buf + pos : "br"); 3230 return ROFF_IGN; 3231 } 3232 3233 static int 3234 roff_Dd(ROFF_ARGS) 3235 { 3236 int mask; 3237 enum roff_tok t, te; 3238 3239 switch (tok) { 3240 case ROFF_Dd: 3241 tok = MDOC_Dd; 3242 te = MDOC_MAX; 3243 if (r->format == 0) 3244 r->format = MPARSE_MDOC; 3245 mask = MPARSE_MDOC | MPARSE_QUICK; 3246 break; 3247 case ROFF_TH: 3248 tok = MAN_TH; 3249 te = MAN_MAX; 3250 if (r->format == 0) 3251 r->format = MPARSE_MAN; 3252 mask = MPARSE_QUICK; 3253 break; 3254 default: 3255 abort(); 3256 } 3257 if ((r->options & mask) == 0) 3258 for (t = tok; t < te; t++) 3259 roff_setstr(r, roff_name[t], NULL, 0); 3260 return ROFF_CONT; 3261 } 3262 3263 static int 3264 roff_TE(ROFF_ARGS) 3265 { 3266 r->man->flags &= ~ROFF_NONOFILL; 3267 if (r->tbl == NULL) { 3268 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3269 return ROFF_IGN; 3270 } 3271 if (tbl_end(r->tbl, 0) == 0) { 3272 r->tbl = NULL; 3273 free(buf->buf); 3274 buf->buf = mandoc_strdup(".sp"); 3275 buf->sz = 4; 3276 *offs = 0; 3277 return ROFF_REPARSE; 3278 } 3279 r->tbl = NULL; 3280 return ROFF_IGN; 3281 } 3282 3283 static int 3284 roff_T_(ROFF_ARGS) 3285 { 3286 3287 if (NULL == r->tbl) 3288 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3289 else 3290 tbl_restart(ln, ppos, r->tbl); 3291 3292 return ROFF_IGN; 3293 } 3294 3295 /* 3296 * Handle in-line equation delimiters. 3297 */ 3298 static int 3299 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3300 { 3301 char *cp1, *cp2; 3302 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3303 3304 /* 3305 * Outside equations, look for an opening delimiter. 3306 * If we are inside an equation, we already know it is 3307 * in-line, or this function wouldn't have been called; 3308 * so look for a closing delimiter. 3309 */ 3310 3311 cp1 = buf->buf + pos; 3312 cp2 = strchr(cp1, r->eqn == NULL ? 3313 r->last_eqn->odelim : r->last_eqn->cdelim); 3314 if (cp2 == NULL) 3315 return ROFF_CONT; 3316 3317 *cp2++ = '\0'; 3318 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3319 3320 /* Handle preceding text, protecting whitespace. */ 3321 3322 if (*buf->buf != '\0') { 3323 if (r->eqn == NULL) 3324 bef_pr = "\\&"; 3325 bef_nl = "\n"; 3326 } 3327 3328 /* 3329 * Prepare replacing the delimiter with an equation macro 3330 * and drop leading white space from the equation. 3331 */ 3332 3333 if (r->eqn == NULL) { 3334 while (*cp2 == ' ') 3335 cp2++; 3336 mac = ".EQ"; 3337 } else 3338 mac = ".EN"; 3339 3340 /* Handle following text, protecting whitespace. */ 3341 3342 if (*cp2 != '\0') { 3343 aft_nl = "\n"; 3344 if (r->eqn != NULL) 3345 aft_pr = "\\&"; 3346 } 3347 3348 /* Do the actual replacement. */ 3349 3350 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3351 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3352 free(buf->buf); 3353 buf->buf = cp1; 3354 3355 /* Toggle the in-line state of the eqn subsystem. */ 3356 3357 r->eqn_inline = r->eqn == NULL; 3358 return ROFF_REPARSE; 3359 } 3360 3361 static int 3362 roff_EQ(ROFF_ARGS) 3363 { 3364 struct roff_node *n; 3365 3366 if (r->man->meta.macroset == MACROSET_MAN) 3367 man_breakscope(r->man, ROFF_EQ); 3368 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3369 if (ln > r->man->last->line) 3370 n->flags |= NODE_LINE; 3371 n->eqn = eqn_box_new(); 3372 roff_node_append(r->man, n); 3373 r->man->next = ROFF_NEXT_SIBLING; 3374 3375 assert(r->eqn == NULL); 3376 if (r->last_eqn == NULL) 3377 r->last_eqn = eqn_alloc(); 3378 else 3379 eqn_reset(r->last_eqn); 3380 r->eqn = r->last_eqn; 3381 r->eqn->node = n; 3382 3383 if (buf->buf[pos] != '\0') 3384 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3385 ".EQ %s", buf->buf + pos); 3386 3387 return ROFF_IGN; 3388 } 3389 3390 static int 3391 roff_EN(ROFF_ARGS) 3392 { 3393 if (r->eqn != NULL) { 3394 eqn_parse(r->eqn); 3395 r->eqn = NULL; 3396 } else 3397 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3398 if (buf->buf[pos] != '\0') 3399 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3400 "EN %s", buf->buf + pos); 3401 return ROFF_IGN; 3402 } 3403 3404 static int 3405 roff_TS(ROFF_ARGS) 3406 { 3407 if (r->tbl != NULL) { 3408 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3409 tbl_end(r->tbl, 0); 3410 } 3411 r->man->flags |= ROFF_NONOFILL; 3412 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3413 if (r->last_tbl == NULL) 3414 r->first_tbl = r->tbl; 3415 r->last_tbl = r->tbl; 3416 return ROFF_IGN; 3417 } 3418 3419 static int 3420 roff_noarg(ROFF_ARGS) 3421 { 3422 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3423 man_breakscope(r->man, tok); 3424 if (tok == ROFF_brp) 3425 tok = ROFF_br; 3426 roff_elem_alloc(r->man, ln, ppos, tok); 3427 if (buf->buf[pos] != '\0') 3428 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3429 "%s %s", roff_name[tok], buf->buf + pos); 3430 if (tok == ROFF_nf) 3431 r->man->flags |= ROFF_NOFILL; 3432 else if (tok == ROFF_fi) 3433 r->man->flags &= ~ROFF_NOFILL; 3434 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3435 r->man->next = ROFF_NEXT_SIBLING; 3436 return ROFF_IGN; 3437 } 3438 3439 static int 3440 roff_onearg(ROFF_ARGS) 3441 { 3442 struct roff_node *n; 3443 char *cp; 3444 int npos; 3445 3446 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3447 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3448 tok == ROFF_ti)) 3449 man_breakscope(r->man, tok); 3450 3451 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3452 r->man->last = roffce_node; 3453 r->man->next = ROFF_NEXT_SIBLING; 3454 } 3455 3456 roff_elem_alloc(r->man, ln, ppos, tok); 3457 n = r->man->last; 3458 3459 cp = buf->buf + pos; 3460 if (*cp != '\0') { 3461 while (*cp != '\0' && *cp != ' ') 3462 cp++; 3463 while (*cp == ' ') 3464 *cp++ = '\0'; 3465 if (*cp != '\0') 3466 mandoc_msg(MANDOCERR_ARG_EXCESS, 3467 ln, (int)(cp - buf->buf), 3468 "%s ... %s", roff_name[tok], cp); 3469 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3470 } 3471 3472 if (tok == ROFF_ce || tok == ROFF_rj) { 3473 if (r->man->last->type == ROFFT_ELEM) { 3474 roff_word_alloc(r->man, ln, pos, "1"); 3475 r->man->last->flags |= NODE_NOSRC; 3476 } 3477 npos = 0; 3478 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3479 &roffce_lines, 0) == 0) { 3480 mandoc_msg(MANDOCERR_CE_NONUM, 3481 ln, pos, "ce %s", buf->buf + pos); 3482 roffce_lines = 1; 3483 } 3484 if (roffce_lines < 1) { 3485 r->man->last = r->man->last->parent; 3486 roffce_node = NULL; 3487 roffce_lines = 0; 3488 } else 3489 roffce_node = r->man->last->parent; 3490 } else { 3491 n->flags |= NODE_VALID | NODE_ENDED; 3492 r->man->last = n; 3493 } 3494 n->flags |= NODE_LINE; 3495 r->man->next = ROFF_NEXT_SIBLING; 3496 return ROFF_IGN; 3497 } 3498 3499 static int 3500 roff_manyarg(ROFF_ARGS) 3501 { 3502 struct roff_node *n; 3503 char *sp, *ep; 3504 3505 roff_elem_alloc(r->man, ln, ppos, tok); 3506 n = r->man->last; 3507 3508 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3509 while (*ep != '\0' && *ep != ' ') 3510 ep++; 3511 while (*ep == ' ') 3512 *ep++ = '\0'; 3513 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3514 } 3515 3516 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3517 r->man->last = n; 3518 r->man->next = ROFF_NEXT_SIBLING; 3519 return ROFF_IGN; 3520 } 3521 3522 static int 3523 roff_als(ROFF_ARGS) 3524 { 3525 char *oldn, *newn, *end, *value; 3526 size_t oldsz, newsz, valsz; 3527 3528 newn = oldn = buf->buf + pos; 3529 if (*newn == '\0') 3530 return ROFF_IGN; 3531 3532 newsz = roff_getname(r, &oldn, ln, pos); 3533 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3534 return ROFF_IGN; 3535 3536 end = oldn; 3537 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3538 if (oldsz == 0) 3539 return ROFF_IGN; 3540 3541 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3542 (int)oldsz, oldn); 3543 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3544 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3545 free(value); 3546 return ROFF_IGN; 3547 } 3548 3549 /* 3550 * The .break request only makes sense inside conditionals, 3551 * and that case is already handled in roff_cond_sub(). 3552 */ 3553 static int 3554 roff_break(ROFF_ARGS) 3555 { 3556 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3557 return ROFF_IGN; 3558 } 3559 3560 static int 3561 roff_cc(ROFF_ARGS) 3562 { 3563 const char *p; 3564 3565 p = buf->buf + pos; 3566 3567 if (*p == '\0' || (r->control = *p++) == '.') 3568 r->control = '\0'; 3569 3570 if (*p != '\0') 3571 mandoc_msg(MANDOCERR_ARG_EXCESS, 3572 ln, p - buf->buf, "cc ... %s", p); 3573 3574 return ROFF_IGN; 3575 } 3576 3577 static int 3578 roff_char(ROFF_ARGS) 3579 { 3580 const char *p, *kp, *vp; 3581 size_t ksz, vsz; 3582 int font; 3583 3584 /* Parse the character to be replaced. */ 3585 3586 kp = buf->buf + pos; 3587 p = kp + 1; 3588 if (*kp == '\0' || (*kp == '\\' && 3589 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3590 (*p != ' ' && *p != '\0')) { 3591 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3592 return ROFF_IGN; 3593 } 3594 ksz = p - kp; 3595 while (*p == ' ') 3596 p++; 3597 3598 /* 3599 * If the replacement string contains a font escape sequence, 3600 * we have to restore the font at the end. 3601 */ 3602 3603 vp = p; 3604 vsz = strlen(p); 3605 font = 0; 3606 while (*p != '\0') { 3607 if (*p++ != '\\') 3608 continue; 3609 switch (mandoc_escape(&p, NULL, NULL)) { 3610 case ESCAPE_FONT: 3611 case ESCAPE_FONTROMAN: 3612 case ESCAPE_FONTITALIC: 3613 case ESCAPE_FONTBOLD: 3614 case ESCAPE_FONTBI: 3615 case ESCAPE_FONTCR: 3616 case ESCAPE_FONTCB: 3617 case ESCAPE_FONTCI: 3618 case ESCAPE_FONTPREV: 3619 font++; 3620 break; 3621 default: 3622 break; 3623 } 3624 } 3625 if (font > 1) 3626 mandoc_msg(MANDOCERR_CHAR_FONT, 3627 ln, (int)(vp - buf->buf), "%s", vp); 3628 3629 /* 3630 * Approximate the effect of .char using the .tr tables. 3631 * XXX In groff, .char and .tr interact differently. 3632 */ 3633 3634 if (ksz == 1) { 3635 if (r->xtab == NULL) 3636 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3637 assert((unsigned int)*kp < 128); 3638 free(r->xtab[(int)*kp].p); 3639 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3640 "%s%s", vp, font ? "\fP" : ""); 3641 } else { 3642 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3643 if (font) 3644 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3645 } 3646 return ROFF_IGN; 3647 } 3648 3649 static int 3650 roff_ec(ROFF_ARGS) 3651 { 3652 const char *p; 3653 3654 p = buf->buf + pos; 3655 if (*p == '\0') 3656 r->escape = '\\'; 3657 else { 3658 r->escape = *p; 3659 if (*++p != '\0') 3660 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3661 (int)(p - buf->buf), "ec ... %s", p); 3662 } 3663 return ROFF_IGN; 3664 } 3665 3666 static int 3667 roff_eo(ROFF_ARGS) 3668 { 3669 r->escape = '\0'; 3670 if (buf->buf[pos] != '\0') 3671 mandoc_msg(MANDOCERR_ARG_SKIP, 3672 ln, pos, "eo %s", buf->buf + pos); 3673 return ROFF_IGN; 3674 } 3675 3676 static int 3677 roff_mc(ROFF_ARGS) 3678 { 3679 struct roff_node *n; 3680 char *cp; 3681 3682 /* Parse the first argument. */ 3683 3684 cp = buf->buf + pos; 3685 if (*cp != '\0') 3686 cp++; 3687 if (buf->buf[pos] == '\\') { 3688 switch (mandoc_escape((const char **)&cp, NULL, NULL)) { 3689 case ESCAPE_SPECIAL: 3690 case ESCAPE_UNICODE: 3691 case ESCAPE_NUMBERED: 3692 break; 3693 default: 3694 *cp = '\0'; 3695 mandoc_msg(MANDOCERR_MC_ESC, ln, pos, 3696 "mc %s", buf->buf + pos); 3697 buf->buf[pos] = '\0'; 3698 break; 3699 } 3700 } 3701 3702 /* Ignore additional arguments. */ 3703 3704 while (*cp == ' ') 3705 *cp++ = '\0'; 3706 if (*cp != '\0') { 3707 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf), 3708 "mc ... %s", cp); 3709 *cp = '\0'; 3710 } 3711 3712 /* Create the .mc node. */ 3713 3714 roff_elem_alloc(r->man, ln, ppos, tok); 3715 n = r->man->last; 3716 if (buf->buf[pos] != '\0') 3717 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3718 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3719 r->man->last = n; 3720 r->man->next = ROFF_NEXT_SIBLING; 3721 return ROFF_IGN; 3722 } 3723 3724 static int 3725 roff_nop(ROFF_ARGS) 3726 { 3727 while (buf->buf[pos] == ' ') 3728 pos++; 3729 *offs = pos; 3730 return ROFF_RERUN; 3731 } 3732 3733 static int 3734 roff_tr(ROFF_ARGS) 3735 { 3736 const char *p, *first, *second; 3737 size_t fsz, ssz; 3738 3739 p = buf->buf + pos; 3740 3741 if (*p == '\0') { 3742 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3743 return ROFF_IGN; 3744 } 3745 3746 while (*p != '\0') { 3747 fsz = ssz = 1; 3748 3749 first = p++; 3750 if (*first == '\\') { 3751 if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR) 3752 return ROFF_IGN; 3753 fsz = (size_t)(p - first); 3754 } 3755 3756 second = p++; 3757 if (*second == '\\') { 3758 if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR) 3759 return ROFF_IGN; 3760 ssz = (size_t)(p - second); 3761 } else if (*second == '\0') { 3762 mandoc_msg(MANDOCERR_TR_ODD, ln, 3763 (int)(first - buf->buf), "tr %s", first); 3764 second = " "; 3765 p--; 3766 } 3767 3768 if (fsz > 1) { 3769 roff_setstrn(&r->xmbtab, first, fsz, 3770 second, ssz, 0); 3771 continue; 3772 } 3773 3774 if (r->xtab == NULL) 3775 r->xtab = mandoc_calloc(128, 3776 sizeof(struct roffstr)); 3777 3778 free(r->xtab[(int)*first].p); 3779 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3780 r->xtab[(int)*first].sz = ssz; 3781 } 3782 3783 return ROFF_IGN; 3784 } 3785 3786 /* 3787 * Implementation of the .return request. 3788 * There is no need to call roff_userret() from here. 3789 * The read module will call that after rewinding the reader stack 3790 * to the place from where the current macro was called. 3791 */ 3792 static int 3793 roff_return(ROFF_ARGS) 3794 { 3795 if (r->mstackpos >= 0) 3796 return ROFF_IGN | ROFF_USERRET; 3797 3798 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3799 return ROFF_IGN; 3800 } 3801 3802 static int 3803 roff_rn(ROFF_ARGS) 3804 { 3805 const char *value; 3806 char *oldn, *newn, *end; 3807 size_t oldsz, newsz; 3808 int deftype; 3809 3810 oldn = newn = buf->buf + pos; 3811 if (*oldn == '\0') 3812 return ROFF_IGN; 3813 3814 oldsz = roff_getname(r, &newn, ln, pos); 3815 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3816 return ROFF_IGN; 3817 3818 end = newn; 3819 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3820 if (newsz == 0) 3821 return ROFF_IGN; 3822 3823 deftype = ROFFDEF_ANY; 3824 value = roff_getstrn(r, oldn, oldsz, &deftype); 3825 switch (deftype) { 3826 case ROFFDEF_USER: 3827 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3828 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3829 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3830 break; 3831 case ROFFDEF_PRE: 3832 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3833 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3834 break; 3835 case ROFFDEF_REN: 3836 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3837 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3838 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3839 break; 3840 case ROFFDEF_STD: 3841 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3842 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3843 break; 3844 default: 3845 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3846 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3847 break; 3848 } 3849 return ROFF_IGN; 3850 } 3851 3852 static int 3853 roff_shift(ROFF_ARGS) 3854 { 3855 struct mctx *ctx; 3856 int argpos, levels, i; 3857 3858 argpos = pos; 3859 levels = 1; 3860 if (buf->buf[pos] != '\0' && 3861 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3862 mandoc_msg(MANDOCERR_CE_NONUM, 3863 ln, pos, "shift %s", buf->buf + pos); 3864 levels = 1; 3865 } 3866 if (r->mstackpos < 0) { 3867 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3868 return ROFF_IGN; 3869 } 3870 ctx = r->mstack + r->mstackpos; 3871 if (levels > ctx->argc) { 3872 mandoc_msg(MANDOCERR_SHIFT, 3873 ln, argpos, "%d, but max is %d", levels, ctx->argc); 3874 levels = ctx->argc; 3875 } 3876 if (levels < 0) { 3877 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels); 3878 levels = 0; 3879 } 3880 if (levels == 0) 3881 return ROFF_IGN; 3882 for (i = 0; i < levels; i++) 3883 free(ctx->argv[i]); 3884 ctx->argc -= levels; 3885 for (i = 0; i < ctx->argc; i++) 3886 ctx->argv[i] = ctx->argv[i + levels]; 3887 return ROFF_IGN; 3888 } 3889 3890 static int 3891 roff_so(ROFF_ARGS) 3892 { 3893 char *name, *cp; 3894 3895 name = buf->buf + pos; 3896 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3897 3898 /* 3899 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3900 * opening anything that's not in our cwd or anything beneath 3901 * it. Thus, explicitly disallow traversing up the file-system 3902 * or using absolute paths. 3903 */ 3904 3905 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3906 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3907 buf->sz = mandoc_asprintf(&cp, 3908 ".sp\nSee the file %s.\n.sp", name) + 1; 3909 free(buf->buf); 3910 buf->buf = cp; 3911 *offs = 0; 3912 return ROFF_REPARSE; 3913 } 3914 3915 *offs = pos; 3916 return ROFF_SO; 3917 } 3918 3919 /* --- user defined strings and macros ------------------------------------ */ 3920 3921 static int 3922 roff_userdef(ROFF_ARGS) 3923 { 3924 struct mctx *ctx; 3925 char *arg, *ap, *dst, *src; 3926 size_t sz; 3927 3928 /* If the macro is empty, ignore it altogether. */ 3929 3930 if (*r->current_string == '\0') 3931 return ROFF_IGN; 3932 3933 /* Initialize a new macro stack context. */ 3934 3935 if (++r->mstackpos == r->mstacksz) { 3936 r->mstack = mandoc_recallocarray(r->mstack, 3937 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3938 r->mstacksz += 8; 3939 } 3940 ctx = r->mstack + r->mstackpos; 3941 ctx->argc = 0; 3942 3943 /* 3944 * Collect pointers to macro argument strings, 3945 * NUL-terminating them and escaping quotes. 3946 */ 3947 3948 src = buf->buf + pos; 3949 while (*src != '\0') { 3950 if (ctx->argc == ctx->argsz) { 3951 ctx->argsz += 8; 3952 ctx->argv = mandoc_reallocarray(ctx->argv, 3953 ctx->argsz, sizeof(*ctx->argv)); 3954 } 3955 arg = roff_getarg(r, &src, ln, &pos); 3956 sz = 1; /* For the terminating NUL. */ 3957 for (ap = arg; *ap != '\0'; ap++) 3958 sz += *ap == '"' ? 4 : 1; 3959 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3960 for (ap = arg; *ap != '\0'; ap++) { 3961 if (*ap == '"') { 3962 memcpy(dst, "\\(dq", 4); 3963 dst += 4; 3964 } else 3965 *dst++ = *ap; 3966 } 3967 *dst = '\0'; 3968 free(arg); 3969 } 3970 3971 /* Replace the macro invocation by the macro definition. */ 3972 3973 free(buf->buf); 3974 buf->buf = mandoc_strdup(r->current_string); 3975 buf->sz = strlen(buf->buf) + 1; 3976 *offs = 0; 3977 3978 return buf->buf[buf->sz - 2] == '\n' ? 3979 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3980 } 3981 3982 /* 3983 * Calling a high-level macro that was renamed with .rn. 3984 * r->current_string has already been set up by roff_parse(). 3985 */ 3986 static int 3987 roff_renamed(ROFF_ARGS) 3988 { 3989 char *nbuf; 3990 3991 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3992 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 3993 free(buf->buf); 3994 buf->buf = nbuf; 3995 *offs = 0; 3996 return ROFF_CONT; 3997 } 3998 3999 /* 4000 * Measure the length in bytes of the roff identifier at *cpp 4001 * and advance the pointer to the next word. 4002 */ 4003 static size_t 4004 roff_getname(struct roff *r, char **cpp, int ln, int pos) 4005 { 4006 char *name, *cp; 4007 int namesz, inam, iend; 4008 4009 name = *cpp; 4010 if (*name == '\0') 4011 return 0; 4012 4013 /* Advance cp to the byte after the end of the name. */ 4014 4015 cp = name; 4016 namesz = 0; 4017 for (;;) { 4018 if (*cp == '\0') 4019 break; 4020 if (*cp == ' ' || *cp == '\t') { 4021 cp++; 4022 break; 4023 } 4024 if (*cp != '\\') { 4025 if (name + namesz < cp) { 4026 name[namesz] = *cp; 4027 *cp = ' '; 4028 } 4029 namesz++; 4030 cp++; 4031 continue; 4032 } 4033 if (cp[1] == '{' || cp[1] == '}') 4034 break; 4035 if (roff_escape(cp, 0, 0, NULL, &inam, 4036 NULL, NULL, &iend) != ESCAPE_UNDEF) { 4037 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 4038 "%.*s%.*s", namesz, name, iend, cp); 4039 cp += iend; 4040 break; 4041 } 4042 4043 /* 4044 * In an identifier, \\, \., \G and so on 4045 * are reduced to \, ., G and so on, 4046 * vaguely similar to copy mode. 4047 */ 4048 4049 name[namesz++] = cp[inam]; 4050 while (iend--) { 4051 if (cp >= name + namesz) 4052 *cp = ' '; 4053 cp++; 4054 } 4055 } 4056 4057 /* Read past spaces. */ 4058 4059 while (*cp == ' ') 4060 cp++; 4061 4062 *cpp = cp; 4063 return namesz; 4064 } 4065 4066 /* 4067 * Store *string into the user-defined string called *name. 4068 * To clear an existing entry, call with (*r, *name, NULL, 0). 4069 * append == 0: replace mode 4070 * append == 1: single-line append mode 4071 * append == 2: multiline append mode, append '\n' after each call 4072 */ 4073 static void 4074 roff_setstr(struct roff *r, const char *name, const char *string, 4075 int append) 4076 { 4077 size_t namesz; 4078 4079 namesz = strlen(name); 4080 roff_setstrn(&r->strtab, name, namesz, string, 4081 string ? strlen(string) : 0, append); 4082 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 4083 } 4084 4085 static void 4086 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 4087 const char *string, size_t stringsz, int append) 4088 { 4089 struct roffkv *n; 4090 char *c; 4091 int i; 4092 size_t oldch, newch; 4093 4094 /* Search for an existing string with the same name. */ 4095 n = *r; 4096 4097 while (n && (namesz != n->key.sz || 4098 strncmp(n->key.p, name, namesz))) 4099 n = n->next; 4100 4101 if (NULL == n) { 4102 /* Create a new string table entry. */ 4103 n = mandoc_malloc(sizeof(struct roffkv)); 4104 n->key.p = mandoc_strndup(name, namesz); 4105 n->key.sz = namesz; 4106 n->val.p = NULL; 4107 n->val.sz = 0; 4108 n->next = *r; 4109 *r = n; 4110 } else if (0 == append) { 4111 free(n->val.p); 4112 n->val.p = NULL; 4113 n->val.sz = 0; 4114 } 4115 4116 if (NULL == string) 4117 return; 4118 4119 /* 4120 * One additional byte for the '\n' in multiline mode, 4121 * and one for the terminating '\0'. 4122 */ 4123 newch = stringsz + (1 < append ? 2u : 1u); 4124 4125 if (NULL == n->val.p) { 4126 n->val.p = mandoc_malloc(newch); 4127 *n->val.p = '\0'; 4128 oldch = 0; 4129 } else { 4130 oldch = n->val.sz; 4131 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4132 } 4133 4134 /* Skip existing content in the destination buffer. */ 4135 c = n->val.p + (int)oldch; 4136 4137 /* Append new content to the destination buffer. */ 4138 i = 0; 4139 while (i < (int)stringsz) { 4140 /* 4141 * Rudimentary roff copy mode: 4142 * Handle escaped backslashes. 4143 */ 4144 if ('\\' == string[i] && '\\' == string[i + 1]) 4145 i++; 4146 *c++ = string[i++]; 4147 } 4148 4149 /* Append terminating bytes. */ 4150 if (1 < append) 4151 *c++ = '\n'; 4152 4153 *c = '\0'; 4154 n->val.sz = (int)(c - n->val.p); 4155 } 4156 4157 static const char * 4158 roff_getstrn(struct roff *r, const char *name, size_t len, 4159 int *deftype) 4160 { 4161 const struct roffkv *n; 4162 int found, i; 4163 enum roff_tok tok; 4164 4165 found = 0; 4166 for (n = r->strtab; n != NULL; n = n->next) { 4167 if (strncmp(name, n->key.p, len) != 0 || 4168 n->key.p[len] != '\0' || n->val.p == NULL) 4169 continue; 4170 if (*deftype & ROFFDEF_USER) { 4171 *deftype = ROFFDEF_USER; 4172 return n->val.p; 4173 } else { 4174 found = 1; 4175 break; 4176 } 4177 } 4178 for (n = r->rentab; n != NULL; n = n->next) { 4179 if (strncmp(name, n->key.p, len) != 0 || 4180 n->key.p[len] != '\0' || n->val.p == NULL) 4181 continue; 4182 if (*deftype & ROFFDEF_REN) { 4183 *deftype = ROFFDEF_REN; 4184 return n->val.p; 4185 } else { 4186 found = 1; 4187 break; 4188 } 4189 } 4190 for (i = 0; i < PREDEFS_MAX; i++) { 4191 if (strncmp(name, predefs[i].name, len) != 0 || 4192 predefs[i].name[len] != '\0') 4193 continue; 4194 if (*deftype & ROFFDEF_PRE) { 4195 *deftype = ROFFDEF_PRE; 4196 return predefs[i].str; 4197 } else { 4198 found = 1; 4199 break; 4200 } 4201 } 4202 if (r->man->meta.macroset != MACROSET_MAN) { 4203 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4204 if (strncmp(name, roff_name[tok], len) != 0 || 4205 roff_name[tok][len] != '\0') 4206 continue; 4207 if (*deftype & ROFFDEF_STD) { 4208 *deftype = ROFFDEF_STD; 4209 return NULL; 4210 } else { 4211 found = 1; 4212 break; 4213 } 4214 } 4215 } 4216 if (r->man->meta.macroset != MACROSET_MDOC) { 4217 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4218 if (strncmp(name, roff_name[tok], len) != 0 || 4219 roff_name[tok][len] != '\0') 4220 continue; 4221 if (*deftype & ROFFDEF_STD) { 4222 *deftype = ROFFDEF_STD; 4223 return NULL; 4224 } else { 4225 found = 1; 4226 break; 4227 } 4228 } 4229 } 4230 4231 if (found == 0 && *deftype != ROFFDEF_ANY) { 4232 if (*deftype & ROFFDEF_REN) { 4233 /* 4234 * This might still be a request, 4235 * so do not treat it as undefined yet. 4236 */ 4237 *deftype = ROFFDEF_UNDEF; 4238 return NULL; 4239 } 4240 4241 /* Using an undefined string defines it to be empty. */ 4242 4243 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4244 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4245 } 4246 4247 *deftype = 0; 4248 return NULL; 4249 } 4250 4251 static void 4252 roff_freestr(struct roffkv *r) 4253 { 4254 struct roffkv *n, *nn; 4255 4256 for (n = r; n; n = nn) { 4257 free(n->key.p); 4258 free(n->val.p); 4259 nn = n->next; 4260 free(n); 4261 } 4262 } 4263 4264 /* --- accessors and utility functions ------------------------------------ */ 4265 4266 /* 4267 * Duplicate an input string, making the appropriate character 4268 * conversations (as stipulated by `tr') along the way. 4269 * Returns a heap-allocated string with all the replacements made. 4270 */ 4271 char * 4272 roff_strdup(const struct roff *r, const char *p) 4273 { 4274 const struct roffkv *cp; 4275 char *res; 4276 const char *pp; 4277 size_t ssz, sz; 4278 enum mandoc_esc esc; 4279 4280 if (NULL == r->xmbtab && NULL == r->xtab) 4281 return mandoc_strdup(p); 4282 else if ('\0' == *p) 4283 return mandoc_strdup(""); 4284 4285 /* 4286 * Step through each character looking for term matches 4287 * (remember that a `tr' can be invoked with an escape, which is 4288 * a glyph but the escape is multi-character). 4289 * We only do this if the character hash has been initialised 4290 * and the string is >0 length. 4291 */ 4292 4293 res = NULL; 4294 ssz = 0; 4295 4296 while ('\0' != *p) { 4297 assert((unsigned int)*p < 128); 4298 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4299 sz = r->xtab[(int)*p].sz; 4300 res = mandoc_realloc(res, ssz + sz + 1); 4301 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4302 ssz += sz; 4303 p++; 4304 continue; 4305 } else if ('\\' != *p) { 4306 res = mandoc_realloc(res, ssz + 2); 4307 res[ssz++] = *p++; 4308 continue; 4309 } 4310 4311 /* Search for term matches. */ 4312 for (cp = r->xmbtab; cp; cp = cp->next) 4313 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4314 break; 4315 4316 if (NULL != cp) { 4317 /* 4318 * A match has been found. 4319 * Append the match to the array and move 4320 * forward by its keysize. 4321 */ 4322 res = mandoc_realloc(res, 4323 ssz + cp->val.sz + 1); 4324 memcpy(res + ssz, cp->val.p, cp->val.sz); 4325 ssz += cp->val.sz; 4326 p += (int)cp->key.sz; 4327 continue; 4328 } 4329 4330 /* 4331 * Handle escapes carefully: we need to copy 4332 * over just the escape itself, or else we might 4333 * do replacements within the escape itself. 4334 * Make sure to pass along the bogus string. 4335 */ 4336 pp = p++; 4337 esc = mandoc_escape(&p, NULL, NULL); 4338 if (ESCAPE_ERROR == esc) { 4339 sz = strlen(pp); 4340 res = mandoc_realloc(res, ssz + sz + 1); 4341 memcpy(res + ssz, pp, sz); 4342 break; 4343 } 4344 /* 4345 * We bail out on bad escapes. 4346 * No need to warn: we already did so when 4347 * roff_expand() was called. 4348 */ 4349 sz = (int)(p - pp); 4350 res = mandoc_realloc(res, ssz + sz + 1); 4351 memcpy(res + ssz, pp, sz); 4352 ssz += sz; 4353 } 4354 4355 res[(int)ssz] = '\0'; 4356 return res; 4357 } 4358 4359 int 4360 roff_getformat(const struct roff *r) 4361 { 4362 4363 return r->format; 4364 } 4365 4366 /* 4367 * Find out whether a line is a macro line or not. 4368 * If it is, adjust the current position and return one; if it isn't, 4369 * return zero and don't change the current position. 4370 * If the control character has been set with `.cc', then let that grain 4371 * precedence. 4372 * This is slightly contrary to groff, where using the non-breaking 4373 * control character when `cc' has been invoked will cause the 4374 * non-breaking macro contents to be printed verbatim. 4375 */ 4376 int 4377 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4378 { 4379 int pos; 4380 4381 pos = *ppos; 4382 4383 if (r->control != '\0' && cp[pos] == r->control) 4384 pos++; 4385 else if (r->control != '\0') 4386 return 0; 4387 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4388 pos += 2; 4389 else if ('.' == cp[pos] || '\'' == cp[pos]) 4390 pos++; 4391 else 4392 return 0; 4393 4394 while (' ' == cp[pos] || '\t' == cp[pos]) 4395 pos++; 4396 4397 *ppos = pos; 4398 return 1; 4399 } 4400