1 /* $OpenBSD: roff.c,v 1.252 2021/10/04 14:18:42 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the roff(7) parser for mandoc(1). 19 */ 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <limits.h> 25 #include <stddef.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc_ohash.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mandoc_parse.h" 36 #include "libmandoc.h" 37 #include "roff_int.h" 38 #include "tbl_parse.h" 39 #include "eqn_parse.h" 40 41 /* 42 * ASCII_ESC is used to signal from roff_getarg() to roff_expand() 43 * that an escape sequence resulted from copy-in processing and 44 * needs to be checked or interpolated. As it is used nowhere 45 * else, it is defined here rather than in a header file. 46 */ 47 #define ASCII_ESC 27 48 49 /* Maximum number of string expansions per line, to break infinite loops. */ 50 #define EXPAND_LIMIT 1000 51 52 /* Types of definitions of macros and strings. */ 53 #define ROFFDEF_USER (1 << 1) /* User-defined. 
*/ 54 #define ROFFDEF_PRE (1 << 2) /* Predefined. */ 55 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 56 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 57 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 58 ROFFDEF_REN | ROFFDEF_STD) 59 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 60 61 /* --- data types --------------------------------------------------------- */ 62 63 /* 64 * An incredibly-simple string buffer. 65 */ 66 struct roffstr { 67 char *p; /* nil-terminated buffer */ 68 size_t sz; /* saved strlen(p) */ 69 }; 70 71 /* 72 * A key-value roffstr pair as part of a singly-linked list. 73 */ 74 struct roffkv { 75 struct roffstr key; 76 struct roffstr val; 77 struct roffkv *next; /* next in list */ 78 }; 79 80 /* 81 * A single number register as part of a singly-linked list. 82 */ 83 struct roffreg { 84 struct roffstr key; 85 int val; 86 int step; 87 struct roffreg *next; 88 }; 89 90 /* 91 * Association of request and macro names with token IDs. 92 */ 93 struct roffreq { 94 enum roff_tok tok; 95 char name[]; 96 }; 97 98 /* 99 * A macro processing context. 100 * More than one is needed when macro calls are nested. 
101 */ 102 struct mctx { 103 char **argv; 104 int argc; 105 int argsz; 106 }; 107 108 struct roff { 109 struct roff_man *man; /* mdoc or man parser */ 110 struct roffnode *last; /* leaf of stack */ 111 struct mctx *mstack; /* stack of macro contexts */ 112 int *rstack; /* stack of inverted `ie' values */ 113 struct ohash *reqtab; /* request lookup table */ 114 struct roffreg *regtab; /* number registers */ 115 struct roffkv *strtab; /* user-defined strings & macros */ 116 struct roffkv *rentab; /* renamed strings & macros */ 117 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 118 struct roffstr *xtab; /* single-byte trans table (`tr') */ 119 const char *current_string; /* value of last called user macro */ 120 struct tbl_node *first_tbl; /* first table parsed */ 121 struct tbl_node *last_tbl; /* last table parsed */ 122 struct tbl_node *tbl; /* current table being parsed */ 123 struct eqn_node *last_eqn; /* equation parser */ 124 struct eqn_node *eqn; /* active equation parser */ 125 int eqn_inline; /* current equation is inline */ 126 int options; /* parse options */ 127 int mstacksz; /* current size of mstack */ 128 int mstackpos; /* position in mstack */ 129 int rstacksz; /* current size limit of rstack */ 130 int rstackpos; /* position in rstack */ 131 int format; /* current file in mdoc or man format */ 132 char control; /* control character */ 133 char escape; /* escape character */ 134 }; 135 136 /* 137 * A macro definition, condition, or ignored block. 138 */ 139 struct roffnode { 140 enum roff_tok tok; /* type of node */ 141 struct roffnode *parent; /* up one in stack */ 142 int line; /* parse line */ 143 int col; /* parse col */ 144 char *name; /* node name, e.g. 
macro name */ 145 char *end; /* custom end macro of the block */ 146 int endspan; /* scope to: 1=eol 2=next line -1=\} */ 147 int rule; /* content is: 1=evaluated 0=skipped */ 148 }; 149 150 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 151 enum roff_tok tok, /* tok of macro */ \ 152 struct buf *buf, /* input buffer */ \ 153 int ln, /* parse line */ \ 154 int ppos, /* original pos in buffer */ \ 155 int pos, /* current pos in buffer */ \ 156 int *offs /* reset offset of buffer data */ 157 158 typedef int (*roffproc)(ROFF_ARGS); 159 160 struct roffmac { 161 roffproc proc; /* process new macro */ 162 roffproc text; /* process as child text of macro */ 163 roffproc sub; /* process as child of macro */ 164 int flags; 165 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 166 }; 167 168 struct predef { 169 const char *name; /* predefined input name */ 170 const char *str; /* replacement symbol */ 171 }; 172 173 #define PREDEF(__name, __str) \ 174 { (__name), (__str) }, 175 176 /* --- function prototypes ------------------------------------------------ */ 177 178 static int roffnode_cleanscope(struct roff *); 179 static int roffnode_pop(struct roff *); 180 static void roffnode_push(struct roff *, enum roff_tok, 181 const char *, int, int); 182 static void roff_addtbl(struct roff_man *, int, struct tbl_node *); 183 static int roff_als(ROFF_ARGS); 184 static int roff_block(ROFF_ARGS); 185 static int roff_block_text(ROFF_ARGS); 186 static int roff_block_sub(ROFF_ARGS); 187 static int roff_break(ROFF_ARGS); 188 static int roff_cblock(ROFF_ARGS); 189 static int roff_cc(ROFF_ARGS); 190 static int roff_ccond(struct roff *, int, int); 191 static int roff_char(ROFF_ARGS); 192 static int roff_cond(ROFF_ARGS); 193 static int roff_cond_checkend(ROFF_ARGS); 194 static int roff_cond_text(ROFF_ARGS); 195 static int roff_cond_sub(ROFF_ARGS); 196 static int roff_ds(ROFF_ARGS); 197 static int roff_ec(ROFF_ARGS); 198 static int roff_eo(ROFF_ARGS); 199 static int 
roff_eqndelim(struct roff *, struct buf *, int); 200 static int roff_evalcond(struct roff *, int, char *, int *); 201 static int roff_evalnum(struct roff *, int, 202 const char *, int *, int *, int); 203 static int roff_evalpar(struct roff *, int, 204 const char *, int *, int *, int); 205 static int roff_evalstrcond(const char *, int *); 206 static int roff_expand(struct roff *, struct buf *, 207 int, int, char); 208 static void roff_free1(struct roff *); 209 static void roff_freereg(struct roffreg *); 210 static void roff_freestr(struct roffkv *); 211 static size_t roff_getname(struct roff *, char **, int, int); 212 static int roff_getnum(const char *, int *, int *, int); 213 static int roff_getop(const char *, int *, char *); 214 static int roff_getregn(struct roff *, 215 const char *, size_t, char); 216 static int roff_getregro(const struct roff *, 217 const char *name); 218 static const char *roff_getstrn(struct roff *, 219 const char *, size_t, int *); 220 static int roff_hasregn(const struct roff *, 221 const char *, size_t); 222 static int roff_insec(ROFF_ARGS); 223 static int roff_it(ROFF_ARGS); 224 static int roff_line_ignore(ROFF_ARGS); 225 static void roff_man_alloc1(struct roff_man *); 226 static void roff_man_free1(struct roff_man *); 227 static int roff_manyarg(ROFF_ARGS); 228 static int roff_noarg(ROFF_ARGS); 229 static int roff_nop(ROFF_ARGS); 230 static int roff_nr(ROFF_ARGS); 231 static int roff_onearg(ROFF_ARGS); 232 static enum roff_tok roff_parse(struct roff *, char *, int *, 233 int, int); 234 static int roff_parsetext(struct roff *, struct buf *, 235 int, int *); 236 static int roff_renamed(ROFF_ARGS); 237 static int roff_return(ROFF_ARGS); 238 static int roff_rm(ROFF_ARGS); 239 static int roff_rn(ROFF_ARGS); 240 static int roff_rr(ROFF_ARGS); 241 static void roff_setregn(struct roff *, const char *, 242 size_t, int, char, int); 243 static void roff_setstr(struct roff *, 244 const char *, const char *, int); 245 static void 
roff_setstrn(struct roffkv **, const char *, 246 size_t, const char *, size_t, int); 247 static int roff_shift(ROFF_ARGS); 248 static int roff_so(ROFF_ARGS); 249 static int roff_tr(ROFF_ARGS); 250 static int roff_Dd(ROFF_ARGS); 251 static int roff_TE(ROFF_ARGS); 252 static int roff_TS(ROFF_ARGS); 253 static int roff_EQ(ROFF_ARGS); 254 static int roff_EN(ROFF_ARGS); 255 static int roff_T_(ROFF_ARGS); 256 static int roff_unsupp(ROFF_ARGS); 257 static int roff_userdef(ROFF_ARGS); 258 259 /* --- constant data ------------------------------------------------------ */ 260 261 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 262 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 263 264 const char *__roff_name[MAN_MAX + 1] = { 265 "br", "ce", "fi", "ft", 266 "ll", "mc", "nf", 267 "po", "rj", "sp", 268 "ta", "ti", NULL, 269 "ab", "ad", "af", "aln", 270 "als", "am", "am1", "ami", 271 "ami1", "as", "as1", "asciify", 272 "backtrace", "bd", "bleedat", "blm", 273 "box", "boxa", "bp", "BP", 274 "break", "breakchar", "brnl", "brp", 275 "brpnl", "c2", "cc", 276 "cf", "cflags", "ch", "char", 277 "chop", "class", "close", "CL", 278 "color", "composite", "continue", "cp", 279 "cropat", "cs", "cu", "da", 280 "dch", "Dd", "de", "de1", 281 "defcolor", "dei", "dei1", "device", 282 "devicem", "di", "do", "ds", 283 "ds1", "dwh", "dt", "ec", 284 "ecr", "ecs", "el", "em", 285 "EN", "eo", "EP", "EQ", 286 "errprint", "ev", "evc", "ex", 287 "fallback", "fam", "fc", "fchar", 288 "fcolor", "fdeferlig", "feature", "fkern", 289 "fl", "flig", "fp", "fps", 290 "fschar", "fspacewidth", "fspecial", "ftr", 291 "fzoom", "gcolor", "hc", "hcode", 292 "hidechar", "hla", "hlm", "hpf", 293 "hpfa", "hpfcode", "hw", "hy", 294 "hylang", "hylen", "hym", "hypp", 295 "hys", "ie", "if", "ig", 296 "index", "it", "itc", "IX", 297 "kern", "kernafter", "kernbefore", "kernpair", 298 "lc", "lc_ctype", "lds", "length", 299 "letadj", "lf", "lg", "lhang", 300 "linetabs", "lnr", 
"lnrf", "lpfx", 301 "ls", "lsm", "lt", 302 "mediasize", "minss", "mk", "mso", 303 "na", "ne", "nh", "nhychar", 304 "nm", "nn", "nop", "nr", 305 "nrf", "nroff", "ns", "nx", 306 "open", "opena", "os", "output", 307 "padj", "papersize", "pc", "pev", 308 "pi", "PI", "pl", "pm", 309 "pn", "pnr", "ps", 310 "psbb", "pshape", "pso", "ptr", 311 "pvs", "rchar", "rd", "recursionlimit", 312 "return", "rfschar", "rhang", 313 "rm", "rn", "rnn", "rr", 314 "rs", "rt", "schar", "sentchar", 315 "shc", "shift", "sizes", "so", 316 "spacewidth", "special", "spreadwarn", "ss", 317 "sty", "substring", "sv", "sy", 318 "T&", "tc", "TE", 319 "TH", "tkf", "tl", 320 "tm", "tm1", "tmc", "tr", 321 "track", "transchar", "trf", "trimat", 322 "trin", "trnt", "troff", "TS", 323 "uf", "ul", "unformat", "unwatch", 324 "unwatchn", "vpt", "vs", "warn", 325 "warnscale", "watch", "watchlength", "watchn", 326 "wh", "while", "write", "writec", 327 "writem", "xflag", ".", NULL, 328 NULL, "text", 329 "Dd", "Dt", "Os", "Sh", 330 "Ss", "Pp", "D1", "Dl", 331 "Bd", "Ed", "Bl", "El", 332 "It", "Ad", "An", "Ap", 333 "Ar", "Cd", "Cm", "Dv", 334 "Er", "Ev", "Ex", "Fa", 335 "Fd", "Fl", "Fn", "Ft", 336 "Ic", "In", "Li", "Nd", 337 "Nm", "Op", "Ot", "Pa", 338 "Rv", "St", "Va", "Vt", 339 "Xr", "%A", "%B", "%D", 340 "%I", "%J", "%N", "%O", 341 "%P", "%R", "%T", "%V", 342 "Ac", "Ao", "Aq", "At", 343 "Bc", "Bf", "Bo", "Bq", 344 "Bsx", "Bx", "Db", "Dc", 345 "Do", "Dq", "Ec", "Ef", 346 "Em", "Eo", "Fx", "Ms", 347 "No", "Ns", "Nx", "Ox", 348 "Pc", "Pf", "Po", "Pq", 349 "Qc", "Ql", "Qo", "Qq", 350 "Re", "Rs", "Sc", "So", 351 "Sq", "Sm", "Sx", "Sy", 352 "Tn", "Ux", "Xc", "Xo", 353 "Fo", "Fc", "Oo", "Oc", 354 "Bk", "Ek", "Bt", "Hf", 355 "Fr", "Ud", "Lb", "Lp", 356 "Lk", "Mt", "Brq", "Bro", 357 "Brc", "%C", "Es", "En", 358 "Dx", "%Q", "%U", "Ta", 359 "Tg", NULL, 360 "TH", "SH", "SS", "TP", 361 "TQ", 362 "LP", "PP", "P", "IP", 363 "HP", "SM", "SB", "BI", 364 "IB", "BR", "RB", "R", 365 "B", "I", "IR", "RI", 366 "RE", "RS", "DT", 
"UC", 367 "PD", "AT", "in", 368 "SY", "YS", "OP", 369 "EX", "EE", "UR", 370 "UE", "MT", "ME", NULL 371 }; 372 const char *const *roff_name = __roff_name; 373 374 static struct roffmac roffs[TOKEN_NONE] = { 375 { roff_noarg, NULL, NULL, 0 }, /* br */ 376 { roff_onearg, NULL, NULL, 0 }, /* ce */ 377 { roff_noarg, NULL, NULL, 0 }, /* fi */ 378 { roff_onearg, NULL, NULL, 0 }, /* ft */ 379 { roff_onearg, NULL, NULL, 0 }, /* ll */ 380 { roff_onearg, NULL, NULL, 0 }, /* mc */ 381 { roff_noarg, NULL, NULL, 0 }, /* nf */ 382 { roff_onearg, NULL, NULL, 0 }, /* po */ 383 { roff_onearg, NULL, NULL, 0 }, /* rj */ 384 { roff_onearg, NULL, NULL, 0 }, /* sp */ 385 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 386 { roff_onearg, NULL, NULL, 0 }, /* ti */ 387 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 388 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 389 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 390 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 391 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 392 { roff_als, NULL, NULL, 0 }, /* als */ 393 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 394 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 395 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 396 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 397 { roff_ds, NULL, NULL, 0 }, /* as */ 398 { roff_ds, NULL, NULL, 0 }, /* as1 */ 399 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 400 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 401 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 402 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 403 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 404 { roff_unsupp, NULL, NULL, 0 }, /* box */ 405 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 406 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 407 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 408 { roff_break, NULL, NULL, 0 }, /* break */ 409 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 410 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 411 { roff_noarg, NULL, 
NULL, 0 }, /* brp */ 412 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 413 { roff_unsupp, NULL, NULL, 0 }, /* c2 */ 414 { roff_cc, NULL, NULL, 0 }, /* cc */ 415 { roff_insec, NULL, NULL, 0 }, /* cf */ 416 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 417 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 418 { roff_char, NULL, NULL, 0 }, /* char */ 419 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 420 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 421 { roff_insec, NULL, NULL, 0 }, /* close */ 422 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 423 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 424 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 425 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 426 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 427 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 428 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 429 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 430 { roff_unsupp, NULL, NULL, 0 }, /* da */ 431 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 432 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 433 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 434 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 435 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 436 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 437 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 438 { roff_unsupp, NULL, NULL, 0 }, /* device */ 439 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 440 { roff_unsupp, NULL, NULL, 0 }, /* di */ 441 { roff_unsupp, NULL, NULL, 0 }, /* do */ 442 { roff_ds, NULL, NULL, 0 }, /* ds */ 443 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 444 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 445 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 446 { roff_ec, NULL, NULL, 0 }, /* ec */ 447 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 448 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 449 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 450 { roff_unsupp, NULL, NULL, 0 }, /* em */ 451 { roff_EN, NULL, NULL, 
0 }, /* EN */ 452 { roff_eo, NULL, NULL, 0 }, /* eo */ 453 { roff_unsupp, NULL, NULL, 0 }, /* EP */ 454 { roff_EQ, NULL, NULL, 0 }, /* EQ */ 455 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 456 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 457 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 458 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 459 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 460 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 461 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 462 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 463 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 464 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 465 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 466 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 467 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 468 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 469 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 470 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 471 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 472 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 473 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 474 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 475 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 476 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 477 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 479 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 484 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 485 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 486 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 487 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 488 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 489 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 490 { roff_line_ignore, NULL, NULL, 0 }, /* 
hypp */ 491 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 492 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 493 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 494 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 495 { roff_unsupp, NULL, NULL, 0 }, /* index */ 496 { roff_it, NULL, NULL, 0 }, /* it */ 497 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 498 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 499 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 500 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 501 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 502 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 503 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 504 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 505 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 506 { roff_unsupp, NULL, NULL, 0 }, /* length */ 507 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 508 { roff_insec, NULL, NULL, 0 }, /* lf */ 509 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 510 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 511 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 512 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 513 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 514 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 515 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 516 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 517 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 518 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 519 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 520 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 521 { roff_insec, NULL, NULL, 0 }, /* mso */ 522 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 523 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 524 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 525 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 526 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 527 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 528 { roff_nop, NULL, NULL, 0 }, /* nop */ 529 { roff_nr, NULL, NULL, 0 }, /* nr */ 530 
{ roff_unsupp, NULL, NULL, 0 }, /* nrf */ 531 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ 532 { roff_line_ignore, NULL, NULL, 0 }, /* ns */ 533 { roff_insec, NULL, NULL, 0 }, /* nx */ 534 { roff_insec, NULL, NULL, 0 }, /* open */ 535 { roff_insec, NULL, NULL, 0 }, /* opena */ 536 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 537 { roff_unsupp, NULL, NULL, 0 }, /* output */ 538 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 539 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 540 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 541 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 542 { roff_insec, NULL, NULL, 0 }, /* pi */ 543 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 544 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 545 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 546 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 547 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 548 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 549 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 550 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 551 { roff_insec, NULL, NULL, 0 }, /* pso */ 552 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 553 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 554 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 555 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 556 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 557 { roff_return, NULL, NULL, 0 }, /* return */ 558 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 559 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 560 { roff_rm, NULL, NULL, 0 }, /* rm */ 561 { roff_rn, NULL, NULL, 0 }, /* rn */ 562 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 563 { roff_rr, NULL, NULL, 0 }, /* rr */ 564 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 565 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 566 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 567 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 568 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 569 { roff_shift, NULL, NULL, 0 }, /* shift */ 570 { roff_line_ignore, NULL, NULL, 0 }, 
/* sizes */ 571 { roff_so, NULL, NULL, 0 }, /* so */ 572 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ 573 { roff_line_ignore, NULL, NULL, 0 }, /* special */ 574 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 575 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 576 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 577 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 578 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 579 { roff_insec, NULL, NULL, 0 }, /* sy */ 580 { roff_T_, NULL, NULL, 0 }, /* T& */ 581 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 582 { roff_TE, NULL, NULL, 0 }, /* TE */ 583 { roff_Dd, NULL, NULL, 0 }, /* TH */ 584 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 585 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 586 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 587 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 588 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 589 { roff_tr, NULL, NULL, 0 }, /* tr */ 590 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 591 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 592 { roff_insec, NULL, NULL, 0 }, /* trf */ 593 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 594 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 595 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 596 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 597 { roff_TS, NULL, NULL, 0 }, /* TS */ 598 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 599 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 600 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 601 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 602 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 603 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 604 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 605 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 606 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 607 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 608 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 609 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 610 { roff_unsupp, NULL, NULL, 0 }, 
/* wh */ 611 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 612 { roff_insec, NULL, NULL, 0 }, /* write */ 613 { roff_insec, NULL, NULL, 0 }, /* writec */ 614 { roff_insec, NULL, NULL, 0 }, /* writem */ 615 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 616 { roff_cblock, NULL, NULL, 0 }, /* . */ 617 { roff_renamed, NULL, NULL, 0 }, 618 { roff_userdef, NULL, NULL, 0 } 619 }; 620 621 /* Array of injected predefined strings. */ 622 #define PREDEFS_MAX 38 623 static const struct predef predefs[PREDEFS_MAX] = { 624 #include "predefs.in" 625 }; 626 627 static int roffce_lines; /* number of input lines to center */ 628 static struct roff_node *roffce_node; /* active request */ 629 static int roffit_lines; /* number of lines to delay */ 630 static char *roffit_macro; /* nil-terminated macro line */ 631 632 633 /* --- request table ------------------------------------------------------ */ 634 635 struct ohash * 636 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 637 { 638 struct ohash *htab; 639 struct roffreq *req; 640 enum roff_tok tok; 641 size_t sz; 642 unsigned int slot; 643 644 htab = mandoc_malloc(sizeof(*htab)); 645 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 646 647 for (tok = mintok; tok < maxtok; tok++) { 648 if (roff_name[tok] == NULL) 649 continue; 650 sz = strlen(roff_name[tok]); 651 req = mandoc_malloc(sizeof(*req) + sz + 1); 652 req->tok = tok; 653 memcpy(req->name, roff_name[tok], sz + 1); 654 slot = ohash_qlookup(htab, req->name); 655 ohash_insert(htab, slot, req); 656 } 657 return htab; 658 } 659 660 void 661 roffhash_free(struct ohash *htab) 662 { 663 struct roffreq *req; 664 unsigned int slot; 665 666 if (htab == NULL) 667 return; 668 for (req = ohash_first(htab, &slot); req != NULL; 669 req = ohash_next(htab, &slot)) 670 free(req); 671 ohash_delete(htab); 672 free(htab); 673 } 674 675 enum roff_tok 676 roffhash_find(struct ohash *htab, const char *name, size_t sz) 677 { 678 struct roffreq *req; 679 
const char	*end;

	if (sz) {
		/* Interval lookup: end marks one past the last key byte. */
		end = name + sz;
		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
	} else
		req = ohash_find(htab, ohash_qlookup(htab, name));
	return req == NULL ? TOKEN_NONE : req->tok;
}

/* --- stack of request blocks -------------------------------------------- */

/*
 * Pop the current node off of the stack of roff instructions currently
 * pending.  Return 1 if it is a loop or 0 otherwise.
 * r->last is dereferenced unconditionally, so it must not be NULL.
 */
static int
roffnode_pop(struct roff *r)
{
	struct roffnode	*p;
	int		 inloop;

	p = r->last;
	/* Remember the kind of node before releasing it. */
	inloop = p->tok == ROFF_while;
	r->last = p->parent;
	free(p->name);
	free(p->end);
	free(p);
	return inloop;
}

/*
 * Push a roff node onto the instruction stack.  This must later be
 * removed with roffnode_pop().
 * The name, if any, is copied; the new node inherits the rule
 * of its parent, or gets rule 0 if the stack was empty.
 */
static void
roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
		int line, int col)
{
	struct roffnode	*p;

	p = mandoc_calloc(1, sizeof(struct roffnode));
	p->tok = tok;
	if (name)
		p->name = mandoc_strdup(name);
	p->parent = r->last;
	p->line = line;
	p->col = col;
	p->rule = p->parent ?
p->parent->rule : 0;

	r->last = p;
}

/* --- roff parser state data management ---------------------------------- */

/*
 * Release the parser state that roff_reset() and roff_free() both
 * discard: tables, equations, active macro call contexts, open request
 * blocks, the rstack, registers, string tables and the xtab array.
 * The struct itself, the macro stack storage and the request table
 * are not freed here.
 */
static void
roff_free1(struct roff *r)
{
	int		 i;

	tbl_free(r->first_tbl);
	r->first_tbl = r->last_tbl = r->tbl = NULL;

	eqn_free(r->last_eqn);
	r->last_eqn = r->eqn = NULL;

	/* Unwind macro call contexts that are still active. */
	while (r->mstackpos >= 0)
		roff_userret(r);

	/* Drop request blocks that are still open. */
	while (r->last)
		roffnode_pop(r);

	free (r->rstack);
	r->rstack = NULL;
	r->rstacksz = 0;
	r->rstackpos = -1;

	roff_freereg(r->regtab);
	r->regtab = NULL;

	roff_freestr(r->strtab);
	roff_freestr(r->rentab);
	roff_freestr(r->xmbtab);
	r->strtab = r->rentab = r->xmbtab = NULL;

	/* xtab, if allocated, is treated as an array of 128 entries. */
	if (r->xtab)
		for (i = 0; i < 128; i++)
			free(r->xtab[i].p);
	free(r->xtab);
	r->xtab = NULL;
}

/*
 * Return the parser to its initial state so that it can be reused:
 * discard the state via roff_free1(), re-enable MPARSE_COMMENT,
 * recompute the format from the options, and restore the default
 * control and escape characters and the file-scope .ce/.it state.
 * NOTE(review): roffit_macro is set to NULL without being freed here;
 * whether that can leak depends on code outside this view - confirm.
 */
void
roff_reset(struct roff *r)
{
	roff_free1(r);
	r->options |= MPARSE_COMMENT;
	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
	r->control = '\0';
	r->escape = '\\';
	roffce_lines = 0;
	roffce_node = NULL;
	roffit_lines = 0;
	roffit_macro = NULL;
}

/*
 * Destroy a parser allocated with roff_alloc(), including
 * the argv arrays of the macro stack and the request table.
 */
void
roff_free(struct roff *r)
{
	int		 i;

	roff_free1(r);
	for (i = 0; i < r->mstacksz; i++)
		free(r->mstack[i].argv);
	free(r->mstack);
	roffhash_free(r->reqtab);
	free(r);
}

/*
 * Allocate a zeroed parser, build its request table for the
 * tokens below ROFF_RENAMED, and set the initial options
 * (always including MPARSE_COMMENT), format, empty stacks
 * and the default escape character.
 */
struct roff *
roff_alloc(int options)
{
	struct roff	*r;

	r = mandoc_calloc(1, sizeof(struct roff));
	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
	r->options = options | MPARSE_COMMENT;
	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
	r->mstackpos = -1;
	r->rstackpos = -1;
	r->escape = '\\';
	return r;
}

/* --- syntax tree state data management ---------------------------------- */

/*
 * Delete the syntax tree, if any, and free the meta data strings.
 * The struct roff_man itself is left allocated.
 */
static void
roff_man_free1(struct roff_man *man)
{
	if (man->meta.first != NULL)
		roff_node_delete(man, man->meta.first);
	free(man->meta.msec);
free(man->meta.vol);
	free(man->meta.os);
	free(man->meta.arch);
	free(man->meta.title);
	free(man->meta.name);
	free(man->meta.date);
	free(man->meta.sodest);
}

/*
 * Move the parse point back to the root node as NEXT_CHILD, clear
 * the flags and section tracking, and set the "nS" register to 0.
 */
void
roff_state_reset(struct roff_man *man)
{
	man->last = man->meta.first;
	man->last_es = NULL;
	man->flags = 0;
	man->lastsec = man->lastnamed = SEC_NONE;
	man->next = ROFF_NEXT_CHILD;
	roff_setreg(man->roff, "nS", 0, '=');
}

/*
 * Zero the meta data, allocate a fresh ROFFT_ROOT node,
 * and reset the parse state.  Anything previously referenced
 * by man->meta must already have been released by the caller.
 */
static void
roff_man_alloc1(struct roff_man *man)
{
	memset(&man->meta, 0, sizeof(man->meta));
	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
	man->meta.first->type = ROFFT_ROOT;
	man->meta.macroset = MACROSET_NONE;
	roff_state_reset(man);
}

/*
 * Discard the current tree and meta data and start over
 * with an empty tree, keeping the struct roff_man itself.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}

/*
 * Destroy a struct roff_man, including the os_r string.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man->os_r);
	free(man);
}

/*
 * Allocate a struct roff_man with an empty tree and link it
 * with the given roff parser in both directions.
 * The os_s pointer is stored as is, not copied.
 */
struct roff_man *
roff_man_alloc(struct roff *roff, const char *os_s, int quick)
{
	struct roff_man *man;

	man = mandoc_calloc(1, sizeof(*man));
	man->roff = roff;
	man->os_s = os_s;
	man->quick = quick;
	roff_man_alloc1(man);
	roff->man = man;
	return man;
}

/* --- syntax tree handling ----------------------------------------------- */

/*
 * Allocate a zeroed node that is not yet linked into the tree.
 * The section is taken from man->lastsec, and the NODE_SYNPRETTY,
 * NODE_NOFILL and NODE_LINE flags are derived from man->flags.
 * As a side effect, MDOC_NEWLINE is cleared in man->flags.
 */
struct roff_node *
roff_node_alloc(struct roff_man *man, int line, int pos,
	enum roff_type type, int tok)
{
	struct roff_node	*n;

	n = mandoc_calloc(1, sizeof(*n));
	n->line = line;
	n->pos = pos;
	n->tok = tok;
	n->type = type;
	n->sec = man->lastsec;

	if (man->flags & MDOC_SYNOPSIS)
		n->flags |= NODE_SYNPRETTY;
	else
		n->flags &= ~NODE_SYNPRETTY;
	/* No-fill mode applies only if ROFF_NONOFILL is not set as well. */
	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
		n->flags |= NODE_NOFILL;
	else
		n->flags &= ~NODE_NOFILL;
	if (man->flags & MDOC_NEWLINE)
		n->flags |= NODE_LINE;
man->flags &= ~MDOC_NEWLINE;

	return n;
}

/*
 * Link the node n into the tree at the current parse point and
 * make it the new man->last.
 * With ROFF_NEXT_SIBLING, n is inserted directly after man->last;
 * with ROFF_NEXT_CHILD, n becomes the first child of man->last.
 * Any other value of man->next aborts the program.
 * For head, body and tail nodes, the corresponding pointer
 * of the parent block is set as well.
 */
void
roff_node_append(struct roff_man *man, struct roff_node *n)
{

	switch (man->next) {
	case ROFF_NEXT_SIBLING:
		if (man->last->next != NULL) {
			n->next = man->last->next;
			man->last->next->prev = n;
		} else
			man->last->parent->last = n;
		man->last->next = n;
		n->prev = man->last;
		n->parent = man->last->parent;
		break;
	case ROFF_NEXT_CHILD:
		if (man->last->child != NULL) {
			n->next = man->last->child;
			man->last->child->prev = n;
		} else
			man->last->last = n;
		man->last->child = n;
		n->parent = man->last;
		break;
	default:
		abort();
	}
	man->last = n;

	switch (n->type) {
	case ROFFT_HEAD:
		n->parent->head = n;
		break;
	case ROFFT_BODY:
		/* Only a body with end == ENDBODY_NOT is registered. */
		if (n->end != ENDBODY_NOT)
			return;
		n->parent->body = n;
		break;
	case ROFFT_TAIL:
		n->parent->tail = n;
		break;
	default:
		return;
	}

	/*
	 * Copy over the normalised-data pointer of our parent.  Not
	 * everybody has one, but copying a null pointer is fine.
*/

	n->norm = n->parent->norm;
	assert(n->parent->type == ROFFT_BLOCK);
}

/*
 * Append a new text node holding a copy of word, made with
 * roff_strdup(), and mark it as valid and ended.
 * The next node will become a sibling.
 */
void
roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
{
	struct roff_node	*n;

	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
	n->string = roff_strdup(man->roff, word);
	roff_node_append(man, n);
	n->flags |= NODE_VALID | NODE_ENDED;
	man->next = ROFF_NEXT_SIBLING;
}

/*
 * Append word to the string of the node man->last,
 * separated from the existing content by one blank.
 * The string of man->last is used unchecked by this function.
 */
void
roff_word_append(struct roff_man *man, const char *word)
{
	struct roff_node	*n;
	char			*addstr, *newstr;

	n = man->last;
	addstr = roff_strdup(man->roff, word);
	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
	free(addstr);
	free(n->string);
	n->string = newstr;
	man->next = ROFF_NEXT_SIBLING;
}

/*
 * Append a new element node; the next node will become its child.
 */
void
roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
{
	struct roff_node	*n;

	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
	roff_node_append(man, n);
	man->next = ROFF_NEXT_CHILD;
}

/*
 * Append a new block node and return it;
 * the next node will become its child.
 */
struct roff_node *
roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
{
	struct roff_node	*n;

	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
	roff_node_append(man, n);
	man->next = ROFF_NEXT_CHILD;
	return n;
}

/*
 * Append a new head node and return it;
 * the next node will become its child.
 */
struct roff_node *
roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
{
	struct roff_node	*n;

	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
	roff_node_append(man, n);
	man->next = ROFF_NEXT_CHILD;
	return n;
}

/*
 * Append a new body node and return it;
 * the next node will become its child.
 */
struct roff_node *
roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
{
	struct roff_node	*n;

	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
	roff_node_append(man, n);
	man->next = ROFF_NEXT_CHILD;
	return n;
}

/*
 * Append one ROFFT_TBL node for each span
 * that tbl_span() currently returns for tbl.
 */
static void
roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
{
	struct roff_node	*n;
	struct tbl_span		*span;

	if (man->meta.macroset == MACROSET_MAN)
		man_breakscope(man, ROFF_TS);
	while ((span = tbl_span(tbl)) != NULL) {
		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
		n->span = span;
		roff_node_append(man, n);
		n->flags |= NODE_VALID | NODE_ENDED;
		man->next = ROFF_NEXT_SIBLING;
	}
}

/*
 * Detach the node n from its siblings and from its parent.
 * The node itself, including its own prev, next, parent and child
 * pointers, is left unchanged.
 * If man is not NULL and n was the parse point or the root of
 * the tree, those pointers in man are adjusted as well.
 */
void
roff_node_unlink(struct roff_man *man, struct roff_node *n)
{

	/* Adjust siblings. */

	if (n->prev)
		n->prev->next = n->next;
	if (n->next)
		n->next->prev = n->prev;

	/* Adjust parent. */

	if (n->parent != NULL) {
		if (n->parent->child == n)
			n->parent->child = n->next;
		if (n->parent->last == n)
			n->parent->last = n->prev;
	}

	/* Adjust parse point. */

	if (man == NULL)
		return;
	if (man->last == n) {
		if (n->prev == NULL) {
			man->last = n->parent;
			man->next = ROFF_NEXT_CHILD;
		} else {
			man->last = n->prev;
			man->next = ROFF_NEXT_SIBLING;
		}
	}
	if (man->meta.first == n)
		man->meta.first = NULL;
}

/*
 * Move the node n from its present position
 * in the tree to the current parse point.
 */
void
roff_node_relink(struct roff_man *man, struct roff_node *n)
{
	roff_node_unlink(man, n);
	n->prev = n->next = NULL;
	roff_node_append(man, n);
}

/*
 * Free a single node and the data it owns.
 * Children are not freed and tree links are not adjusted.
 * The norm pointer is only freed for block and element nodes
 * because roff_node_append() shares the parent's norm pointer
 * with head, body and tail nodes.
 */
void
roff_node_free(struct roff_node *n)
{

	if (n->args != NULL)
		mdoc_argv_free(n->args);
	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
		free(n->norm);
	eqn_box_free(n->eqn);
	free(n->string);
	free(n->tag);
	free(n);
}

/*
 * Recursively delete the node n and all its descendants.
 * Unlinking each child updates n->child, which ends the loop.
 */
void
roff_node_delete(struct roff_man *man, struct roff_node *n)
{

	while (n->child != NULL)
		roff_node_delete(man, n->child);
	roff_node_unlink(man, n);
	roff_node_free(n);
}

/*
 * Return 1 for comment nodes, for nodes flagged NODE_NOPRT, and for
 * nodes whose token is transparent according to roff_tok_transparent().
 * Return 0 otherwise, in particular for a NULL pointer.
 */
int
roff_node_transparent(struct roff_node *n)
{
	if (n == NULL)
		return 0;
	if (n->type ==
ROFFT_COMMENT || n->flags & NODE_NOPRT) 1126 return 1; 1127 return roff_tok_transparent(n->tok); 1128 } 1129 1130 int 1131 roff_tok_transparent(enum roff_tok tok) 1132 { 1133 switch (tok) { 1134 case ROFF_ft: 1135 case ROFF_ll: 1136 case ROFF_mc: 1137 case ROFF_po: 1138 case ROFF_ta: 1139 case MDOC_Db: 1140 case MDOC_Es: 1141 case MDOC_Sm: 1142 case MDOC_Tg: 1143 case MAN_DT: 1144 case MAN_UC: 1145 case MAN_PD: 1146 case MAN_AT: 1147 return 1; 1148 default: 1149 return 0; 1150 } 1151 } 1152 1153 struct roff_node * 1154 roff_node_child(struct roff_node *n) 1155 { 1156 for (n = n->child; roff_node_transparent(n); n = n->next) 1157 continue; 1158 return n; 1159 } 1160 1161 struct roff_node * 1162 roff_node_prev(struct roff_node *n) 1163 { 1164 do { 1165 n = n->prev; 1166 } while (roff_node_transparent(n)); 1167 return n; 1168 } 1169 1170 struct roff_node * 1171 roff_node_next(struct roff_node *n) 1172 { 1173 do { 1174 n = n->next; 1175 } while (roff_node_transparent(n)); 1176 return n; 1177 } 1178 1179 void 1180 deroff(char **dest, const struct roff_node *n) 1181 { 1182 char *cp; 1183 size_t sz; 1184 1185 if (n->string == NULL) { 1186 for (n = n->child; n != NULL; n = n->next) 1187 deroff(dest, n); 1188 return; 1189 } 1190 1191 /* Skip leading whitespace. */ 1192 1193 for (cp = n->string; *cp != '\0'; cp++) { 1194 if (cp[0] == '\\' && cp[1] != '\0' && 1195 strchr(" %&0^|~", cp[1]) != NULL) 1196 cp++; 1197 else if ( ! isspace((unsigned char)*cp)) 1198 break; 1199 } 1200 1201 /* Skip trailing backslash. */ 1202 1203 sz = strlen(cp); 1204 if (sz > 0 && cp[sz - 1] == '\\') 1205 sz--; 1206 1207 /* Skip trailing whitespace. */ 1208 1209 for (; sz; sz--) 1210 if ( ! isspace((unsigned char)cp[sz-1])) 1211 break; 1212 1213 /* Skip empty strings. 
*/ 1214 1215 if (sz == 0) 1216 return; 1217 1218 if (*dest == NULL) { 1219 *dest = mandoc_strndup(cp, sz); 1220 return; 1221 } 1222 1223 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1224 free(*dest); 1225 *dest = cp; 1226 } 1227 1228 /* --- main functions of the roff parser ---------------------------------- */ 1229 1230 /* 1231 * In the current line, expand escape sequences that produce parsable 1232 * input text. Also check the syntax of the remaining escape sequences, 1233 * which typically produce output glyphs or change formatter state. 1234 */ 1235 static int 1236 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1237 { 1238 struct mctx *ctx; /* current macro call context */ 1239 char ubuf[24]; /* buffer to print the number */ 1240 struct roff_node *n; /* used for header comments */ 1241 const char *start; /* start of the string to process */ 1242 char *stesc; /* start of an escape sequence ('\\') */ 1243 const char *esct; /* type of esccape sequence */ 1244 char *ep; /* end of comment string */ 1245 const char *stnam; /* start of the name, after "[(*" */ 1246 const char *cp; /* end of the name, e.g. 
before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/* Handle trailing whitespace. */

		/* From here on, stesc points to the escape character again. */
		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Discard normal comments. */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/*
	 * Scan backwards from the end of the line.  After each
	 * replacement, stesc is repositioned to the end of the
	 * inserted text, so replacements are scanned as well.
	 */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		/* An even run of escape characters escapes itself. */
		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Escape character at the end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			/* Only \w counts a nested escape as one character. */
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* \$1 to \$9 select a single argument. */
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			/* Compute the length of all arguments joined. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}
			/*
			 * Replace the three bytes of the escape sequence
			 * by asz bytes in place: when shrinking, move the
			 * rest before the realloc; when growing, after it.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			/* Refuse to let the line grow beyond SHRT_MAX. */
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}

/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * Return a pointer to the parsed argument, which is either the original
 * pointer or advanced by one byte in case the argument is quoted.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer to the next argument,
 * or to the NUL byte terminating the argument line.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args.
*/ 1706 white = 1; 1707 break; 1708 } 1709 } else if ('"' == cp[0]) { 1710 if ('"' == cp[1]) { 1711 /* Quoted quotes collapse. */ 1712 pairs++; 1713 cp++; 1714 } else { 1715 /* Unquoted quotes end quoted args. */ 1716 quoted = 2; 1717 break; 1718 } 1719 } 1720 } 1721 1722 /* Quoted argument without a closing quote. */ 1723 if (1 == quoted) 1724 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1725 1726 /* NUL-terminate this argument and move to the next one. */ 1727 if (pairs) 1728 cp[-pairs] = '\0'; 1729 if ('\0' != *cp) { 1730 *cp++ = '\0'; 1731 while (' ' == *cp) 1732 cp++; 1733 } 1734 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1735 *cpp = cp; 1736 1737 if ('\0' == *cp && (white || ' ' == cp[-1])) 1738 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1739 1740 start = mandoc_strdup(start); 1741 if (newesc == 0) 1742 return start; 1743 1744 buf.buf = start; 1745 buf.sz = strlen(start) + 1; 1746 buf.next = NULL; 1747 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { 1748 free(buf.buf); 1749 buf.buf = mandoc_strdup(""); 1750 } 1751 return buf.buf; 1752 } 1753 1754 1755 /* 1756 * Process text streams. 1757 */ 1758 static int 1759 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1760 { 1761 size_t sz; 1762 const char *start; 1763 char *p; 1764 int isz; 1765 enum mandoc_esc esc; 1766 1767 /* Spring the input line trap. */ 1768 1769 if (roffit_lines == 1) { 1770 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1771 free(buf->buf); 1772 buf->buf = p; 1773 buf->sz = isz + 1; 1774 *offs = 0; 1775 free(roffit_macro); 1776 roffit_lines = 0; 1777 return ROFF_REPARSE; 1778 } else if (roffit_lines > 1) 1779 --roffit_lines; 1780 1781 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1782 if (roffce_lines < 1) { 1783 r->man->last = roffce_node; 1784 r->man->next = ROFF_NEXT_SIBLING; 1785 roffce_lines = 0; 1786 roffce_node = NULL; 1787 } else 1788 roffce_lines--; 1789 } 1790 1791 /* Convert all breakable hyphens into ASCII_HYPH. 
*/

	start = p = buf->buf + pos;

	while (*p != '\0') {
		sz = strcspn(p, "-\\");
		p += sz;

		if (*p == '\0')
			break;

		if (*p == '\\') {
			/* Skip over escapes. */
			p++;
			esc = mandoc_escape((const char **)&p, NULL, NULL);
			if (esc == ESCAPE_ERROR)
				break;
			while (*p == '-')
				p++;
			continue;
		} else if (p == start) {
			/* A hyphen at the start has no preceding letter. */
			p++;
			continue;
		}

		/* Only hyphens between two letters are breakable. */
		if (isalpha((unsigned char)p[-1]) &&
		    isalpha((unsigned char)p[1]))
			*p = ASCII_HYPH;
		p++;
	}
	return ROFF_CONT;
}

/*
 * Process one input line starting at buf->buf + *offs.
 * Handle equation delimiters, expand escape sequences, and then
 * dispatch the line to the handler of an open request block,
 * to the equation or table parser, to the text processing,
 * or to the handler of the request or macro found on the line.
 * Return a combination of ROFF_* values telling the caller
 * what to do with the line.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Warn about long text lines outside tables, equations, no-fill. */
	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
	    (r->man->flags & ROFF_NOFILL) == 0 &&
	    strchr(" .\\", buf->buf[pos]) == NULL &&
	    buf->buf[pos] != r->control &&
	    strcspn(buf->buf, " ") < 80)
		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
		    "%.20s...", buf->buf + pos);

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the bits outside ROFF_MASK for later. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl) {
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/* Skip the unknown macro name and pass on the rest. */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}

/*
 * Internal interface function to tell the roff parser that execution
 * of the current macro ended.  This is required because macro
 * definitions usually do not end with a .return request.
*/
void
roff_userret(struct roff *r)
{
	struct mctx	*ctx;
	int		 i;

	assert(r->mstackpos >= 0);
	ctx = r->mstack + r->mstackpos;
	/* Free the argument strings, but keep the argv array itself. */
	for (i = 0; i < ctx->argc; i++)
		free(ctx->argv[i]);
	ctx->argc = 0;
	r->mstackpos--;
}

/*
 * Finish parsing at the end of the input: report a request block
 * that is still open, report and parse an unterminated equation,
 * and end an unterminated table.
 */
void
roff_endparse(struct roff *r)
{
	if (r->last != NULL)
		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
		    r->last->col, "%s", roff_name[r->last->tok]);

	if (r->eqn != NULL) {
		mandoc_msg(MANDOCERR_BLK_NOEND,
		    r->eqn->node->line, r->eqn->node->pos, "EQ");
		eqn_parse(r->eqn);
		r->eqn = NULL;
	}

	if (r->tbl != NULL) {
		tbl_end(r->tbl, 1);
		r->tbl = NULL;
	}
}

/*
 * Parse a roff node's type from the input buffer.  This must be in the
 * form of ".foo xxx" in the usual way.
 * User-defined and renamed macros take precedence over the request
 * table.  If a token is found, *pos is advanced to wherever
 * roff_getname() left the parse pointer; for TOKEN_NONE, it is
 * left unchanged.
 */
static enum roff_tok
roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
{
	char		*cp;
	const char	*mac;
	size_t		 maclen;
	int		 deftype;
	enum roff_tok	 t;

	cp = buf + *pos;

	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
		return TOKEN_NONE;

	mac = cp;
	maclen = roff_getname(r, &cp, ln, ppos);

	/* On return, deftype tells which kind of definition was found. */
	deftype = ROFFDEF_USER | ROFFDEF_REN;
	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
	switch (deftype) {
	case ROFFDEF_USER:
		t = ROFF_USERDEF;
		break;
	case ROFFDEF_REN:
		t = ROFF_RENAMED;
		break;
	default:
		t = roffhash_find(r->reqtab, mac, maclen);
		break;
	}
	if (t != TOKEN_NONE)
		*pos = cp - buf;
	else if (deftype == ROFFDEF_UNDEF) {
		/* Using an undefined macro defines it to be empty.
*/ 2037 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 2038 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 2039 } 2040 return t; 2041 } 2042 2043 /* --- handling of request blocks ----------------------------------------- */ 2044 2045 /* 2046 * Close a macro definition block or an "ignore" block. 2047 */ 2048 static int 2049 roff_cblock(ROFF_ARGS) 2050 { 2051 int rr; 2052 2053 if (r->last == NULL) { 2054 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2055 return ROFF_IGN; 2056 } 2057 2058 switch (r->last->tok) { 2059 case ROFF_am: 2060 case ROFF_ami: 2061 case ROFF_de: 2062 case ROFF_dei: 2063 case ROFF_ig: 2064 break; 2065 case ROFF_am1: 2066 case ROFF_de1: 2067 /* Remapped in roff_block(). */ 2068 abort(); 2069 default: 2070 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2071 return ROFF_IGN; 2072 } 2073 2074 roffnode_pop(r); 2075 roffnode_cleanscope(r); 2076 2077 /* 2078 * If a conditional block with braces is still open, 2079 * check for "\}" block end markers. 2080 */ 2081 2082 if (r->last != NULL && r->last->endspan < 0) { 2083 rr = 1; /* If arguments follow "\}", warn about them. */ 2084 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2085 } 2086 2087 if (buf->buf[pos] != '\0') 2088 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 2089 ".. %s", buf->buf + pos); 2090 2091 return ROFF_IGN; 2092 } 2093 2094 /* 2095 * Pop all nodes ending at the end of the current input line. 2096 * Return the number of loops ended. 2097 */ 2098 static int 2099 roffnode_cleanscope(struct roff *r) 2100 { 2101 int inloop; 2102 2103 inloop = 0; 2104 while (r->last != NULL && r->last->endspan > 0) { 2105 if (--r->last->endspan != 0) 2106 break; 2107 inloop += roffnode_pop(r); 2108 } 2109 return inloop; 2110 } 2111 2112 /* 2113 * Handle the closing "\}" of a conditional block. 2114 * Apart from generating warnings, this only pops nodes. 2115 * Return the number of loops ended. 
2116 */ 2117 static int 2118 roff_ccond(struct roff *r, int ln, int ppos) 2119 { 2120 if (NULL == r->last) { 2121 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2122 return 0; 2123 } 2124 2125 switch (r->last->tok) { 2126 case ROFF_el: 2127 case ROFF_ie: 2128 case ROFF_if: 2129 case ROFF_while: 2130 break; 2131 default: 2132 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2133 return 0; 2134 } 2135 2136 if (r->last->endspan > -1) { 2137 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2138 return 0; 2139 } 2140 2141 return roffnode_pop(r) + roffnode_cleanscope(r); 2142 } 2143 2144 static int 2145 roff_block(ROFF_ARGS) 2146 { 2147 const char *name, *value; 2148 char *call, *cp, *iname, *rname; 2149 size_t csz, namesz, rsz; 2150 int deftype; 2151 2152 /* Ignore groff compatibility mode for now. */ 2153 2154 if (tok == ROFF_de1) 2155 tok = ROFF_de; 2156 else if (tok == ROFF_dei1) 2157 tok = ROFF_dei; 2158 else if (tok == ROFF_am1) 2159 tok = ROFF_am; 2160 else if (tok == ROFF_ami1) 2161 tok = ROFF_ami; 2162 2163 /* Parse the macro name argument. */ 2164 2165 cp = buf->buf + pos; 2166 if (tok == ROFF_ig) { 2167 iname = NULL; 2168 namesz = 0; 2169 } else { 2170 iname = cp; 2171 namesz = roff_getname(r, &cp, ln, ppos); 2172 iname[namesz] = '\0'; 2173 } 2174 2175 /* Resolve the macro name argument if it is indirect. 
*/ 2176 2177 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2178 deftype = ROFFDEF_USER; 2179 name = roff_getstrn(r, iname, namesz, &deftype); 2180 if (name == NULL) { 2181 mandoc_msg(MANDOCERR_STR_UNDEF, 2182 ln, (int)(iname - buf->buf), 2183 "%.*s", (int)namesz, iname); 2184 namesz = 0; 2185 } else 2186 namesz = strlen(name); 2187 } else 2188 name = iname; 2189 2190 if (namesz == 0 && tok != ROFF_ig) { 2191 mandoc_msg(MANDOCERR_REQ_EMPTY, 2192 ln, ppos, "%s", roff_name[tok]); 2193 return ROFF_IGN; 2194 } 2195 2196 roffnode_push(r, tok, name, ln, ppos); 2197 2198 /* 2199 * At the beginning of a `de' macro, clear the existing string 2200 * with the same name, if there is one. New content will be 2201 * appended from roff_block_text() in multiline mode. 2202 */ 2203 2204 if (tok == ROFF_de || tok == ROFF_dei) { 2205 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2206 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2207 } else if (tok == ROFF_am || tok == ROFF_ami) { 2208 deftype = ROFFDEF_ANY; 2209 value = roff_getstrn(r, iname, namesz, &deftype); 2210 switch (deftype) { /* Before appending, ... */ 2211 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2212 roff_setstrn(&r->strtab, name, namesz, 2213 value, strlen(value), 0); 2214 break; 2215 case ROFFDEF_REN: /* call original standard macro. */ 2216 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2217 (int)strlen(value), value); 2218 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2219 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2220 free(call); 2221 break; 2222 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2223 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2224 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2225 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2226 (int)rsz, rname); 2227 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2228 free(call); 2229 free(rname); 2230 break; 2231 default: 2232 break; 2233 } 2234 } 2235 2236 if (*cp == '\0') 2237 return ROFF_IGN; 2238 2239 /* Get the custom end marker. */ 2240 2241 iname = cp; 2242 namesz = roff_getname(r, &cp, ln, ppos); 2243 2244 /* Resolve the end marker if it is indirect. */ 2245 2246 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2247 deftype = ROFFDEF_USER; 2248 name = roff_getstrn(r, iname, namesz, &deftype); 2249 if (name == NULL) { 2250 mandoc_msg(MANDOCERR_STR_UNDEF, 2251 ln, (int)(iname - buf->buf), 2252 "%.*s", (int)namesz, iname); 2253 namesz = 0; 2254 } else 2255 namesz = strlen(name); 2256 } else 2257 name = iname; 2258 2259 if (namesz) 2260 r->last->end = mandoc_strndup(name, namesz); 2261 2262 if (*cp != '\0') 2263 mandoc_msg(MANDOCERR_ARG_EXCESS, 2264 ln, pos, ".%s ... %s", roff_name[tok], cp); 2265 2266 return ROFF_IGN; 2267 } 2268 2269 static int 2270 roff_block_sub(ROFF_ARGS) 2271 { 2272 enum roff_tok t; 2273 int i, j; 2274 2275 /* 2276 * First check whether a custom macro exists at this level. If 2277 * it does, then check against it. This is some of groff's 2278 * stranger behaviours. If we encountered a custom end-scope 2279 * tag and that tag also happens to be a "real" macro, then we 2280 * need to try interpreting it again as a real macro. If it's 2281 * not, then return ignore. Else continue. 
2282 */ 2283 2284 if (r->last->end) { 2285 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2286 if (buf->buf[i] != r->last->end[j]) 2287 break; 2288 2289 if (r->last->end[j] == '\0' && 2290 (buf->buf[i] == '\0' || 2291 buf->buf[i] == ' ' || 2292 buf->buf[i] == '\t')) { 2293 roffnode_pop(r); 2294 roffnode_cleanscope(r); 2295 2296 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2297 i++; 2298 2299 pos = i; 2300 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2301 TOKEN_NONE) 2302 return ROFF_RERUN; 2303 return ROFF_IGN; 2304 } 2305 } 2306 2307 /* 2308 * If we have no custom end-query or lookup failed, then try 2309 * pulling it out of the hashtable. 2310 */ 2311 2312 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2313 2314 if (t != ROFF_cblock) { 2315 if (tok != ROFF_ig) 2316 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2317 return ROFF_IGN; 2318 } 2319 2320 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2321 } 2322 2323 static int 2324 roff_block_text(ROFF_ARGS) 2325 { 2326 2327 if (tok != ROFF_ig) 2328 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2329 2330 return ROFF_IGN; 2331 } 2332 2333 /* 2334 * Check for a closing "\}" and handle it. 2335 * In this function, the final "int *offs" argument is used for 2336 * different purposes than elsewhere: 2337 * Input: *offs == 0: caller wants to discard arguments following \} 2338 * *offs == 1: caller wants to preserve text following \} 2339 * Output: *offs = 0: tell caller to discard input line 2340 * *offs = 1: tell caller to use input line 2341 */ 2342 static int 2343 roff_cond_checkend(ROFF_ARGS) 2344 { 2345 char *ep; 2346 int endloop, irc, rr; 2347 2348 irc = ROFF_IGN; 2349 rr = r->last->rule; 2350 endloop = tok != ROFF_while ? ROFF_IGN : 2351 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2352 if (roffnode_cleanscope(r)) 2353 irc |= endloop; 2354 2355 /* 2356 * If "\}" occurs on a macro line without a preceding macro or 2357 * a text line contains nothing else, drop the line completely. 
2358 */ 2359 2360 ep = buf->buf + pos; 2361 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0)) 2362 rr = 0; 2363 2364 /* 2365 * The closing delimiter "\}" rewinds the conditional scope 2366 * but is otherwise ignored when interpreting the line. 2367 */ 2368 2369 while ((ep = strchr(ep, '\\')) != NULL) { 2370 switch (ep[1]) { 2371 case '}': 2372 if (ep[2] == '\0') 2373 ep[0] = '\0'; 2374 else if (rr) 2375 ep[1] = '&'; 2376 else 2377 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2378 if (roff_ccond(r, ln, ep - buf->buf)) 2379 irc |= endloop; 2380 break; 2381 case '\0': 2382 ++ep; 2383 break; 2384 default: 2385 ep += 2; 2386 break; 2387 } 2388 } 2389 *offs = rr; 2390 return irc; 2391 } 2392 2393 /* 2394 * Parse and process a request or macro line in conditional scope. 2395 */ 2396 static int 2397 roff_cond_sub(ROFF_ARGS) 2398 { 2399 struct roffnode *bl; 2400 int irc, rr; 2401 enum roff_tok t; 2402 2403 rr = 0; /* If arguments follow "\}", skip them. */ 2404 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2405 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2406 2407 /* For now, let high level macros abort .ce mode. */ 2408 2409 if (roffce_node != NULL && 2410 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 2411 t == ROFF_TH || t == ROFF_TS)) { 2412 r->man->last = roffce_node; 2413 r->man->next = ROFF_NEXT_SIBLING; 2414 roffce_lines = 0; 2415 roffce_node = NULL; 2416 } 2417 2418 /* 2419 * Fully handle known macros when they are structurally 2420 * required or when the conditional evaluated to true. 2421 */ 2422 2423 if (t == ROFF_break) { 2424 if (irc & ROFF_LOOPMASK) 2425 irc = ROFF_IGN | ROFF_LOOPEXIT; 2426 else if (rr) { 2427 for (bl = r->last; bl != NULL; bl = bl->parent) { 2428 bl->rule = 0; 2429 if (bl->tok == ROFF_while) 2430 break; 2431 } 2432 } 2433 } else if (t != TOKEN_NONE && 2434 (rr || roffs[t].flags & ROFFMAC_STRUCT)) 2435 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2436 else 2437 irc |= rr ? 
ROFF_CONT : ROFF_IGN; 2438 return irc; 2439 } 2440 2441 /* 2442 * Parse and process a text line in conditional scope. 2443 */ 2444 static int 2445 roff_cond_text(ROFF_ARGS) 2446 { 2447 int irc, rr; 2448 2449 rr = 1; /* If arguments follow "\}", preserve them. */ 2450 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2451 if (rr) 2452 irc |= ROFF_CONT; 2453 return irc; 2454 } 2455 2456 /* --- handling of numeric and conditional expressions -------------------- */ 2457 2458 /* 2459 * Parse a single signed integer number. Stop at the first non-digit. 2460 * If there is at least one digit, return success and advance the 2461 * parse point, else return failure and let the parse point unchanged. 2462 * Ignore overflows, treat them just like the C language. 2463 */ 2464 static int 2465 roff_getnum(const char *v, int *pos, int *res, int flags) 2466 { 2467 int myres, scaled, n, p; 2468 2469 if (NULL == res) 2470 res = &myres; 2471 2472 p = *pos; 2473 n = v[p] == '-'; 2474 if (n || v[p] == '+') 2475 p++; 2476 2477 if (flags & ROFFNUM_WHITE) 2478 while (isspace((unsigned char)v[p])) 2479 p++; 2480 2481 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2482 *res = 10 * *res + v[p] - '0'; 2483 if (p == *pos + n) 2484 return 0; 2485 2486 if (n) 2487 *res = -*res; 2488 2489 /* Each number may be followed by one optional scaling unit. 
*/ 2490 2491 switch (v[p]) { 2492 case 'f': 2493 scaled = *res * 65536; 2494 break; 2495 case 'i': 2496 scaled = *res * 240; 2497 break; 2498 case 'c': 2499 scaled = *res * 240 / 2.54; 2500 break; 2501 case 'v': 2502 case 'P': 2503 scaled = *res * 40; 2504 break; 2505 case 'm': 2506 case 'n': 2507 scaled = *res * 24; 2508 break; 2509 case 'p': 2510 scaled = *res * 10 / 3; 2511 break; 2512 case 'u': 2513 scaled = *res; 2514 break; 2515 case 'M': 2516 scaled = *res * 6 / 25; 2517 break; 2518 default: 2519 scaled = *res; 2520 p--; 2521 break; 2522 } 2523 if (flags & ROFFNUM_SCALE) 2524 *res = scaled; 2525 2526 *pos = p + 1; 2527 return 1; 2528 } 2529 2530 /* 2531 * Evaluate a string comparison condition. 2532 * The first character is the delimiter. 2533 * Succeed if the string up to its second occurrence 2534 * matches the string up to its third occurence. 2535 * Advance the cursor after the third occurrence 2536 * or lacking that, to the end of the line. 2537 */ 2538 static int 2539 roff_evalstrcond(const char *v, int *pos) 2540 { 2541 const char *s1, *s2, *s3; 2542 int match; 2543 2544 match = 0; 2545 s1 = v + *pos; /* initial delimiter */ 2546 s2 = s1 + 1; /* for scanning the first string */ 2547 s3 = strchr(s2, *s1); /* for scanning the second string */ 2548 2549 if (NULL == s3) /* found no middle delimiter */ 2550 goto out; 2551 2552 while ('\0' != *++s3) { 2553 if (*s2 != *s3) { /* mismatch */ 2554 s3 = strchr(s3, *s1); 2555 break; 2556 } 2557 if (*s3 == *s1) { /* found the final delimiter */ 2558 match = 1; 2559 break; 2560 } 2561 s2++; 2562 } 2563 2564 out: 2565 if (NULL == s3) 2566 s3 = strchr(s2, '\0'); 2567 else if (*s3 != '\0') 2568 s3++; 2569 *pos = s3 - v; 2570 return match; 2571 } 2572 2573 /* 2574 * Evaluate an optionally negated single character, numerical, 2575 * or string condition. 
2576 */ 2577 static int 2578 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2579 { 2580 const char *start, *end; 2581 char *cp, *name; 2582 size_t sz; 2583 int deftype, len, number, savepos, istrue, wanttrue; 2584 2585 if ('!' == v[*pos]) { 2586 wanttrue = 0; 2587 (*pos)++; 2588 } else 2589 wanttrue = 1; 2590 2591 switch (v[*pos]) { 2592 case '\0': 2593 return 0; 2594 case 'n': 2595 case 'o': 2596 (*pos)++; 2597 return wanttrue; 2598 case 'e': 2599 case 't': 2600 case 'v': 2601 (*pos)++; 2602 return !wanttrue; 2603 case 'c': 2604 do { 2605 (*pos)++; 2606 } while (v[*pos] == ' '); 2607 2608 /* 2609 * Quirk for groff compatibility: 2610 * The horizontal tab is neither available nor unavailable. 2611 */ 2612 2613 if (v[*pos] == '\t') { 2614 (*pos)++; 2615 return 0; 2616 } 2617 2618 /* Printable ASCII characters are available. */ 2619 2620 if (v[*pos] != '\\') { 2621 (*pos)++; 2622 return wanttrue; 2623 } 2624 2625 end = v + ++*pos; 2626 switch (mandoc_escape(&end, &start, &len)) { 2627 case ESCAPE_SPECIAL: 2628 istrue = mchars_spec2cp(start, len) != -1; 2629 break; 2630 case ESCAPE_UNICODE: 2631 istrue = 1; 2632 break; 2633 case ESCAPE_NUMBERED: 2634 istrue = mchars_num2char(start, len) != -1; 2635 break; 2636 default: 2637 istrue = !wanttrue; 2638 break; 2639 } 2640 *pos = end - v; 2641 return istrue == wanttrue; 2642 case 'd': 2643 case 'r': 2644 cp = v + *pos + 1; 2645 while (*cp == ' ') 2646 cp++; 2647 name = cp; 2648 sz = roff_getname(r, &cp, ln, cp - v); 2649 if (sz == 0) 2650 istrue = 0; 2651 else if (v[*pos] == 'r') 2652 istrue = roff_hasregn(r, name, sz); 2653 else { 2654 deftype = ROFFDEF_ANY; 2655 roff_getstrn(r, name, sz, &deftype); 2656 istrue = !!deftype; 2657 } 2658 *pos = (name + sz) - v; 2659 return istrue == wanttrue; 2660 default: 2661 break; 2662 } 2663 2664 savepos = *pos; 2665 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2666 return (number > 0) == wanttrue; 2667 else if (*pos == savepos) 2668 return roff_evalstrcond(v, pos) 
== wanttrue; 2669 else 2670 return 0; 2671 } 2672 2673 static int 2674 roff_line_ignore(ROFF_ARGS) 2675 { 2676 2677 return ROFF_IGN; 2678 } 2679 2680 static int 2681 roff_insec(ROFF_ARGS) 2682 { 2683 2684 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2685 return ROFF_IGN; 2686 } 2687 2688 static int 2689 roff_unsupp(ROFF_ARGS) 2690 { 2691 2692 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2693 return ROFF_IGN; 2694 } 2695 2696 static int 2697 roff_cond(ROFF_ARGS) 2698 { 2699 int irc; 2700 2701 roffnode_push(r, tok, NULL, ln, ppos); 2702 2703 /* 2704 * An `.el' has no conditional body: it will consume the value 2705 * of the current rstack entry set in prior `ie' calls or 2706 * defaults to DENY. 2707 * 2708 * If we're not an `el', however, then evaluate the conditional. 2709 */ 2710 2711 r->last->rule = tok == ROFF_el ? 2712 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2713 roff_evalcond(r, ln, buf->buf, &pos); 2714 2715 /* 2716 * An if-else will put the NEGATION of the current evaluated 2717 * conditional into the stack of rules. 2718 */ 2719 2720 if (tok == ROFF_ie) { 2721 if (r->rstackpos + 1 == r->rstacksz) { 2722 r->rstacksz += 16; 2723 r->rstack = mandoc_reallocarray(r->rstack, 2724 r->rstacksz, sizeof(int)); 2725 } 2726 r->rstack[++r->rstackpos] = !r->last->rule; 2727 } 2728 2729 /* If the parent has false as its rule, then so do we. */ 2730 2731 if (r->last->parent && !r->last->parent->rule) 2732 r->last->rule = 0; 2733 2734 /* 2735 * Determine scope. 2736 * If there is nothing on the line after the conditional, 2737 * not even whitespace, use next-line scope. 2738 * Except that .while does not support next-line scope. 2739 */ 2740 2741 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2742 r->last->endspan = 2; 2743 goto out; 2744 } 2745 2746 while (buf->buf[pos] == ' ') 2747 pos++; 2748 2749 /* An opening brace requests multiline scope. 
*/ 2750 2751 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2752 r->last->endspan = -1; 2753 pos += 2; 2754 while (buf->buf[pos] == ' ') 2755 pos++; 2756 goto out; 2757 } 2758 2759 /* 2760 * Anything else following the conditional causes 2761 * single-line scope. Warn if the scope contains 2762 * nothing but trailing whitespace. 2763 */ 2764 2765 if (buf->buf[pos] == '\0') 2766 mandoc_msg(MANDOCERR_COND_EMPTY, 2767 ln, ppos, "%s", roff_name[tok]); 2768 2769 r->last->endspan = 1; 2770 2771 out: 2772 *offs = pos; 2773 irc = ROFF_RERUN; 2774 if (tok == ROFF_while) 2775 irc |= ROFF_WHILE; 2776 return irc; 2777 } 2778 2779 static int 2780 roff_ds(ROFF_ARGS) 2781 { 2782 char *string; 2783 const char *name; 2784 size_t namesz; 2785 2786 /* Ignore groff compatibility mode for now. */ 2787 2788 if (tok == ROFF_ds1) 2789 tok = ROFF_ds; 2790 else if (tok == ROFF_as1) 2791 tok = ROFF_as; 2792 2793 /* 2794 * The first word is the name of the string. 2795 * If it is empty or terminated by an escape sequence, 2796 * abort the `ds' request without defining anything. 2797 */ 2798 2799 name = string = buf->buf + pos; 2800 if (*name == '\0') 2801 return ROFF_IGN; 2802 2803 namesz = roff_getname(r, &string, ln, pos); 2804 switch (name[namesz]) { 2805 case '\\': 2806 return ROFF_IGN; 2807 case '\t': 2808 string = buf->buf + pos + namesz; 2809 break; 2810 default: 2811 break; 2812 } 2813 2814 /* Read past the initial double-quote, if any. */ 2815 if (*string == '"') 2816 string++; 2817 2818 /* The rest is the value. */ 2819 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2820 ROFF_as == tok); 2821 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2822 return ROFF_IGN; 2823 } 2824 2825 /* 2826 * Parse a single operator, one or two characters long. 2827 * If the operator is recognized, return success and advance the 2828 * parse point, else return failure and let the parse point unchanged. 
2829 */ 2830 static int 2831 roff_getop(const char *v, int *pos, char *res) 2832 { 2833 2834 *res = v[*pos]; 2835 2836 switch (*res) { 2837 case '+': 2838 case '-': 2839 case '*': 2840 case '/': 2841 case '%': 2842 case '&': 2843 case ':': 2844 break; 2845 case '<': 2846 switch (v[*pos + 1]) { 2847 case '=': 2848 *res = 'l'; 2849 (*pos)++; 2850 break; 2851 case '>': 2852 *res = '!'; 2853 (*pos)++; 2854 break; 2855 case '?': 2856 *res = 'i'; 2857 (*pos)++; 2858 break; 2859 default: 2860 break; 2861 } 2862 break; 2863 case '>': 2864 switch (v[*pos + 1]) { 2865 case '=': 2866 *res = 'g'; 2867 (*pos)++; 2868 break; 2869 case '?': 2870 *res = 'a'; 2871 (*pos)++; 2872 break; 2873 default: 2874 break; 2875 } 2876 break; 2877 case '=': 2878 if ('=' == v[*pos + 1]) 2879 (*pos)++; 2880 break; 2881 default: 2882 return 0; 2883 } 2884 (*pos)++; 2885 2886 return *res; 2887 } 2888 2889 /* 2890 * Evaluate either a parenthesized numeric expression 2891 * or a single signed integer number. 2892 */ 2893 static int 2894 roff_evalpar(struct roff *r, int ln, 2895 const char *v, int *pos, int *res, int flags) 2896 { 2897 2898 if ('(' != v[*pos]) 2899 return roff_getnum(v, pos, res, flags); 2900 2901 (*pos)++; 2902 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2903 return 0; 2904 2905 /* 2906 * Omission of the closing parenthesis 2907 * is an error in validation mode, 2908 * but ignored in evaluation mode. 2909 */ 2910 2911 if (')' == v[*pos]) 2912 (*pos)++; 2913 else if (NULL == res) 2914 return 0; 2915 2916 return 1; 2917 } 2918 2919 /* 2920 * Evaluate a complete numeric expression. 2921 * Proceed left to right, there is no concept of precedence. 
2922 */ 2923 static int 2924 roff_evalnum(struct roff *r, int ln, const char *v, 2925 int *pos, int *res, int flags) 2926 { 2927 int mypos, operand2; 2928 char operator; 2929 2930 if (NULL == pos) { 2931 mypos = 0; 2932 pos = &mypos; 2933 } 2934 2935 if (flags & ROFFNUM_WHITE) 2936 while (isspace((unsigned char)v[*pos])) 2937 (*pos)++; 2938 2939 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2940 return 0; 2941 2942 while (1) { 2943 if (flags & ROFFNUM_WHITE) 2944 while (isspace((unsigned char)v[*pos])) 2945 (*pos)++; 2946 2947 if ( ! roff_getop(v, pos, &operator)) 2948 break; 2949 2950 if (flags & ROFFNUM_WHITE) 2951 while (isspace((unsigned char)v[*pos])) 2952 (*pos)++; 2953 2954 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2955 return 0; 2956 2957 if (flags & ROFFNUM_WHITE) 2958 while (isspace((unsigned char)v[*pos])) 2959 (*pos)++; 2960 2961 if (NULL == res) 2962 continue; 2963 2964 switch (operator) { 2965 case '+': 2966 *res += operand2; 2967 break; 2968 case '-': 2969 *res -= operand2; 2970 break; 2971 case '*': 2972 *res *= operand2; 2973 break; 2974 case '/': 2975 if (operand2 == 0) { 2976 mandoc_msg(MANDOCERR_DIVZERO, 2977 ln, *pos, "%s", v); 2978 *res = 0; 2979 break; 2980 } 2981 *res /= operand2; 2982 break; 2983 case '%': 2984 if (operand2 == 0) { 2985 mandoc_msg(MANDOCERR_DIVZERO, 2986 ln, *pos, "%s", v); 2987 *res = 0; 2988 break; 2989 } 2990 *res %= operand2; 2991 break; 2992 case '<': 2993 *res = *res < operand2; 2994 break; 2995 case '>': 2996 *res = *res > operand2; 2997 break; 2998 case 'l': 2999 *res = *res <= operand2; 3000 break; 3001 case 'g': 3002 *res = *res >= operand2; 3003 break; 3004 case '=': 3005 *res = *res == operand2; 3006 break; 3007 case '!': 3008 *res = *res != operand2; 3009 break; 3010 case '&': 3011 *res = *res && operand2; 3012 break; 3013 case ':': 3014 *res = *res || operand2; 3015 break; 3016 case 'i': 3017 if (operand2 < *res) 3018 *res = operand2; 3019 break; 3020 case 'a': 3021 if (operand2 > *res) 3022 *res 
= operand2; 3023 break; 3024 default: 3025 abort(); 3026 } 3027 } 3028 return 1; 3029 } 3030 3031 /* --- register management ------------------------------------------------ */ 3032 3033 void 3034 roff_setreg(struct roff *r, const char *name, int val, char sign) 3035 { 3036 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 3037 } 3038 3039 static void 3040 roff_setregn(struct roff *r, const char *name, size_t len, 3041 int val, char sign, int step) 3042 { 3043 struct roffreg *reg; 3044 3045 /* Search for an existing register with the same name. */ 3046 reg = r->regtab; 3047 3048 while (reg != NULL && (reg->key.sz != len || 3049 strncmp(reg->key.p, name, len) != 0)) 3050 reg = reg->next; 3051 3052 if (NULL == reg) { 3053 /* Create a new register. */ 3054 reg = mandoc_malloc(sizeof(struct roffreg)); 3055 reg->key.p = mandoc_strndup(name, len); 3056 reg->key.sz = len; 3057 reg->val = 0; 3058 reg->step = 0; 3059 reg->next = r->regtab; 3060 r->regtab = reg; 3061 } 3062 3063 if ('+' == sign) 3064 reg->val += val; 3065 else if ('-' == sign) 3066 reg->val -= val; 3067 else 3068 reg->val = val; 3069 if (step != INT_MIN) 3070 reg->step = step; 3071 } 3072 3073 /* 3074 * Handle some predefined read-only number registers. 3075 * For now, return -1 if the requested register is not predefined; 3076 * in case a predefined read-only register having the value -1 3077 * were to turn up, another special value would have to be chosen. 3078 */ 3079 static int 3080 roff_getregro(const struct roff *r, const char *name) 3081 { 3082 3083 switch (*name) { 3084 case '$': /* Number of arguments of the last macro evaluated. */ 3085 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 3086 case 'A': /* ASCII approximation mode is always off. */ 3087 return 0; 3088 case 'g': /* Groff compatibility mode is always on. */ 3089 return 1; 3090 case 'H': /* Fixed horizontal resolution. */ 3091 return 24; 3092 case 'j': /* Always adjust left margin only. 
*/ 3093 return 0; 3094 case 'T': /* Some output device is always defined. */ 3095 return 1; 3096 case 'V': /* Fixed vertical resolution. */ 3097 return 40; 3098 default: 3099 return -1; 3100 } 3101 } 3102 3103 int 3104 roff_getreg(struct roff *r, const char *name) 3105 { 3106 return roff_getregn(r, name, strlen(name), '\0'); 3107 } 3108 3109 static int 3110 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3111 { 3112 struct roffreg *reg; 3113 int val; 3114 3115 if ('.' == name[0] && 2 == len) { 3116 val = roff_getregro(r, name + 1); 3117 if (-1 != val) 3118 return val; 3119 } 3120 3121 for (reg = r->regtab; reg; reg = reg->next) { 3122 if (len == reg->key.sz && 3123 0 == strncmp(name, reg->key.p, len)) { 3124 switch (sign) { 3125 case '+': 3126 reg->val += reg->step; 3127 break; 3128 case '-': 3129 reg->val -= reg->step; 3130 break; 3131 default: 3132 break; 3133 } 3134 return reg->val; 3135 } 3136 } 3137 3138 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3139 return 0; 3140 } 3141 3142 static int 3143 roff_hasregn(const struct roff *r, const char *name, size_t len) 3144 { 3145 struct roffreg *reg; 3146 int val; 3147 3148 if ('.' 
== name[0] && 2 == len) { 3149 val = roff_getregro(r, name + 1); 3150 if (-1 != val) 3151 return 1; 3152 } 3153 3154 for (reg = r->regtab; reg; reg = reg->next) 3155 if (len == reg->key.sz && 3156 0 == strncmp(name, reg->key.p, len)) 3157 return 1; 3158 3159 return 0; 3160 } 3161 3162 static void 3163 roff_freereg(struct roffreg *reg) 3164 { 3165 struct roffreg *old_reg; 3166 3167 while (NULL != reg) { 3168 free(reg->key.p); 3169 old_reg = reg; 3170 reg = reg->next; 3171 free(old_reg); 3172 } 3173 } 3174 3175 static int 3176 roff_nr(ROFF_ARGS) 3177 { 3178 char *key, *val, *step; 3179 size_t keysz; 3180 int iv, is, len; 3181 char sign; 3182 3183 key = val = buf->buf + pos; 3184 if (*key == '\0') 3185 return ROFF_IGN; 3186 3187 keysz = roff_getname(r, &val, ln, pos); 3188 if (key[keysz] == '\\' || key[keysz] == '\t') 3189 return ROFF_IGN; 3190 3191 sign = *val; 3192 if (sign == '+' || sign == '-') 3193 val++; 3194 3195 len = 0; 3196 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3197 return ROFF_IGN; 3198 3199 step = val + len; 3200 while (isspace((unsigned char)*step)) 3201 step++; 3202 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3203 is = INT_MIN; 3204 3205 roff_setregn(r, key, keysz, iv, sign, is); 3206 return ROFF_IGN; 3207 } 3208 3209 static int 3210 roff_rr(ROFF_ARGS) 3211 { 3212 struct roffreg *reg, **prev; 3213 char *name, *cp; 3214 size_t namesz; 3215 3216 name = cp = buf->buf + pos; 3217 if (*name == '\0') 3218 return ROFF_IGN; 3219 namesz = roff_getname(r, &cp, ln, pos); 3220 name[namesz] = '\0'; 3221 3222 prev = &r->regtab; 3223 while (1) { 3224 reg = *prev; 3225 if (reg == NULL || !strcmp(name, reg->key.p)) 3226 break; 3227 prev = ®->next; 3228 } 3229 if (reg != NULL) { 3230 *prev = reg->next; 3231 free(reg->key.p); 3232 free(reg); 3233 } 3234 return ROFF_IGN; 3235 } 3236 3237 /* --- handler functions for roff requests -------------------------------- */ 3238 3239 static int 3240 roff_rm(ROFF_ARGS) 3241 { 3242 const char *name; 3243 
char *cp; 3244 size_t namesz; 3245 3246 cp = buf->buf + pos; 3247 while (*cp != '\0') { 3248 name = cp; 3249 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3250 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3251 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3252 if (name[namesz] == '\\' || name[namesz] == '\t') 3253 break; 3254 } 3255 return ROFF_IGN; 3256 } 3257 3258 static int 3259 roff_it(ROFF_ARGS) 3260 { 3261 int iv; 3262 3263 /* Parse the number of lines. */ 3264 3265 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3266 mandoc_msg(MANDOCERR_IT_NONUM, 3267 ln, ppos, "%s", buf->buf + 1); 3268 return ROFF_IGN; 3269 } 3270 3271 while (isspace((unsigned char)buf->buf[pos])) 3272 pos++; 3273 3274 /* 3275 * Arm the input line trap. 3276 * Special-casing "an-trap" is an ugly workaround to cope 3277 * with DocBook stupidly fiddling with man(7) internals. 3278 */ 3279 3280 roffit_lines = iv; 3281 roffit_macro = mandoc_strdup(iv != 1 || 3282 strcmp(buf->buf + pos, "an-trap") ? 
3283 buf->buf + pos : "br"); 3284 return ROFF_IGN; 3285 } 3286 3287 static int 3288 roff_Dd(ROFF_ARGS) 3289 { 3290 int mask; 3291 enum roff_tok t, te; 3292 3293 switch (tok) { 3294 case ROFF_Dd: 3295 tok = MDOC_Dd; 3296 te = MDOC_MAX; 3297 if (r->format == 0) 3298 r->format = MPARSE_MDOC; 3299 mask = MPARSE_MDOC | MPARSE_QUICK; 3300 break; 3301 case ROFF_TH: 3302 tok = MAN_TH; 3303 te = MAN_MAX; 3304 if (r->format == 0) 3305 r->format = MPARSE_MAN; 3306 mask = MPARSE_QUICK; 3307 break; 3308 default: 3309 abort(); 3310 } 3311 if ((r->options & mask) == 0) 3312 for (t = tok; t < te; t++) 3313 roff_setstr(r, roff_name[t], NULL, 0); 3314 return ROFF_CONT; 3315 } 3316 3317 static int 3318 roff_TE(ROFF_ARGS) 3319 { 3320 r->man->flags &= ~ROFF_NONOFILL; 3321 if (r->tbl == NULL) { 3322 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3323 return ROFF_IGN; 3324 } 3325 if (tbl_end(r->tbl, 0) == 0) { 3326 r->tbl = NULL; 3327 free(buf->buf); 3328 buf->buf = mandoc_strdup(".sp"); 3329 buf->sz = 4; 3330 *offs = 0; 3331 return ROFF_REPARSE; 3332 } 3333 r->tbl = NULL; 3334 return ROFF_IGN; 3335 } 3336 3337 static int 3338 roff_T_(ROFF_ARGS) 3339 { 3340 3341 if (NULL == r->tbl) 3342 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3343 else 3344 tbl_restart(ln, ppos, r->tbl); 3345 3346 return ROFF_IGN; 3347 } 3348 3349 /* 3350 * Handle in-line equation delimiters. 3351 */ 3352 static int 3353 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3354 { 3355 char *cp1, *cp2; 3356 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3357 3358 /* 3359 * Outside equations, look for an opening delimiter. 3360 * If we are inside an equation, we already know it is 3361 * in-line, or this function wouldn't have been called; 3362 * so look for a closing delimiter. 3363 */ 3364 3365 cp1 = buf->buf + pos; 3366 cp2 = strchr(cp1, r->eqn == NULL ? 
3367 r->last_eqn->odelim : r->last_eqn->cdelim); 3368 if (cp2 == NULL) 3369 return ROFF_CONT; 3370 3371 *cp2++ = '\0'; 3372 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3373 3374 /* Handle preceding text, protecting whitespace. */ 3375 3376 if (*buf->buf != '\0') { 3377 if (r->eqn == NULL) 3378 bef_pr = "\\&"; 3379 bef_nl = "\n"; 3380 } 3381 3382 /* 3383 * Prepare replacing the delimiter with an equation macro 3384 * and drop leading white space from the equation. 3385 */ 3386 3387 if (r->eqn == NULL) { 3388 while (*cp2 == ' ') 3389 cp2++; 3390 mac = ".EQ"; 3391 } else 3392 mac = ".EN"; 3393 3394 /* Handle following text, protecting whitespace. */ 3395 3396 if (*cp2 != '\0') { 3397 aft_nl = "\n"; 3398 if (r->eqn != NULL) 3399 aft_pr = "\\&"; 3400 } 3401 3402 /* Do the actual replacement. */ 3403 3404 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3405 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3406 free(buf->buf); 3407 buf->buf = cp1; 3408 3409 /* Toggle the in-line state of the eqn subsystem. 
*/ 3410 3411 r->eqn_inline = r->eqn == NULL; 3412 return ROFF_REPARSE; 3413 } 3414 3415 static int 3416 roff_EQ(ROFF_ARGS) 3417 { 3418 struct roff_node *n; 3419 3420 if (r->man->meta.macroset == MACROSET_MAN) 3421 man_breakscope(r->man, ROFF_EQ); 3422 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3423 if (ln > r->man->last->line) 3424 n->flags |= NODE_LINE; 3425 n->eqn = eqn_box_new(); 3426 roff_node_append(r->man, n); 3427 r->man->next = ROFF_NEXT_SIBLING; 3428 3429 assert(r->eqn == NULL); 3430 if (r->last_eqn == NULL) 3431 r->last_eqn = eqn_alloc(); 3432 else 3433 eqn_reset(r->last_eqn); 3434 r->eqn = r->last_eqn; 3435 r->eqn->node = n; 3436 3437 if (buf->buf[pos] != '\0') 3438 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3439 ".EQ %s", buf->buf + pos); 3440 3441 return ROFF_IGN; 3442 } 3443 3444 static int 3445 roff_EN(ROFF_ARGS) 3446 { 3447 if (r->eqn != NULL) { 3448 eqn_parse(r->eqn); 3449 r->eqn = NULL; 3450 } else 3451 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3452 if (buf->buf[pos] != '\0') 3453 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3454 "EN %s", buf->buf + pos); 3455 return ROFF_IGN; 3456 } 3457 3458 static int 3459 roff_TS(ROFF_ARGS) 3460 { 3461 if (r->tbl != NULL) { 3462 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3463 tbl_end(r->tbl, 0); 3464 } 3465 r->man->flags |= ROFF_NONOFILL; 3466 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3467 if (r->last_tbl == NULL) 3468 r->first_tbl = r->tbl; 3469 r->last_tbl = r->tbl; 3470 return ROFF_IGN; 3471 } 3472 3473 static int 3474 roff_noarg(ROFF_ARGS) 3475 { 3476 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3477 man_breakscope(r->man, tok); 3478 if (tok == ROFF_brp) 3479 tok = ROFF_br; 3480 roff_elem_alloc(r->man, ln, ppos, tok); 3481 if (buf->buf[pos] != '\0') 3482 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3483 "%s %s", roff_name[tok], buf->buf + pos); 3484 if (tok == ROFF_nf) 3485 r->man->flags |= ROFF_NOFILL; 3486 else if (tok == ROFF_fi) 3487 r->man->flags &= ~ROFF_NOFILL; 3488 
r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3489 r->man->next = ROFF_NEXT_SIBLING; 3490 return ROFF_IGN; 3491 } 3492 3493 static int 3494 roff_onearg(ROFF_ARGS) 3495 { 3496 struct roff_node *n; 3497 char *cp; 3498 int npos; 3499 3500 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3501 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3502 tok == ROFF_ti)) 3503 man_breakscope(r->man, tok); 3504 3505 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3506 r->man->last = roffce_node; 3507 r->man->next = ROFF_NEXT_SIBLING; 3508 } 3509 3510 roff_elem_alloc(r->man, ln, ppos, tok); 3511 n = r->man->last; 3512 3513 cp = buf->buf + pos; 3514 if (*cp != '\0') { 3515 while (*cp != '\0' && *cp != ' ') 3516 cp++; 3517 while (*cp == ' ') 3518 *cp++ = '\0'; 3519 if (*cp != '\0') 3520 mandoc_msg(MANDOCERR_ARG_EXCESS, 3521 ln, (int)(cp - buf->buf), 3522 "%s ... %s", roff_name[tok], cp); 3523 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3524 } 3525 3526 if (tok == ROFF_ce || tok == ROFF_rj) { 3527 if (r->man->last->type == ROFFT_ELEM) { 3528 roff_word_alloc(r->man, ln, pos, "1"); 3529 r->man->last->flags |= NODE_NOSRC; 3530 } 3531 npos = 0; 3532 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3533 &roffce_lines, 0) == 0) { 3534 mandoc_msg(MANDOCERR_CE_NONUM, 3535 ln, pos, "ce %s", buf->buf + pos); 3536 roffce_lines = 1; 3537 } 3538 if (roffce_lines < 1) { 3539 r->man->last = r->man->last->parent; 3540 roffce_node = NULL; 3541 roffce_lines = 0; 3542 } else 3543 roffce_node = r->man->last->parent; 3544 } else { 3545 n->flags |= NODE_VALID | NODE_ENDED; 3546 r->man->last = n; 3547 } 3548 n->flags |= NODE_LINE; 3549 r->man->next = ROFF_NEXT_SIBLING; 3550 return ROFF_IGN; 3551 } 3552 3553 static int 3554 roff_manyarg(ROFF_ARGS) 3555 { 3556 struct roff_node *n; 3557 char *sp, *ep; 3558 3559 roff_elem_alloc(r->man, ln, ppos, tok); 3560 n = r->man->last; 3561 3562 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3563 while (*ep != '\0' && 
*ep != ' ') 3564 ep++; 3565 while (*ep == ' ') 3566 *ep++ = '\0'; 3567 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3568 } 3569 3570 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3571 r->man->last = n; 3572 r->man->next = ROFF_NEXT_SIBLING; 3573 return ROFF_IGN; 3574 } 3575 3576 static int 3577 roff_als(ROFF_ARGS) 3578 { 3579 char *oldn, *newn, *end, *value; 3580 size_t oldsz, newsz, valsz; 3581 3582 newn = oldn = buf->buf + pos; 3583 if (*newn == '\0') 3584 return ROFF_IGN; 3585 3586 newsz = roff_getname(r, &oldn, ln, pos); 3587 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3588 return ROFF_IGN; 3589 3590 end = oldn; 3591 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3592 if (oldsz == 0) 3593 return ROFF_IGN; 3594 3595 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3596 (int)oldsz, oldn); 3597 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3598 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3599 free(value); 3600 return ROFF_IGN; 3601 } 3602 3603 /* 3604 * The .break request only makes sense inside conditionals, 3605 * and that case is already handled in roff_cond_sub(). 3606 */ 3607 static int 3608 roff_break(ROFF_ARGS) 3609 { 3610 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3611 return ROFF_IGN; 3612 } 3613 3614 static int 3615 roff_cc(ROFF_ARGS) 3616 { 3617 const char *p; 3618 3619 p = buf->buf + pos; 3620 3621 if (*p == '\0' || (r->control = *p++) == '.') 3622 r->control = '\0'; 3623 3624 if (*p != '\0') 3625 mandoc_msg(MANDOCERR_ARG_EXCESS, 3626 ln, p - buf->buf, "cc ... %s", p); 3627 3628 return ROFF_IGN; 3629 } 3630 3631 static int 3632 roff_char(ROFF_ARGS) 3633 { 3634 const char *p, *kp, *vp; 3635 size_t ksz, vsz; 3636 int font; 3637 3638 /* Parse the character to be replaced. 
*/ 3639 3640 kp = buf->buf + pos; 3641 p = kp + 1; 3642 if (*kp == '\0' || (*kp == '\\' && 3643 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3644 (*p != ' ' && *p != '\0')) { 3645 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3646 return ROFF_IGN; 3647 } 3648 ksz = p - kp; 3649 while (*p == ' ') 3650 p++; 3651 3652 /* 3653 * If the replacement string contains a font escape sequence, 3654 * we have to restore the font at the end. 3655 */ 3656 3657 vp = p; 3658 vsz = strlen(p); 3659 font = 0; 3660 while (*p != '\0') { 3661 if (*p++ != '\\') 3662 continue; 3663 switch (mandoc_escape(&p, NULL, NULL)) { 3664 case ESCAPE_FONT: 3665 case ESCAPE_FONTROMAN: 3666 case ESCAPE_FONTITALIC: 3667 case ESCAPE_FONTBOLD: 3668 case ESCAPE_FONTBI: 3669 case ESCAPE_FONTCR: 3670 case ESCAPE_FONTCB: 3671 case ESCAPE_FONTCI: 3672 case ESCAPE_FONTPREV: 3673 font++; 3674 break; 3675 default: 3676 break; 3677 } 3678 } 3679 if (font > 1) 3680 mandoc_msg(MANDOCERR_CHAR_FONT, 3681 ln, (int)(vp - buf->buf), "%s", vp); 3682 3683 /* 3684 * Approximate the effect of .char using the .tr tables. 3685 * XXX In groff, .char and .tr interact differently. 3686 */ 3687 3688 if (ksz == 1) { 3689 if (r->xtab == NULL) 3690 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3691 assert((unsigned int)*kp < 128); 3692 free(r->xtab[(int)*kp].p); 3693 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3694 "%s%s", vp, font ? "\fP" : ""); 3695 } else { 3696 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3697 if (font) 3698 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3699 } 3700 return ROFF_IGN; 3701 } 3702 3703 static int 3704 roff_ec(ROFF_ARGS) 3705 { 3706 const char *p; 3707 3708 p = buf->buf + pos; 3709 if (*p == '\0') 3710 r->escape = '\\'; 3711 else { 3712 r->escape = *p; 3713 if (*++p != '\0') 3714 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3715 (int)(p - buf->buf), "ec ... 
%s", p); 3716 } 3717 return ROFF_IGN; 3718 } 3719 3720 static int 3721 roff_eo(ROFF_ARGS) 3722 { 3723 r->escape = '\0'; 3724 if (buf->buf[pos] != '\0') 3725 mandoc_msg(MANDOCERR_ARG_SKIP, 3726 ln, pos, "eo %s", buf->buf + pos); 3727 return ROFF_IGN; 3728 } 3729 3730 static int 3731 roff_nop(ROFF_ARGS) 3732 { 3733 while (buf->buf[pos] == ' ') 3734 pos++; 3735 *offs = pos; 3736 return ROFF_RERUN; 3737 } 3738 3739 static int 3740 roff_tr(ROFF_ARGS) 3741 { 3742 const char *p, *first, *second; 3743 size_t fsz, ssz; 3744 enum mandoc_esc esc; 3745 3746 p = buf->buf + pos; 3747 3748 if (*p == '\0') { 3749 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3750 return ROFF_IGN; 3751 } 3752 3753 while (*p != '\0') { 3754 fsz = ssz = 1; 3755 3756 first = p++; 3757 if (*first == '\\') { 3758 esc = mandoc_escape(&p, NULL, NULL); 3759 if (esc == ESCAPE_ERROR) { 3760 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3761 (int)(p - buf->buf), "%s", first); 3762 return ROFF_IGN; 3763 } 3764 fsz = (size_t)(p - first); 3765 } 3766 3767 second = p++; 3768 if (*second == '\\') { 3769 esc = mandoc_escape(&p, NULL, NULL); 3770 if (esc == ESCAPE_ERROR) { 3771 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3772 (int)(p - buf->buf), "%s", second); 3773 return ROFF_IGN; 3774 } 3775 ssz = (size_t)(p - second); 3776 } else if (*second == '\0') { 3777 mandoc_msg(MANDOCERR_TR_ODD, ln, 3778 (int)(first - buf->buf), "tr %s", first); 3779 second = " "; 3780 p--; 3781 } 3782 3783 if (fsz > 1) { 3784 roff_setstrn(&r->xmbtab, first, fsz, 3785 second, ssz, 0); 3786 continue; 3787 } 3788 3789 if (r->xtab == NULL) 3790 r->xtab = mandoc_calloc(128, 3791 sizeof(struct roffstr)); 3792 3793 free(r->xtab[(int)*first].p); 3794 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3795 r->xtab[(int)*first].sz = ssz; 3796 } 3797 3798 return ROFF_IGN; 3799 } 3800 3801 /* 3802 * Implementation of the .return request. 3803 * There is no need to call roff_userret() from here. 
3804 * The read module will call that after rewinding the reader stack 3805 * to the place from where the current macro was called. 3806 */ 3807 static int 3808 roff_return(ROFF_ARGS) 3809 { 3810 if (r->mstackpos >= 0) 3811 return ROFF_IGN | ROFF_USERRET; 3812 3813 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3814 return ROFF_IGN; 3815 } 3816 3817 static int 3818 roff_rn(ROFF_ARGS) 3819 { 3820 const char *value; 3821 char *oldn, *newn, *end; 3822 size_t oldsz, newsz; 3823 int deftype; 3824 3825 oldn = newn = buf->buf + pos; 3826 if (*oldn == '\0') 3827 return ROFF_IGN; 3828 3829 oldsz = roff_getname(r, &newn, ln, pos); 3830 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3831 return ROFF_IGN; 3832 3833 end = newn; 3834 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3835 if (newsz == 0) 3836 return ROFF_IGN; 3837 3838 deftype = ROFFDEF_ANY; 3839 value = roff_getstrn(r, oldn, oldsz, &deftype); 3840 switch (deftype) { 3841 case ROFFDEF_USER: 3842 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3843 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3844 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3845 break; 3846 case ROFFDEF_PRE: 3847 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3848 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3849 break; 3850 case ROFFDEF_REN: 3851 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3852 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3853 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3854 break; 3855 case ROFFDEF_STD: 3856 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3857 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3858 break; 3859 default: 3860 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3861 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3862 break; 3863 } 3864 return ROFF_IGN; 3865 } 3866 3867 static int 3868 roff_shift(ROFF_ARGS) 3869 { 3870 struct mctx *ctx; 3871 int levels, i; 3872 3873 levels = 1; 
3874 if (buf->buf[pos] != '\0' && 3875 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3876 mandoc_msg(MANDOCERR_CE_NONUM, 3877 ln, pos, "shift %s", buf->buf + pos); 3878 levels = 1; 3879 } 3880 if (r->mstackpos < 0) { 3881 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3882 return ROFF_IGN; 3883 } 3884 ctx = r->mstack + r->mstackpos; 3885 if (levels > ctx->argc) { 3886 mandoc_msg(MANDOCERR_SHIFT, 3887 ln, pos, "%d, but max is %d", levels, ctx->argc); 3888 levels = ctx->argc; 3889 } 3890 if (levels == 0) 3891 return ROFF_IGN; 3892 for (i = 0; i < levels; i++) 3893 free(ctx->argv[i]); 3894 ctx->argc -= levels; 3895 for (i = 0; i < ctx->argc; i++) 3896 ctx->argv[i] = ctx->argv[i + levels]; 3897 return ROFF_IGN; 3898 } 3899 3900 static int 3901 roff_so(ROFF_ARGS) 3902 { 3903 char *name, *cp; 3904 3905 name = buf->buf + pos; 3906 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3907 3908 /* 3909 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3910 * opening anything that's not in our cwd or anything beneath 3911 * it. Thus, explicitly disallow traversing up the file-system 3912 * or using absolute paths. 3913 */ 3914 3915 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3916 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3917 buf->sz = mandoc_asprintf(&cp, 3918 ".sp\nSee the file %s.\n.sp", name) + 1; 3919 free(buf->buf); 3920 buf->buf = cp; 3921 *offs = 0; 3922 return ROFF_REPARSE; 3923 } 3924 3925 *offs = pos; 3926 return ROFF_SO; 3927 } 3928 3929 /* --- user defined strings and macros ------------------------------------ */ 3930 3931 static int 3932 roff_userdef(ROFF_ARGS) 3933 { 3934 struct mctx *ctx; 3935 char *arg, *ap, *dst, *src; 3936 size_t sz; 3937 3938 /* If the macro is empty, ignore it altogether. */ 3939 3940 if (*r->current_string == '\0') 3941 return ROFF_IGN; 3942 3943 /* Initialize a new macro stack context. 
*/ 3944 3945 if (++r->mstackpos == r->mstacksz) { 3946 r->mstack = mandoc_recallocarray(r->mstack, 3947 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3948 r->mstacksz += 8; 3949 } 3950 ctx = r->mstack + r->mstackpos; 3951 ctx->argc = 0; 3952 3953 /* 3954 * Collect pointers to macro argument strings, 3955 * NUL-terminating them and escaping quotes. 3956 */ 3957 3958 src = buf->buf + pos; 3959 while (*src != '\0') { 3960 if (ctx->argc == ctx->argsz) { 3961 ctx->argsz += 8; 3962 ctx->argv = mandoc_reallocarray(ctx->argv, 3963 ctx->argsz, sizeof(*ctx->argv)); 3964 } 3965 arg = roff_getarg(r, &src, ln, &pos); 3966 sz = 1; /* For the terminating NUL. */ 3967 for (ap = arg; *ap != '\0'; ap++) 3968 sz += *ap == '"' ? 4 : 1; 3969 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3970 for (ap = arg; *ap != '\0'; ap++) { 3971 if (*ap == '"') { 3972 memcpy(dst, "\\(dq", 4); 3973 dst += 4; 3974 } else 3975 *dst++ = *ap; 3976 } 3977 *dst = '\0'; 3978 free(arg); 3979 } 3980 3981 /* Replace the macro invocation by the macro definition. */ 3982 3983 free(buf->buf); 3984 buf->buf = mandoc_strdup(r->current_string); 3985 buf->sz = strlen(buf->buf) + 1; 3986 *offs = 0; 3987 3988 return buf->buf[buf->sz - 2] == '\n' ? 3989 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3990 } 3991 3992 /* 3993 * Calling a high-level macro that was renamed with .rn. 3994 * r->current_string has already been set up by roff_parse(). 3995 */ 3996 static int 3997 roff_renamed(ROFF_ARGS) 3998 { 3999 char *nbuf; 4000 4001 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 4002 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 4003 free(buf->buf); 4004 buf->buf = nbuf; 4005 *offs = 0; 4006 return ROFF_CONT; 4007 } 4008 4009 /* 4010 * Measure the length in bytes of the roff identifier at *cpp 4011 * and advance the pointer to the next word. 
4012 */ 4013 static size_t 4014 roff_getname(struct roff *r, char **cpp, int ln, int pos) 4015 { 4016 char *name, *cp; 4017 size_t namesz; 4018 4019 name = *cpp; 4020 if (*name == '\0') 4021 return 0; 4022 4023 /* Advance cp to the byte after the end of the name. */ 4024 4025 for (cp = name; 1; cp++) { 4026 namesz = cp - name; 4027 if (*cp == '\0') 4028 break; 4029 if (*cp == ' ' || *cp == '\t') { 4030 cp++; 4031 break; 4032 } 4033 if (*cp != '\\') 4034 continue; 4035 if (cp[1] == '{' || cp[1] == '}') 4036 break; 4037 if (*++cp == '\\') 4038 continue; 4039 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 4040 "%.*s", (int)(cp - name + 1), name); 4041 mandoc_escape((const char **)&cp, NULL, NULL); 4042 break; 4043 } 4044 4045 /* Read past spaces. */ 4046 4047 while (*cp == ' ') 4048 cp++; 4049 4050 *cpp = cp; 4051 return namesz; 4052 } 4053 4054 /* 4055 * Store *string into the user-defined string called *name. 4056 * To clear an existing entry, call with (*r, *name, NULL, 0). 4057 * append == 0: replace mode 4058 * append == 1: single-line append mode 4059 * append == 2: multiline append mode, append '\n' after each call 4060 */ 4061 static void 4062 roff_setstr(struct roff *r, const char *name, const char *string, 4063 int append) 4064 { 4065 size_t namesz; 4066 4067 namesz = strlen(name); 4068 roff_setstrn(&r->strtab, name, namesz, string, 4069 string ? strlen(string) : 0, append); 4070 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 4071 } 4072 4073 static void 4074 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 4075 const char *string, size_t stringsz, int append) 4076 { 4077 struct roffkv *n; 4078 char *c; 4079 int i; 4080 size_t oldch, newch; 4081 4082 /* Search for an existing string with the same name. */ 4083 n = *r; 4084 4085 while (n && (namesz != n->key.sz || 4086 strncmp(n->key.p, name, namesz))) 4087 n = n->next; 4088 4089 if (NULL == n) { 4090 /* Create a new string table entry. 
*/ 4091 n = mandoc_malloc(sizeof(struct roffkv)); 4092 n->key.p = mandoc_strndup(name, namesz); 4093 n->key.sz = namesz; 4094 n->val.p = NULL; 4095 n->val.sz = 0; 4096 n->next = *r; 4097 *r = n; 4098 } else if (0 == append) { 4099 free(n->val.p); 4100 n->val.p = NULL; 4101 n->val.sz = 0; 4102 } 4103 4104 if (NULL == string) 4105 return; 4106 4107 /* 4108 * One additional byte for the '\n' in multiline mode, 4109 * and one for the terminating '\0'. 4110 */ 4111 newch = stringsz + (1 < append ? 2u : 1u); 4112 4113 if (NULL == n->val.p) { 4114 n->val.p = mandoc_malloc(newch); 4115 *n->val.p = '\0'; 4116 oldch = 0; 4117 } else { 4118 oldch = n->val.sz; 4119 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4120 } 4121 4122 /* Skip existing content in the destination buffer. */ 4123 c = n->val.p + (int)oldch; 4124 4125 /* Append new content to the destination buffer. */ 4126 i = 0; 4127 while (i < (int)stringsz) { 4128 /* 4129 * Rudimentary roff copy mode: 4130 * Handle escaped backslashes. 4131 */ 4132 if ('\\' == string[i] && '\\' == string[i + 1]) 4133 i++; 4134 *c++ = string[i++]; 4135 } 4136 4137 /* Append terminating bytes. 
*/ 4138 if (1 < append) 4139 *c++ = '\n'; 4140 4141 *c = '\0'; 4142 n->val.sz = (int)(c - n->val.p); 4143 } 4144 4145 static const char * 4146 roff_getstrn(struct roff *r, const char *name, size_t len, 4147 int *deftype) 4148 { 4149 const struct roffkv *n; 4150 int found, i; 4151 enum roff_tok tok; 4152 4153 found = 0; 4154 for (n = r->strtab; n != NULL; n = n->next) { 4155 if (strncmp(name, n->key.p, len) != 0 || 4156 n->key.p[len] != '\0' || n->val.p == NULL) 4157 continue; 4158 if (*deftype & ROFFDEF_USER) { 4159 *deftype = ROFFDEF_USER; 4160 return n->val.p; 4161 } else { 4162 found = 1; 4163 break; 4164 } 4165 } 4166 for (n = r->rentab; n != NULL; n = n->next) { 4167 if (strncmp(name, n->key.p, len) != 0 || 4168 n->key.p[len] != '\0' || n->val.p == NULL) 4169 continue; 4170 if (*deftype & ROFFDEF_REN) { 4171 *deftype = ROFFDEF_REN; 4172 return n->val.p; 4173 } else { 4174 found = 1; 4175 break; 4176 } 4177 } 4178 for (i = 0; i < PREDEFS_MAX; i++) { 4179 if (strncmp(name, predefs[i].name, len) != 0 || 4180 predefs[i].name[len] != '\0') 4181 continue; 4182 if (*deftype & ROFFDEF_PRE) { 4183 *deftype = ROFFDEF_PRE; 4184 return predefs[i].str; 4185 } else { 4186 found = 1; 4187 break; 4188 } 4189 } 4190 if (r->man->meta.macroset != MACROSET_MAN) { 4191 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4192 if (strncmp(name, roff_name[tok], len) != 0 || 4193 roff_name[tok][len] != '\0') 4194 continue; 4195 if (*deftype & ROFFDEF_STD) { 4196 *deftype = ROFFDEF_STD; 4197 return NULL; 4198 } else { 4199 found = 1; 4200 break; 4201 } 4202 } 4203 } 4204 if (r->man->meta.macroset != MACROSET_MDOC) { 4205 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4206 if (strncmp(name, roff_name[tok], len) != 0 || 4207 roff_name[tok][len] != '\0') 4208 continue; 4209 if (*deftype & ROFFDEF_STD) { 4210 *deftype = ROFFDEF_STD; 4211 return NULL; 4212 } else { 4213 found = 1; 4214 break; 4215 } 4216 } 4217 } 4218 4219 if (found == 0 && *deftype != ROFFDEF_ANY) { 4220 if (*deftype & ROFFDEF_REN) { 
4221 /* 4222 * This might still be a request, 4223 * so do not treat it as undefined yet. 4224 */ 4225 *deftype = ROFFDEF_UNDEF; 4226 return NULL; 4227 } 4228 4229 /* Using an undefined string defines it to be empty. */ 4230 4231 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4232 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4233 } 4234 4235 *deftype = 0; 4236 return NULL; 4237 } 4238 4239 static void 4240 roff_freestr(struct roffkv *r) 4241 { 4242 struct roffkv *n, *nn; 4243 4244 for (n = r; n; n = nn) { 4245 free(n->key.p); 4246 free(n->val.p); 4247 nn = n->next; 4248 free(n); 4249 } 4250 } 4251 4252 /* --- accessors and utility functions ------------------------------------ */ 4253 4254 /* 4255 * Duplicate an input string, making the appropriate character 4256 * conversations (as stipulated by `tr') along the way. 4257 * Returns a heap-allocated string with all the replacements made. 4258 */ 4259 char * 4260 roff_strdup(const struct roff *r, const char *p) 4261 { 4262 const struct roffkv *cp; 4263 char *res; 4264 const char *pp; 4265 size_t ssz, sz; 4266 enum mandoc_esc esc; 4267 4268 if (NULL == r->xmbtab && NULL == r->xtab) 4269 return mandoc_strdup(p); 4270 else if ('\0' == *p) 4271 return mandoc_strdup(""); 4272 4273 /* 4274 * Step through each character looking for term matches 4275 * (remember that a `tr' can be invoked with an escape, which is 4276 * a glyph but the escape is multi-character). 4277 * We only do this if the character hash has been initialised 4278 * and the string is >0 length. 
4279 */ 4280 4281 res = NULL; 4282 ssz = 0; 4283 4284 while ('\0' != *p) { 4285 assert((unsigned int)*p < 128); 4286 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4287 sz = r->xtab[(int)*p].sz; 4288 res = mandoc_realloc(res, ssz + sz + 1); 4289 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4290 ssz += sz; 4291 p++; 4292 continue; 4293 } else if ('\\' != *p) { 4294 res = mandoc_realloc(res, ssz + 2); 4295 res[ssz++] = *p++; 4296 continue; 4297 } 4298 4299 /* Search for term matches. */ 4300 for (cp = r->xmbtab; cp; cp = cp->next) 4301 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4302 break; 4303 4304 if (NULL != cp) { 4305 /* 4306 * A match has been found. 4307 * Append the match to the array and move 4308 * forward by its keysize. 4309 */ 4310 res = mandoc_realloc(res, 4311 ssz + cp->val.sz + 1); 4312 memcpy(res + ssz, cp->val.p, cp->val.sz); 4313 ssz += cp->val.sz; 4314 p += (int)cp->key.sz; 4315 continue; 4316 } 4317 4318 /* 4319 * Handle escapes carefully: we need to copy 4320 * over just the escape itself, or else we might 4321 * do replacements within the escape itself. 4322 * Make sure to pass along the bogus string. 4323 */ 4324 pp = p++; 4325 esc = mandoc_escape(&p, NULL, NULL); 4326 if (ESCAPE_ERROR == esc) { 4327 sz = strlen(pp); 4328 res = mandoc_realloc(res, ssz + sz + 1); 4329 memcpy(res + ssz, pp, sz); 4330 break; 4331 } 4332 /* 4333 * We bail out on bad escapes. 4334 * No need to warn: we already did so when 4335 * roff_expand() was called. 4336 */ 4337 sz = (int)(p - pp); 4338 res = mandoc_realloc(res, ssz + sz + 1); 4339 memcpy(res + ssz, pp, sz); 4340 ssz += sz; 4341 } 4342 4343 res[(int)ssz] = '\0'; 4344 return res; 4345 } 4346 4347 int 4348 roff_getformat(const struct roff *r) 4349 { 4350 4351 return r->format; 4352 } 4353 4354 /* 4355 * Find out whether a line is a macro line or not. 4356 * If it is, adjust the current position and return one; if it isn't, 4357 * return zero and don't change the current position. 
4358 * If the control character has been set with `.cc', then let that grain 4359 * precedence. 4360 * This is slighly contrary to groff, where using the non-breaking 4361 * control character when `cc' has been invoked will cause the 4362 * non-breaking macro contents to be printed verbatim. 4363 */ 4364 int 4365 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4366 { 4367 int pos; 4368 4369 pos = *ppos; 4370 4371 if (r->control != '\0' && cp[pos] == r->control) 4372 pos++; 4373 else if (r->control != '\0') 4374 return 0; 4375 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4376 pos += 2; 4377 else if ('.' == cp[pos] || '\'' == cp[pos]) 4378 pos++; 4379 else 4380 return 0; 4381 4382 while (' ' == cp[pos] || '\t' == cp[pos]) 4383 pos++; 4384 4385 *ppos = pos; 4386 return 1; 4387 } 4388