1 /* $OpenBSD: roff.c,v 1.259 2022/05/01 16:18:59 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the roff(7) parser for mandoc(1). 19 */ 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <limits.h> 25 #include <stddef.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc_ohash.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mandoc_parse.h" 36 #include "libmandoc.h" 37 #include "roff_int.h" 38 #include "tbl_parse.h" 39 #include "eqn_parse.h" 40 41 /* 42 * ASCII_ESC is used to signal from roff_getarg() to roff_expand() 43 * that an escape sequence resulted from copy-in processing and 44 * needs to be checked or interpolated. As it is used nowhere 45 * else, it is defined here rather than in a header file. 46 */ 47 #define ASCII_ESC 27 48 49 /* Maximum number of string expansions per line, to break infinite loops. */ 50 #define EXPAND_LIMIT 1000 51 52 /* Types of definitions of macros and strings. */ 53 #define ROFFDEF_USER (1 << 1) /* User-defined. */ 54 #define ROFFDEF_PRE (1 << 2) /* Predefined. */ 55 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 56 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 57 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 58 ROFFDEF_REN | ROFFDEF_STD) 59 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 60 61 /* --- data types --------------------------------------------------------- */ 62 63 /* 64 * An incredibly-simple string buffer. 65 */ 66 struct roffstr { 67 char *p; /* nil-terminated buffer */ 68 size_t sz; /* saved strlen(p) */ 69 }; 70 71 /* 72 * A key-value roffstr pair as part of a singly-linked list. 73 */ 74 struct roffkv { 75 struct roffstr key; 76 struct roffstr val; 77 struct roffkv *next; /* next in list */ 78 }; 79 80 /* 81 * A single number register as part of a singly-linked list. 82 */ 83 struct roffreg { 84 struct roffstr key; 85 int val; 86 int step; 87 struct roffreg *next; 88 }; 89 90 /* 91 * Association of request and macro names with token IDs. 92 */ 93 struct roffreq { 94 enum roff_tok tok; 95 char name[]; 96 }; 97 98 /* 99 * A macro processing context. 100 * More than one is needed when macro calls are nested. 101 */ 102 struct mctx { 103 char **argv; 104 int argc; 105 int argsz; 106 }; 107 108 struct roff { 109 struct roff_man *man; /* mdoc or man parser */ 110 struct roffnode *last; /* leaf of stack */ 111 struct mctx *mstack; /* stack of macro contexts */ 112 int *rstack; /* stack of inverted `ie' values */ 113 struct ohash *reqtab; /* request lookup table */ 114 struct roffreg *regtab; /* number registers */ 115 struct roffkv *strtab; /* user-defined strings & macros */ 116 struct roffkv *rentab; /* renamed strings & macros */ 117 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 118 struct roffstr *xtab; /* single-byte trans table (`tr') */ 119 const char *current_string; /* value of last called user macro */ 120 struct tbl_node *first_tbl; /* first table parsed */ 121 struct tbl_node *last_tbl; /* last table parsed */ 122 struct tbl_node *tbl; /* current table being parsed */ 123 struct eqn_node *last_eqn; /* equation parser */ 124 struct eqn_node *eqn; /* active equation parser */ 125 int eqn_inline; /* current equation is inline */ 126 int options; /* parse options */ 127 int mstacksz; /* current size of mstack */ 128 int mstackpos; /* position in mstack */ 129 int rstacksz; /* current size limit of rstack */ 130 int rstackpos; /* position in rstack */ 131 int format; /* current file in mdoc or man format */ 132 char control; /* control character */ 133 char escape; /* escape character */ 134 }; 135 136 /* 137 * A macro definition, condition, or ignored block. 138 */ 139 struct roffnode { 140 enum roff_tok tok; /* type of node */ 141 struct roffnode *parent; /* up one in stack */ 142 int line; /* parse line */ 143 int col; /* parse col */ 144 char *name; /* node name, e.g. macro name */ 145 char *end; /* custom end macro of the block */ 146 int endspan; /* scope to: 1=eol 2=next line -1=\} */ 147 int rule; /* content is: 1=evaluated 0=skipped */ 148 }; 149 150 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 151 enum roff_tok tok, /* tok of macro */ \ 152 struct buf *buf, /* input buffer */ \ 153 int ln, /* parse line */ \ 154 int ppos, /* original pos in buffer */ \ 155 int pos, /* current pos in buffer */ \ 156 int *offs /* reset offset of buffer data */ 157 158 typedef int (*roffproc)(ROFF_ARGS); 159 160 struct roffmac { 161 roffproc proc; /* process new macro */ 162 roffproc text; /* process as child text of macro */ 163 roffproc sub; /* process as child of macro */ 164 int flags; 165 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 166 }; 167 168 struct predef { 169 const char *name; /* predefined input name */ 170 const char *str; /* replacement symbol */ 171 }; 172 173 #define PREDEF(__name, __str) \ 174 { (__name), (__str) }, 175 176 /* --- function prototypes ------------------------------------------------ */ 177 178 static int roffnode_cleanscope(struct roff *); 179 static int roffnode_pop(struct roff *); 180 static void roffnode_push(struct roff *, enum roff_tok, 181 const char *, int, int); 182 static void roff_addtbl(struct roff_man *, int, struct tbl_node *); 183 static int roff_als(ROFF_ARGS); 184 static int roff_block(ROFF_ARGS); 185 static int roff_block_text(ROFF_ARGS); 186 static int roff_block_sub(ROFF_ARGS); 187 static int roff_break(ROFF_ARGS); 188 static int roff_cblock(ROFF_ARGS); 189 static int roff_cc(ROFF_ARGS); 190 static int roff_ccond(struct roff *, int, int); 191 static int roff_char(ROFF_ARGS); 192 static int roff_cond(ROFF_ARGS); 193 static int roff_cond_checkend(ROFF_ARGS); 194 static int roff_cond_text(ROFF_ARGS); 195 static int roff_cond_sub(ROFF_ARGS); 196 static int roff_ds(ROFF_ARGS); 197 static int roff_ec(ROFF_ARGS); 198 static int roff_eo(ROFF_ARGS); 199 static int roff_eqndelim(struct roff *, struct buf *, int); 200 static int roff_evalcond(struct roff *, int, char *, int *); 201 static int roff_evalnum(struct roff *, int, 202 const char *, int *, int *, int); 203 static int roff_evalpar(struct roff *, int, 204 const char *, int *, int *, int); 205 static int roff_evalstrcond(const char *, int *); 206 static int roff_expand(struct roff *, struct buf *, 207 int, int, char); 208 static void roff_free1(struct roff *); 209 static void roff_freereg(struct roffreg *); 210 static void roff_freestr(struct roffkv *); 211 static size_t roff_getname(struct roff *, char **, int, int); 212 static int roff_getnum(const char *, int *, int *, int); 213 static int roff_getop(const char *, int *, char *); 214 static int roff_getregn(struct roff *, 215 const char *, size_t, char); 216 static int roff_getregro(const struct roff *, 217 const char *name); 218 static const char *roff_getstrn(struct roff *, 219 const char *, size_t, int *); 220 static int roff_hasregn(const struct roff *, 221 const char *, size_t); 222 static int roff_insec(ROFF_ARGS); 223 static int roff_it(ROFF_ARGS); 224 static int roff_line_ignore(ROFF_ARGS); 225 static void roff_man_alloc1(struct roff_man *); 226 static void roff_man_free1(struct roff_man *); 227 static int roff_manyarg(ROFF_ARGS); 228 static int roff_mc(ROFF_ARGS); 229 static int roff_noarg(ROFF_ARGS); 230 static int roff_nop(ROFF_ARGS); 231 static int roff_nr(ROFF_ARGS); 232 static int roff_onearg(ROFF_ARGS); 233 static enum roff_tok roff_parse(struct roff *, char *, int *, 234 int, int); 235 static int roff_parse_comment(struct roff *, struct buf *, 236 int, int, char); 237 static int roff_parsetext(struct roff *, struct buf *, 238 int, int *); 239 static int roff_renamed(ROFF_ARGS); 240 static int roff_req_or_macro(ROFF_ARGS); 241 static int roff_return(ROFF_ARGS); 242 static int roff_rm(ROFF_ARGS); 243 static int roff_rn(ROFF_ARGS); 244 static int roff_rr(ROFF_ARGS); 245 static void roff_setregn(struct roff *, const char *, 246 size_t, int, char, int); 247 static void roff_setstr(struct roff *, 248 const char *, const char *, int); 249 static void roff_setstrn(struct roffkv **, const char *, 250 size_t, const char *, size_t, int); 251 static int roff_shift(ROFF_ARGS); 252 static int roff_so(ROFF_ARGS); 253 static int roff_tr(ROFF_ARGS); 254 static int roff_Dd(ROFF_ARGS); 255 static int roff_TE(ROFF_ARGS); 256 static int roff_TS(ROFF_ARGS); 257 static int roff_EQ(ROFF_ARGS); 258 static int roff_EN(ROFF_ARGS); 259 static int roff_T_(ROFF_ARGS); 260 static int roff_unsupp(ROFF_ARGS); 261 static int roff_userdef(ROFF_ARGS); 262 263 /* --- constant data ------------------------------------------------------ */ 264 265 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 266 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 267 268 const char *__roff_name[MAN_MAX + 1] = { 269 "br", "ce", "fi", "ft", 270 "ll", "mc", "nf", 271 "po", "rj", "sp", 272 "ta", "ti", NULL, 273 "ab", "ad", "af", "aln", 274 "als", "am", "am1", "ami", 275 "ami1", "as", "as1", "asciify", 276 "backtrace", "bd", "bleedat", "blm", 277 "box", "boxa", "bp", "BP", 278 "break", "breakchar", "brnl", "brp", 279 "brpnl", "c2", "cc", 280 "cf", "cflags", "ch", "char", 281 "chop", "class", "close", "CL", 282 "color", "composite", "continue", "cp", 283 "cropat", "cs", "cu", "da", 284 "dch", "Dd", "de", "de1", 285 "defcolor", "dei", "dei1", "device", 286 "devicem", "di", "do", "ds", 287 "ds1", "dwh", "dt", "ec", 288 "ecr", "ecs", "el", "em", 289 "EN", "eo", "EP", "EQ", 290 "errprint", "ev", "evc", "ex", 291 "fallback", "fam", "fc", "fchar", 292 "fcolor", "fdeferlig", "feature", "fkern", 293 "fl", "flig", "fp", "fps", 294 "fschar", "fspacewidth", "fspecial", "ftr", 295 "fzoom", "gcolor", "hc", "hcode", 296 "hidechar", "hla", "hlm", "hpf", 297 "hpfa", "hpfcode", "hw", "hy", 298 "hylang", "hylen", "hym", "hypp", 299 "hys", "ie", "if", "ig", 300 "index", "it", "itc", "IX", 301 "kern", "kernafter", "kernbefore", "kernpair", 302 "lc", "lc_ctype", "lds", "length", 303 "letadj", "lf", "lg", "lhang", 304 "linetabs", "lnr", "lnrf", "lpfx", 305 "ls", "lsm", "lt", 306 "mediasize", "minss", "mk", "mso", 307 "na", "ne", "nh", "nhychar", 308 "nm", "nn", "nop", "nr", 309 "nrf", "nroff", "ns", "nx", 310 "open", "opena", "os", "output", 311 "padj", "papersize", "pc", "pev", 312 "pi", "PI", "pl", "pm", 313 "pn", "pnr", "ps", 314 "psbb", "pshape", "pso", "ptr", 315 "pvs", "rchar", "rd", "recursionlimit", 316 "return", "rfschar", "rhang", 317 "rm", "rn", "rnn", "rr", 318 "rs", "rt", "schar", "sentchar", 319 "shc", "shift", "sizes", "so", 320 "spacewidth", "special", "spreadwarn", "ss", 321 "sty", "substring", "sv", "sy", 322 "T&", "tc", "TE", 323 "TH", "tkf", "tl", 324 "tm", "tm1", "tmc", "tr", 325 "track", "transchar", "trf", "trimat", 326 "trin", "trnt", "troff", "TS", 327 "uf", "ul", "unformat", "unwatch", 328 "unwatchn", "vpt", "vs", "warn", 329 "warnscale", "watch", "watchlength", "watchn", 330 "wh", "while", "write", "writec", 331 "writem", "xflag", ".", NULL, 332 NULL, "text", 333 "Dd", "Dt", "Os", "Sh", 334 "Ss", "Pp", "D1", "Dl", 335 "Bd", "Ed", "Bl", "El", 336 "It", "Ad", "An", "Ap", 337 "Ar", "Cd", "Cm", "Dv", 338 "Er", "Ev", "Ex", "Fa", 339 "Fd", "Fl", "Fn", "Ft", 340 "Ic", "In", "Li", "Nd", 341 "Nm", "Op", "Ot", "Pa", 342 "Rv", "St", "Va", "Vt", 343 "Xr", "%A", "%B", "%D", 344 "%I", "%J", "%N", "%O", 345 "%P", "%R", "%T", "%V", 346 "Ac", "Ao", "Aq", "At", 347 "Bc", "Bf", "Bo", "Bq", 348 "Bsx", "Bx", "Db", "Dc", 349 "Do", "Dq", "Ec", "Ef", 350 "Em", "Eo", "Fx", "Ms", 351 "No", "Ns", "Nx", "Ox", 352 "Pc", "Pf", "Po", "Pq", 353 "Qc", "Ql", "Qo", "Qq", 354 "Re", "Rs", "Sc", "So", 355 "Sq", "Sm", "Sx", "Sy", 356 "Tn", "Ux", "Xc", "Xo", 357 "Fo", "Fc", "Oo", "Oc", 358 "Bk", "Ek", "Bt", "Hf", 359 "Fr", "Ud", "Lb", "Lp", 360 "Lk", "Mt", "Brq", "Bro", 361 "Brc", "%C", "Es", "En", 362 "Dx", "%Q", "%U", "Ta", 363 "Tg", NULL, 364 "TH", "SH", "SS", "TP", 365 "TQ", 366 "LP", "PP", "P", "IP", 367 "HP", "SM", "SB", "BI", 368 "IB", "BR", "RB", "R", 369 "B", "I", "IR", "RI", 370 "RE", "RS", "DT", "UC", 371 "PD", "AT", "in", 372 "SY", "YS", "OP", 373 "EX", "EE", "UR", 374 "UE", "MT", "ME", NULL 375 }; 376 const char *const *roff_name = __roff_name; 377 378 static struct roffmac roffs[TOKEN_NONE] = { 379 { roff_noarg, NULL, NULL, 0 }, /* br */ 380 { roff_onearg, NULL, NULL, 0 }, /* ce */ 381 { roff_noarg, NULL, NULL, 0 }, /* fi */ 382 { roff_onearg, NULL, NULL, 0 }, /* ft */ 383 { roff_onearg, NULL, NULL, 0 }, /* ll */ 384 { roff_mc, NULL, NULL, 0 }, /* mc */ 385 { roff_noarg, NULL, NULL, 0 }, /* nf */ 386 { roff_onearg, NULL, NULL, 0 }, /* po */ 387 { roff_onearg, NULL, NULL, 0 }, /* rj */ 388 { roff_onearg, NULL, NULL, 0 }, /* sp */ 389 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 390 { roff_onearg, NULL, NULL, 0 }, /* ti */ 391 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 392 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 393 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 394 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 395 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 396 { roff_als, NULL, NULL, 0 }, /* als */ 397 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 398 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 399 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 400 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 401 { roff_ds, NULL, NULL, 0 }, /* as */ 402 { roff_ds, NULL, NULL, 0 }, /* as1 */ 403 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 404 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 405 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 406 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 407 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 408 { roff_unsupp, NULL, NULL, 0 }, /* box */ 409 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 410 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 411 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 412 { roff_break, NULL, NULL, 0 }, /* break */ 413 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 414 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 415 { roff_noarg, NULL, NULL, 0 }, /* brp */ 416 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 417 { roff_unsupp, NULL, NULL, 0 }, /* c2 */ 418 { roff_cc, NULL, NULL, 0 }, /* cc */ 419 { roff_insec, NULL, NULL, 0 }, /* cf */ 420 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 421 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 422 { roff_char, NULL, NULL, 0 }, /* char */ 423 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 424 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 425 { roff_insec, NULL, NULL, 0 }, /* close */ 426 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 427 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 428 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 429 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 430 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 431 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 432 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 433 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 434 { roff_unsupp, NULL, NULL, 0 }, /* da */ 435 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 436 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 437 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 438 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 439 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 440 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 441 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 442 { roff_unsupp, NULL, NULL, 0 }, /* device */ 443 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 444 { roff_unsupp, NULL, NULL, 0 }, /* di */ 445 { roff_unsupp, NULL, NULL, 0 }, /* do */ 446 { roff_ds, NULL, NULL, 0 }, /* ds */ 447 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 448 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 449 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 450 { roff_ec, NULL, NULL, 0 }, /* ec */ 451 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 452 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 453 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 454 { roff_unsupp, NULL, NULL, 0 }, /* em */ 455 { roff_EN, NULL, NULL, 0 }, /* EN */ 456 { roff_eo, NULL, NULL, 0 }, /* eo */ 457 { roff_unsupp, NULL, NULL, 0 }, /* EP */ 458 { roff_EQ, NULL, NULL, 0 }, /* EQ */ 459 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 460 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 461 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 462 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 463 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 464 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 465 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 466 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 467 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 468 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 469 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 470 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 471 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 472 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 473 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 474 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 475 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 476 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 477 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 479 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 484 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 485 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 486 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 487 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 488 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 489 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 490 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 491 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 492 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 493 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 494 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ 495 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 496 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 497 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 498 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 499 { roff_unsupp, NULL, NULL, 0 }, /* index */ 500 { roff_it, NULL, NULL, 0 }, /* it */ 501 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 502 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 503 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 504 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 505 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 506 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 507 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 508 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 509 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 510 { roff_unsupp, NULL, NULL, 0 }, /* length */ 511 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 512 { roff_insec, NULL, NULL, 0 }, /* lf */ 513 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 514 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 515 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 516 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 517 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 518 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 519 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 520 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 521 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 522 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 523 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 524 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 525 { roff_insec, NULL, NULL, 0 }, /* mso */ 526 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 527 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 528 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 529 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 530 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 531 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 532 { roff_nop, NULL, NULL, 0 }, /* nop */ 533 { roff_nr, NULL, NULL, 0 }, /* nr */ 534 { roff_unsupp, NULL, NULL, 0 }, /* nrf */ 535 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ 536 { roff_line_ignore, NULL, NULL, 0 }, /* ns */ 537 { roff_insec, NULL, NULL, 0 }, /* nx */ 538 { roff_insec, NULL, NULL, 0 }, /* open */ 539 { roff_insec, NULL, NULL, 0 }, /* opena */ 540 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 541 { roff_unsupp, NULL, NULL, 0 }, /* output */ 542 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 543 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 544 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 545 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 546 { roff_insec, NULL, NULL, 0 }, /* pi */ 547 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 548 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 549 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 550 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 551 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 552 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 553 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 554 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 555 { roff_insec, NULL, NULL, 0 }, /* pso */ 556 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 557 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 558 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 559 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 560 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 561 { roff_return, NULL, NULL, 0 }, /* return */ 562 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 563 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 564 { roff_rm, NULL, NULL, 0 }, /* rm */ 565 { roff_rn, NULL, NULL, 0 }, /* rn */ 566 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 567 { roff_rr, NULL, NULL, 0 }, /* rr */ 568 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 569 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 570 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 571 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 572 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 573 { roff_shift, NULL, NULL, 0 }, /* shift */ 574 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ 575 { roff_so, NULL, NULL, 0 }, /* so */ 576 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ 577 { roff_line_ignore, NULL, NULL, 0 }, /* special */ 578 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 579 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 580 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 581 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 582 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 583 { roff_insec, NULL, NULL, 0 }, /* sy */ 584 { roff_T_, NULL, NULL, 0 }, /* T& */ 585 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 586 { roff_TE, NULL, NULL, 0 }, /* TE */ 587 { roff_Dd, NULL, NULL, 0 }, /* TH */ 588 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 589 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 590 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 591 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 592 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 593 { roff_tr, NULL, NULL, 0 }, /* tr */ 594 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 595 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 596 { roff_insec, NULL, NULL, 0 }, /* trf */ 597 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 598 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 599 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 600 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 601 { roff_TS, NULL, NULL, 0 }, /* TS */ 602 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 603 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 604 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 605 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 606 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 607 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 608 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 609 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 610 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 611 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 612 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 613 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 614 { roff_unsupp, NULL, NULL, 0 }, /* wh */ 615 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 616 { roff_insec, NULL, NULL, 0 }, /* write */ 617 { roff_insec, NULL, NULL, 0 }, /* writec */ 618 { roff_insec, NULL, NULL, 0 }, /* writem */ 619 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 620 { roff_cblock, NULL, NULL, 0 }, /* . */ 621 { roff_renamed, NULL, NULL, 0 }, 622 { roff_userdef, NULL, NULL, 0 } 623 }; 624 625 /* Array of injected predefined strings. */ 626 #define PREDEFS_MAX 38 627 static const struct predef predefs[PREDEFS_MAX] = { 628 #include "predefs.in" 629 }; 630 631 static int roffce_lines; /* number of input lines to center */ 632 static struct roff_node *roffce_node; /* active request */ 633 static int roffit_lines; /* number of lines to delay */ 634 static char *roffit_macro; /* nil-terminated macro line */ 635 636 637 /* --- request table ------------------------------------------------------ */ 638 639 struct ohash * 640 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 641 { 642 struct ohash *htab; 643 struct roffreq *req; 644 enum roff_tok tok; 645 size_t sz; 646 unsigned int slot; 647 648 htab = mandoc_malloc(sizeof(*htab)); 649 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 650 651 for (tok = mintok; tok < maxtok; tok++) { 652 if (roff_name[tok] == NULL) 653 continue; 654 sz = strlen(roff_name[tok]); 655 req = mandoc_malloc(sizeof(*req) + sz + 1); 656 req->tok = tok; 657 memcpy(req->name, roff_name[tok], sz + 1); 658 slot = ohash_qlookup(htab, req->name); 659 ohash_insert(htab, slot, req); 660 } 661 return htab; 662 } 663 664 void 665 roffhash_free(struct ohash *htab) 666 { 667 struct roffreq *req; 668 unsigned int slot; 669 670 if (htab == NULL) 671 return; 672 for (req = ohash_first(htab, &slot); req != NULL; 673 req = ohash_next(htab, &slot)) 674 free(req); 675 ohash_delete(htab); 676 free(htab); 677 } 678 679 enum roff_tok 680 roffhash_find(struct ohash *htab, const char *name, size_t sz) 681 { 682 struct roffreq *req; 683 const char *end; 684 685 if (sz) { 686 end = name + sz; 687 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 688 } else 689 req = ohash_find(htab, ohash_qlookup(htab, name)); 690 return req == NULL ? TOKEN_NONE : req->tok; 691 } 692 693 /* --- stack of request blocks -------------------------------------------- */ 694 695 /* 696 * Pop the current node off of the stack of roff instructions currently 697 * pending. Return 1 if it is a loop or 0 otherwise. 698 */ 699 static int 700 roffnode_pop(struct roff *r) 701 { 702 struct roffnode *p; 703 int inloop; 704 705 p = r->last; 706 inloop = p->tok == ROFF_while; 707 r->last = p->parent; 708 free(p->name); 709 free(p->end); 710 free(p); 711 return inloop; 712 } 713 714 /* 715 * Push a roff node onto the instruction stack. This must later be 716 * removed with roffnode_pop(). 717 */ 718 static void 719 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 720 int line, int col) 721 { 722 struct roffnode *p; 723 724 p = mandoc_calloc(1, sizeof(struct roffnode)); 725 p->tok = tok; 726 if (name) 727 p->name = mandoc_strdup(name); 728 p->parent = r->last; 729 p->line = line; 730 p->col = col; 731 p->rule = p->parent ? p->parent->rule : 0; 732 733 r->last = p; 734 } 735 736 /* --- roff parser state data management ---------------------------------- */ 737 738 static void 739 roff_free1(struct roff *r) 740 { 741 int i; 742 743 tbl_free(r->first_tbl); 744 r->first_tbl = r->last_tbl = r->tbl = NULL; 745 746 eqn_free(r->last_eqn); 747 r->last_eqn = r->eqn = NULL; 748 749 while (r->mstackpos >= 0) 750 roff_userret(r); 751 752 while (r->last) 753 roffnode_pop(r); 754 755 free (r->rstack); 756 r->rstack = NULL; 757 r->rstacksz = 0; 758 r->rstackpos = -1; 759 760 roff_freereg(r->regtab); 761 r->regtab = NULL; 762 763 roff_freestr(r->strtab); 764 roff_freestr(r->rentab); 765 roff_freestr(r->xmbtab); 766 r->strtab = r->rentab = r->xmbtab = NULL; 767 768 if (r->xtab) 769 for (i = 0; i < 128; i++) 770 free(r->xtab[i].p); 771 free(r->xtab); 772 r->xtab = NULL; 773 } 774 775 void 776 roff_reset(struct roff *r) 777 { 778 roff_free1(r); 779 r->options |= MPARSE_COMMENT; 780 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 781 r->control = '\0'; 782 r->escape = '\\'; 783 roffce_lines = 0; 784 roffce_node = NULL; 785 roffit_lines = 0; 786 roffit_macro = NULL; 787 } 788 789 void 790 roff_free(struct roff *r) 791 { 792 int i; 793 794 roff_free1(r); 795 for (i = 0; i < r->mstacksz; i++) 796 free(r->mstack[i].argv); 797 free(r->mstack); 798 roffhash_free(r->reqtab); 799 free(r); 800 } 801 802 struct roff * 803 roff_alloc(int options) 804 { 805 struct roff *r; 806 807 r = mandoc_calloc(1, sizeof(struct roff)); 808 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 809 r->options = options | MPARSE_COMMENT; 810 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 811 r->mstackpos = -1; 812 r->rstackpos = -1; 813 r->escape = '\\'; 814 return r; 815 } 816 817 /* --- syntax tree state data management ---------------------------------- */ 818 819 static void 820 roff_man_free1(struct roff_man *man) 821 { 822 if (man->meta.first != NULL) 823 roff_node_delete(man, man->meta.first); 824 free(man->meta.msec); 825 free(man->meta.vol); 826 free(man->meta.os); 827 free(man->meta.arch); 828 free(man->meta.title); 829 free(man->meta.name); 830 free(man->meta.date); 831 free(man->meta.sodest); 832 } 833 834 void 835 roff_state_reset(struct roff_man *man) 836 { 837 man->last = man->meta.first; 838 man->last_es = NULL; 839 man->flags = 0; 840 man->lastsec = man->lastnamed = SEC_NONE; 841 man->next = ROFF_NEXT_CHILD; 842 roff_setreg(man->roff, "nS", 0, '='); 843 } 844 845 static void 846 roff_man_alloc1(struct roff_man *man) 847 { 848 memset(&man->meta, 0, sizeof(man->meta)); 849 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 850 man->meta.first->type = ROFFT_ROOT; 851 man->meta.macroset = MACROSET_NONE; 852 roff_state_reset(man); 853 } 854 855 void 856 roff_man_reset(struct roff_man *man) 857 { 858 roff_man_free1(man); 859 roff_man_alloc1(man); 860 } 861 862 void 863 roff_man_free(struct roff_man *man) 864 { 865 roff_man_free1(man); 866 free(man->os_r); 867 free(man); 868 } 869 870 struct roff_man * 871 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 872 { 873 struct roff_man *man; 874 875 man = mandoc_calloc(1, sizeof(*man)); 876 man->roff = roff; 877 man->os_s = os_s; 878 man->quick = quick; 879 roff_man_alloc1(man); 880 roff->man = man; 881 return man; 882 } 883 884 /* --- syntax tree handling ----------------------------------------------- */ 885 886 struct roff_node * 887 roff_node_alloc(struct roff_man *man, int line, int pos, 888 enum roff_type type, int tok) 889 { 890 struct roff_node *n; 891 892 n = mandoc_calloc(1, sizeof(*n)); 893 n->line = line; 894 n->pos = pos; 895 n->tok = tok; 896 n->type = type; 897 n->sec = man->lastsec; 898 899 if (man->flags & MDOC_SYNOPSIS) 900 n->flags |= NODE_SYNPRETTY; 901 else 902 n->flags &= ~NODE_SYNPRETTY; 903 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 904 n->flags |= NODE_NOFILL; 905 else 906 n->flags &= ~NODE_NOFILL; 907 if (man->flags & MDOC_NEWLINE) 908 n->flags |= NODE_LINE; 909 man->flags &= ~MDOC_NEWLINE; 910 911 return n; 912 } 913 914 void 915 roff_node_append(struct roff_man *man, struct roff_node *n) 916 { 917 918 switch (man->next) { 919 case ROFF_NEXT_SIBLING: 920 if (man->last->next != NULL) { 921 n->next = man->last->next; 922 man->last->next->prev = n; 923 } else 924 man->last->parent->last = n; 925 man->last->next = n; 926 n->prev = man->last; 927 n->parent = man->last->parent; 928 break; 929 case ROFF_NEXT_CHILD: 930 if (man->last->child != NULL) { 931 n->next = man->last->child; 932 man->last->child->prev = n; 933 } else 934 man->last->last = n; 935 man->last->child = n; 936 n->parent = man->last; 937 break; 938 default: 939 abort(); 940 } 941 man->last = n; 942 943 switch (n->type) { 944 case ROFFT_HEAD: 945 n->parent->head = n; 946 break; 947 case ROFFT_BODY: 948 if (n->end != ENDBODY_NOT) 949 return; 950 n->parent->body = n; 951 break; 952 case ROFFT_TAIL: 953 n->parent->tail = n; 954 break; 955 default: 956 return; 957 } 958 959 /* 960 * Copy over the normalised-data pointer of our parent. Not 961 * everybody has one, but copying a null pointer is fine. 962 */ 963 964 n->norm = n->parent->norm; 965 assert(n->parent->type == ROFFT_BLOCK); 966 } 967 968 void 969 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 970 { 971 struct roff_node *n; 972 973 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 974 n->string = roff_strdup(man->roff, word); 975 roff_node_append(man, n); 976 n->flags |= NODE_VALID | NODE_ENDED; 977 man->next = ROFF_NEXT_SIBLING; 978 } 979 980 void 981 roff_word_append(struct roff_man *man, const char *word) 982 { 983 struct roff_node *n; 984 char *addstr, *newstr; 985 986 n = man->last; 987 addstr = roff_strdup(man->roff, word); 988 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 989 free(addstr); 990 free(n->string); 991 n->string = newstr; 992 man->next = ROFF_NEXT_SIBLING; 993 } 994 995 void 996 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 997 { 998 struct roff_node *n; 999 1000 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 1001 roff_node_append(man, n); 1002 man->next = ROFF_NEXT_CHILD; 1003 } 1004 1005 struct roff_node * 1006 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 1007 { 1008 struct roff_node *n; 1009 1010 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1011 roff_node_append(man, n); 1012 man->next = ROFF_NEXT_CHILD; 1013 return n; 1014 } 1015 1016 struct roff_node * 1017 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1018 { 1019 struct roff_node *n; 1020 1021 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1022 roff_node_append(man, n); 1023 man->next = ROFF_NEXT_CHILD; 1024 return n; 1025 } 1026 1027 struct roff_node * 1028 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1029 { 1030 struct roff_node *n; 1031 1032 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1033 roff_node_append(man, n); 1034 man->next = ROFF_NEXT_CHILD; 1035 return n; 1036 } 1037 1038 static void 1039 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1040 { 1041 struct roff_node *n; 1042 struct tbl_span *span; 1043 1044 if (man->meta.macroset == MACROSET_MAN) 1045 man_breakscope(man, ROFF_TS); 1046 while ((span = tbl_span(tbl)) != NULL) { 1047 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1048 n->span = span; 1049 roff_node_append(man, n); 1050 n->flags |= NODE_VALID | NODE_ENDED; 1051 man->next = ROFF_NEXT_SIBLING; 1052 } 1053 } 1054 1055 void 1056 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1057 { 1058 1059 /* Adjust siblings. */ 1060 1061 if (n->prev) 1062 n->prev->next = n->next; 1063 if (n->next) 1064 n->next->prev = n->prev; 1065 1066 /* Adjust parent. */ 1067 1068 if (n->parent != NULL) { 1069 if (n->parent->child == n) 1070 n->parent->child = n->next; 1071 if (n->parent->last == n) 1072 n->parent->last = n->prev; 1073 } 1074 1075 /* Adjust parse point. */ 1076 1077 if (man == NULL) 1078 return; 1079 if (man->last == n) { 1080 if (n->prev == NULL) { 1081 man->last = n->parent; 1082 man->next = ROFF_NEXT_CHILD; 1083 } else { 1084 man->last = n->prev; 1085 man->next = ROFF_NEXT_SIBLING; 1086 } 1087 } 1088 if (man->meta.first == n) 1089 man->meta.first = NULL; 1090 } 1091 1092 void 1093 roff_node_relink(struct roff_man *man, struct roff_node *n) 1094 { 1095 roff_node_unlink(man, n); 1096 n->prev = n->next = NULL; 1097 roff_node_append(man, n); 1098 } 1099 1100 void 1101 roff_node_free(struct roff_node *n) 1102 { 1103 1104 if (n->args != NULL) 1105 mdoc_argv_free(n->args); 1106 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1107 free(n->norm); 1108 eqn_box_free(n->eqn); 1109 free(n->string); 1110 free(n->tag); 1111 free(n); 1112 } 1113 1114 void 1115 roff_node_delete(struct roff_man *man, struct roff_node *n) 1116 { 1117 1118 while (n->child != NULL) 1119 roff_node_delete(man, n->child); 1120 roff_node_unlink(man, n); 1121 roff_node_free(n); 1122 } 1123 1124 int 1125 roff_node_transparent(struct roff_node *n) 1126 { 1127 if (n == NULL) 1128 return 0; 1129 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) 1130 return 1; 1131 return roff_tok_transparent(n->tok); 1132 } 1133 1134 int 1135 roff_tok_transparent(enum roff_tok tok) 1136 { 1137 switch (tok) { 1138 case ROFF_ft: 1139 case ROFF_ll: 1140 case ROFF_mc: 1141 case ROFF_po: 1142 case ROFF_ta: 1143 case MDOC_Db: 1144 case MDOC_Es: 1145 case MDOC_Sm: 1146 case MDOC_Tg: 1147 case MAN_DT: 1148 case MAN_UC: 1149 case MAN_PD: 1150 case MAN_AT: 1151 return 1; 1152 default: 1153 return 0; 1154 } 1155 } 1156 1157 struct roff_node * 1158 roff_node_child(struct roff_node *n) 1159 { 1160 for (n = n->child; roff_node_transparent(n); n = n->next) 1161 continue; 1162 return n; 1163 } 1164 1165 struct roff_node * 1166 roff_node_prev(struct roff_node *n) 1167 { 1168 do { 1169 n = n->prev; 1170 } while (roff_node_transparent(n)); 1171 return n; 1172 } 1173 1174 struct roff_node * 1175 roff_node_next(struct roff_node *n) 1176 { 1177 do { 1178 n = n->next; 1179 } while (roff_node_transparent(n)); 1180 return n; 1181 } 1182 1183 void 1184 deroff(char **dest, const struct roff_node *n) 1185 { 1186 char *cp; 1187 size_t sz; 1188 1189 if (n->string == NULL) { 1190 for (n = n->child; n != NULL; n = n->next) 1191 deroff(dest, n); 1192 return; 1193 } 1194 1195 /* Skip leading whitespace. */ 1196 1197 for (cp = n->string; *cp != '\0'; cp++) { 1198 if (cp[0] == '\\' && cp[1] != '\0' && 1199 strchr(" %&0^|~", cp[1]) != NULL) 1200 cp++; 1201 else if ( ! isspace((unsigned char)*cp)) 1202 break; 1203 } 1204 1205 /* Skip trailing backslash. */ 1206 1207 sz = strlen(cp); 1208 if (sz > 0 && cp[sz - 1] == '\\') 1209 sz--; 1210 1211 /* Skip trailing whitespace. */ 1212 1213 for (; sz; sz--) 1214 if ( ! isspace((unsigned char)cp[sz-1])) 1215 break; 1216 1217 /* Skip empty strings. */ 1218 1219 if (sz == 0) 1220 return; 1221 1222 if (*dest == NULL) { 1223 *dest = mandoc_strndup(cp, sz); 1224 return; 1225 } 1226 1227 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1228 free(*dest); 1229 *dest = cp; 1230 } 1231 1232 /* --- main functions of the roff parser ---------------------------------- */ 1233 1234 static int 1235 roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, 1236 char newesc) 1237 { 1238 struct roff_node *n; /* used for header comments */ 1239 const char *start; /* start of the string to process */ 1240 const char *cp; /* for RCS id parsing */ 1241 char *stesc; /* start of an escape sequence ('\\') */ 1242 char *ep; /* end of comment string */ 1243 int rcsid; /* kind of RCS id seen */ 1244 1245 for (start = stesc = buf->buf + pos;; stesc++) { 1246 /* The line ends without continuation or comment. */ 1247 if (stesc[0] == '\0') 1248 return ROFF_CONT; 1249 1250 /* Unescaped byte: skip it. */ 1251 if (stesc[0] != newesc) 1252 continue; 1253 1254 /* Backslash at end of line requests line continuation. */ 1255 if (stesc[1] == '\0') { 1256 stesc[0] = '\0'; 1257 return ROFF_IGN | ROFF_APPEND; 1258 } 1259 1260 /* Found a comment: process it. */ 1261 if (stesc[1] == '"' || stesc[1] == '#') 1262 break; 1263 1264 /* Escaped escape character: skip them both. */ 1265 if (stesc[1] == newesc) 1266 stesc++; 1267 } 1268 1269 /* Look for an RCS id in the comment. */ 1270 1271 rcsid = 0; 1272 if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) { 1273 rcsid = 1 << MANDOC_OS_OPENBSD; 1274 cp += 8; 1275 } else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) { 1276 rcsid = 1 << MANDOC_OS_NETBSD; 1277 cp += 7; 1278 } 1279 if (cp != NULL && isalnum((unsigned char)*cp) == 0 && 1280 strchr(cp, '$') != NULL) { 1281 if (r->man->meta.rcsids & rcsid) 1282 mandoc_msg(MANDOCERR_RCS_REP, ln, 1283 (int)(stesc - buf->buf) + 2, "%s", stesc + 1); 1284 r->man->meta.rcsids |= rcsid; 1285 } 1286 1287 /* Warn about trailing whitespace at the end of the comment. */ 1288 1289 ep = strchr(stesc + 2, '\0') - 1; 1290 if (*ep == '\n') 1291 *ep-- = '\0'; 1292 if (*ep == ' ' || *ep == '\t') 1293 mandoc_msg(MANDOCERR_SPACE_EOL, 1294 ln, (int)(ep - buf->buf), NULL); 1295 1296 /* Save comments preceding the title macro in the syntax tree. */ 1297 1298 if (r->options & MPARSE_COMMENT) { 1299 while (*ep == ' ' || *ep == '\t') 1300 ep--; 1301 ep[1] = '\0'; 1302 n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf, 1303 ROFFT_COMMENT, TOKEN_NONE); 1304 n->string = mandoc_strdup(stesc + 2); 1305 roff_node_append(r->man, n); 1306 n->flags |= NODE_VALID | NODE_ENDED; 1307 r->man->next = ROFF_NEXT_SIBLING; 1308 } 1309 1310 /* The comment requests line continuation. */ 1311 1312 if (stesc[1] == '#') { 1313 *stesc = '\0'; 1314 return ROFF_IGN | ROFF_APPEND; 1315 } 1316 1317 /* Discard the comment including preceding whitespace. */ 1318 1319 while (stesc > start && stesc[-1] == ' ' && 1320 (stesc == start + 1 || stesc[-2] != '\\')) 1321 stesc--; 1322 *stesc = '\0'; 1323 return ROFF_CONT; 1324 } 1325 1326 /* 1327 * In the current line, expand escape sequences that produce parsable 1328 * input text. Also check the syntax of the remaining escape sequences, 1329 * which typically produce output glyphs or change formatter state. 1330 */ 1331 static int 1332 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1333 { 1334 struct mctx *ctx; /* current macro call context */ 1335 char ubuf[24]; /* buffer to print the number */ 1336 const char *start; /* start of the string to process */ 1337 char *stesc; /* start of an escape sequence ('\\') */ 1338 const char *esct; /* type of esccape sequence */ 1339 const char *stnam; /* start of the name, after "[(*" */ 1340 const char *cp; /* end of the name, e.g. before ']' */ 1341 const char *res; /* the string to be substituted */ 1342 char *nbuf; /* new buffer to copy buf->buf to */ 1343 size_t maxl; /* expected length of the escape name */ 1344 size_t naml; /* actual length of the escape name */ 1345 size_t asz; /* length of the replacement */ 1346 size_t rsz; /* length of the rest of the string */ 1347 int inaml; /* length returned from mandoc_escape() */ 1348 int expand_count; /* to avoid infinite loops */ 1349 int npos; /* position in numeric expression */ 1350 int arg_complete; /* argument not interrupted by eol */ 1351 int quote_args; /* true for \\$@, false for \\$* */ 1352 int deftype; /* type of definition to paste */ 1353 enum mandocerr err; /* for escape sequence problems */ 1354 char sign; /* increment number register */ 1355 char term; /* character terminating the escape */ 1356 1357 start = buf->buf + pos; 1358 stesc = strchr(start, '\0') - 1; 1359 if (stesc >= start && *stesc == '\n') 1360 *stesc-- = '\0'; 1361 1362 expand_count = 0; 1363 while (stesc >= start) { 1364 if (*stesc != newesc) { 1365 1366 /* 1367 * If we have a non-standard escape character, 1368 * escape literal backslashes because all 1369 * processing in subsequent functions uses 1370 * the standard escaping rules. 1371 */ 1372 1373 if (newesc != ASCII_ESC && *stesc == '\\') { 1374 *stesc = '\0'; 1375 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", 1376 buf->buf, stesc + 1) + 1; 1377 start = nbuf + pos; 1378 stesc = nbuf + (stesc - buf->buf); 1379 free(buf->buf); 1380 buf->buf = nbuf; 1381 } 1382 1383 /* Search backwards for the next escape. */ 1384 1385 stesc--; 1386 continue; 1387 } 1388 1389 /* If it is escaped, skip it. */ 1390 1391 for (cp = stesc - 1; cp >= start; cp--) 1392 if (*cp != r->escape) 1393 break; 1394 1395 if ((stesc - cp) % 2 == 0) { 1396 while (stesc > cp) 1397 *stesc-- = '\\'; 1398 continue; 1399 } else if (stesc[1] == '\0') { 1400 *stesc-- = '\0'; 1401 continue; 1402 } else 1403 *stesc = '\\'; 1404 1405 /* Decide whether to expand or to check only. */ 1406 1407 term = '\0'; 1408 cp = stesc + 1; 1409 while (*cp == 'E') 1410 cp++; 1411 esct = cp; 1412 switch (*esct) { 1413 case '*': 1414 case '$': 1415 res = NULL; 1416 break; 1417 case 'B': 1418 case 'w': 1419 term = cp[1]; 1420 /* FALLTHROUGH */ 1421 case 'n': 1422 sign = cp[1]; 1423 if (sign == '+' || sign == '-') 1424 cp++; 1425 res = ubuf; 1426 break; 1427 default: 1428 err = MANDOCERR_OK; 1429 switch(mandoc_escape(&cp, &stnam, &inaml)) { 1430 case ESCAPE_SPECIAL: 1431 if (mchars_spec2cp(stnam, inaml) >= 0) 1432 break; 1433 /* FALLTHROUGH */ 1434 case ESCAPE_ERROR: 1435 err = MANDOCERR_ESC_BAD; 1436 break; 1437 case ESCAPE_UNDEF: 1438 err = MANDOCERR_ESC_UNDEF; 1439 break; 1440 case ESCAPE_UNSUPP: 1441 err = MANDOCERR_ESC_UNSUPP; 1442 break; 1443 default: 1444 break; 1445 } 1446 if (err != MANDOCERR_OK) 1447 mandoc_msg(err, ln, (int)(stesc - buf->buf), 1448 "%.*s", (int)(cp - stesc), stesc); 1449 stesc--; 1450 continue; 1451 } 1452 1453 if (EXPAND_LIMIT < ++expand_count) { 1454 mandoc_msg(MANDOCERR_ROFFLOOP, 1455 ln, (int)(stesc - buf->buf), NULL); 1456 return ROFF_IGN; 1457 } 1458 1459 /* 1460 * The third character decides the length 1461 * of the name of the string or register. 1462 * Save a pointer to the name. 1463 */ 1464 1465 if (term == '\0') { 1466 switch (*++cp) { 1467 case '\0': 1468 maxl = 0; 1469 break; 1470 case '(': 1471 cp++; 1472 maxl = 2; 1473 break; 1474 case '[': 1475 cp++; 1476 term = ']'; 1477 maxl = 0; 1478 break; 1479 default: 1480 maxl = 1; 1481 break; 1482 } 1483 } else { 1484 cp += 2; 1485 maxl = 0; 1486 } 1487 stnam = cp; 1488 1489 /* Advance to the end of the name. */ 1490 1491 naml = 0; 1492 arg_complete = 1; 1493 while (maxl == 0 || naml < maxl) { 1494 if (*cp == '\0') { 1495 mandoc_msg(MANDOCERR_ESC_BAD, ln, 1496 (int)(stesc - buf->buf), "%s", stesc); 1497 arg_complete = 0; 1498 break; 1499 } 1500 if (maxl == 0 && *cp == term) { 1501 cp++; 1502 break; 1503 } 1504 if (*cp++ != '\\' || *esct != 'w') { 1505 naml++; 1506 continue; 1507 } 1508 switch (mandoc_escape(&cp, NULL, NULL)) { 1509 case ESCAPE_SPECIAL: 1510 case ESCAPE_UNICODE: 1511 case ESCAPE_NUMBERED: 1512 case ESCAPE_UNDEF: 1513 case ESCAPE_OVERSTRIKE: 1514 naml++; 1515 break; 1516 default: 1517 break; 1518 } 1519 } 1520 1521 /* 1522 * Retrieve the replacement string; if it is 1523 * undefined, resume searching for escapes. 1524 */ 1525 1526 switch (*esct) { 1527 case '*': 1528 if (arg_complete) { 1529 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1530 res = roff_getstrn(r, stnam, naml, &deftype); 1531 1532 /* 1533 * If not overriden, let \*(.T 1534 * through to the formatters. 1535 */ 1536 1537 if (res == NULL && naml == 2 && 1538 stnam[0] == '.' && stnam[1] == 'T') { 1539 roff_setstrn(&r->strtab, 1540 ".T", 2, NULL, 0, 0); 1541 stesc--; 1542 continue; 1543 } 1544 } 1545 break; 1546 case '$': 1547 if (r->mstackpos < 0) { 1548 mandoc_msg(MANDOCERR_ARG_UNDEF, ln, 1549 (int)(stesc - buf->buf), "%.3s", stesc); 1550 break; 1551 } 1552 ctx = r->mstack + r->mstackpos; 1553 npos = esct[1] - '1'; 1554 if (npos >= 0 && npos <= 8) { 1555 res = npos < ctx->argc ? 1556 ctx->argv[npos] : ""; 1557 break; 1558 } 1559 if (esct[1] == '*') 1560 quote_args = 0; 1561 else if (esct[1] == '@') 1562 quote_args = 1; 1563 else { 1564 mandoc_msg(MANDOCERR_ARG_NONUM, ln, 1565 (int)(stesc - buf->buf), "%.3s", stesc); 1566 break; 1567 } 1568 asz = 0; 1569 for (npos = 0; npos < ctx->argc; npos++) { 1570 if (npos) 1571 asz++; /* blank */ 1572 if (quote_args) 1573 asz += 2; /* quotes */ 1574 asz += strlen(ctx->argv[npos]); 1575 } 1576 if (asz != 3) { 1577 rsz = buf->sz - (stesc - buf->buf) - 3; 1578 if (asz < 3) 1579 memmove(stesc + asz, stesc + 3, rsz); 1580 buf->sz += asz - 3; 1581 nbuf = mandoc_realloc(buf->buf, buf->sz); 1582 start = nbuf + pos; 1583 stesc = nbuf + (stesc - buf->buf); 1584 buf->buf = nbuf; 1585 if (asz > 3) 1586 memmove(stesc + asz, stesc + 3, rsz); 1587 } 1588 for (npos = 0; npos < ctx->argc; npos++) { 1589 if (npos) 1590 *stesc++ = ' '; 1591 if (quote_args) 1592 *stesc++ = '"'; 1593 cp = ctx->argv[npos]; 1594 while (*cp != '\0') 1595 *stesc++ = *cp++; 1596 if (quote_args) 1597 *stesc++ = '"'; 1598 } 1599 continue; 1600 case 'B': 1601 npos = 0; 1602 ubuf[0] = arg_complete && 1603 roff_evalnum(r, ln, stnam, &npos, 1604 NULL, ROFFNUM_SCALE) && 1605 stnam + npos + 1 == cp ? '1' : '0'; 1606 ubuf[1] = '\0'; 1607 break; 1608 case 'n': 1609 if (arg_complete) 1610 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1611 roff_getregn(r, stnam, naml, sign)); 1612 else 1613 ubuf[0] = '\0'; 1614 break; 1615 case 'w': 1616 /* use even incomplete args */ 1617 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1618 24 * (int)naml); 1619 break; 1620 } 1621 1622 if (res == NULL) { 1623 if (*esct == '*') 1624 mandoc_msg(MANDOCERR_STR_UNDEF, 1625 ln, (int)(stesc - buf->buf), 1626 "%.*s", (int)naml, stnam); 1627 res = ""; 1628 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1629 mandoc_msg(MANDOCERR_ROFFLOOP, 1630 ln, (int)(stesc - buf->buf), NULL); 1631 return ROFF_IGN; 1632 } 1633 1634 /* Replace the escape sequence by the string. */ 1635 1636 *stesc = '\0'; 1637 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1638 buf->buf, res, cp) + 1; 1639 1640 /* Prepare for the next replacement. */ 1641 1642 start = nbuf + pos; 1643 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1644 free(buf->buf); 1645 buf->buf = nbuf; 1646 } 1647 return ROFF_CONT; 1648 } 1649 1650 /* 1651 * Parse a quoted or unquoted roff-style request or macro argument. 1652 * Return a pointer to the parsed argument, which is either the original 1653 * pointer or advanced by one byte in case the argument is quoted. 1654 * NUL-terminate the argument in place. 1655 * Collapse pairs of quotes inside quoted arguments. 1656 * Advance the argument pointer to the next argument, 1657 * or to the NUL byte terminating the argument line. 1658 */ 1659 char * 1660 roff_getarg(struct roff *r, char **cpp, int ln, int *pos) 1661 { 1662 struct buf buf; 1663 char *cp, *start; 1664 int newesc, pairs, quoted, white; 1665 1666 /* Quoting can only start with a new word. */ 1667 start = *cpp; 1668 quoted = 0; 1669 if ('"' == *start) { 1670 quoted = 1; 1671 start++; 1672 } 1673 1674 newesc = pairs = white = 0; 1675 for (cp = start; '\0' != *cp; cp++) { 1676 1677 /* 1678 * Move the following text left 1679 * after quoted quotes and after "\\" and "\t". 1680 */ 1681 if (pairs) 1682 cp[-pairs] = cp[0]; 1683 1684 if ('\\' == cp[0]) { 1685 /* 1686 * In copy mode, translate double to single 1687 * backslashes and backslash-t to literal tabs. 1688 */ 1689 switch (cp[1]) { 1690 case 'a': 1691 case 't': 1692 cp[-pairs] = '\t'; 1693 pairs++; 1694 cp++; 1695 break; 1696 case '\\': 1697 newesc = 1; 1698 cp[-pairs] = ASCII_ESC; 1699 pairs++; 1700 cp++; 1701 break; 1702 case ' ': 1703 /* Skip escaped blanks. */ 1704 if (0 == quoted) 1705 cp++; 1706 break; 1707 default: 1708 break; 1709 } 1710 } else if (0 == quoted) { 1711 if (' ' == cp[0]) { 1712 /* Unescaped blanks end unquoted args. */ 1713 white = 1; 1714 break; 1715 } 1716 } else if ('"' == cp[0]) { 1717 if ('"' == cp[1]) { 1718 /* Quoted quotes collapse. */ 1719 pairs++; 1720 cp++; 1721 } else { 1722 /* Unquoted quotes end quoted args. */ 1723 quoted = 2; 1724 break; 1725 } 1726 } 1727 } 1728 1729 /* Quoted argument without a closing quote. */ 1730 if (1 == quoted) 1731 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1732 1733 /* NUL-terminate this argument and move to the next one. */ 1734 if (pairs) 1735 cp[-pairs] = '\0'; 1736 if ('\0' != *cp) { 1737 *cp++ = '\0'; 1738 while (' ' == *cp) 1739 cp++; 1740 } 1741 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1742 *cpp = cp; 1743 1744 if ('\0' == *cp && (white || ' ' == cp[-1])) 1745 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1746 1747 start = mandoc_strdup(start); 1748 if (newesc == 0) 1749 return start; 1750 1751 buf.buf = start; 1752 buf.sz = strlen(start) + 1; 1753 buf.next = NULL; 1754 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { 1755 free(buf.buf); 1756 buf.buf = mandoc_strdup(""); 1757 } 1758 return buf.buf; 1759 } 1760 1761 1762 /* 1763 * Process text streams. 1764 */ 1765 static int 1766 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1767 { 1768 size_t sz; 1769 const char *start; 1770 char *p; 1771 int isz; 1772 enum mandoc_esc esc; 1773 1774 /* Spring the input line trap. */ 1775 1776 if (roffit_lines == 1) { 1777 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1778 free(buf->buf); 1779 buf->buf = p; 1780 buf->sz = isz + 1; 1781 *offs = 0; 1782 free(roffit_macro); 1783 roffit_lines = 0; 1784 return ROFF_REPARSE; 1785 } else if (roffit_lines > 1) 1786 --roffit_lines; 1787 1788 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1789 if (roffce_lines < 1) { 1790 r->man->last = roffce_node; 1791 r->man->next = ROFF_NEXT_SIBLING; 1792 roffce_lines = 0; 1793 roffce_node = NULL; 1794 } else 1795 roffce_lines--; 1796 } 1797 1798 /* Convert all breakable hyphens into ASCII_HYPH. */ 1799 1800 start = p = buf->buf + pos; 1801 1802 while (*p != '\0') { 1803 sz = strcspn(p, "-\\"); 1804 p += sz; 1805 1806 if (*p == '\0') 1807 break; 1808 1809 if (*p == '\\') { 1810 /* Skip over escapes. */ 1811 p++; 1812 esc = mandoc_escape((const char **)&p, NULL, NULL); 1813 if (esc == ESCAPE_ERROR) 1814 break; 1815 while (*p == '-') 1816 p++; 1817 continue; 1818 } else if (p == start) { 1819 p++; 1820 continue; 1821 } 1822 1823 if (isalpha((unsigned char)p[-1]) && 1824 isalpha((unsigned char)p[1])) 1825 *p = ASCII_HYPH; 1826 p++; 1827 } 1828 return ROFF_CONT; 1829 } 1830 1831 int 1832 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len) 1833 { 1834 enum roff_tok t; 1835 int e; 1836 int pos; /* parse point */ 1837 int spos; /* saved parse point for messages */ 1838 int ppos; /* original offset in buf->buf */ 1839 int ctl; /* macro line (boolean) */ 1840 1841 ppos = pos = *offs; 1842 1843 if (len > 80 && r->tbl == NULL && r->eqn == NULL && 1844 (r->man->flags & ROFF_NOFILL) == 0 && 1845 strchr(" .\\", buf->buf[pos]) == NULL && 1846 buf->buf[pos] != r->control && 1847 strcspn(buf->buf, " ") < 80) 1848 mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1, 1849 "%.20s...", buf->buf + pos); 1850 1851 /* Handle in-line equation delimiters. */ 1852 1853 if (r->tbl == NULL && 1854 r->last_eqn != NULL && r->last_eqn->delim && 1855 (r->eqn == NULL || r->eqn_inline)) { 1856 e = roff_eqndelim(r, buf, pos); 1857 if (e == ROFF_REPARSE) 1858 return e; 1859 assert(e == ROFF_CONT); 1860 } 1861 1862 /* Handle comments and escape sequences. */ 1863 1864 e = roff_parse_comment(r, buf, ln, pos, r->escape); 1865 if ((e & ROFF_MASK) == ROFF_IGN) 1866 return e; 1867 assert(e == ROFF_CONT); 1868 1869 e = roff_expand(r, buf, ln, pos, r->escape); 1870 if ((e & ROFF_MASK) == ROFF_IGN) 1871 return e; 1872 assert(e == ROFF_CONT); 1873 1874 ctl = roff_getcontrol(r, buf->buf, &pos); 1875 1876 /* 1877 * First, if a scope is open and we're not a macro, pass the 1878 * text through the macro's filter. 1879 * Equations process all content themselves. 1880 * Tables process almost all content themselves, but we want 1881 * to warn about macros before passing it there. 1882 */ 1883 1884 if (r->last != NULL && ! ctl) { 1885 t = r->last->tok; 1886 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1887 if ((e & ROFF_MASK) == ROFF_IGN) 1888 return e; 1889 e &= ~ROFF_MASK; 1890 } else 1891 e = ROFF_IGN; 1892 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1893 eqn_read(r->eqn, buf->buf + ppos); 1894 return e; 1895 } 1896 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1897 tbl_read(r->tbl, ln, buf->buf, ppos); 1898 roff_addtbl(r->man, ln, r->tbl); 1899 return e; 1900 } 1901 if ( ! ctl) { 1902 r->options &= ~MPARSE_COMMENT; 1903 return roff_parsetext(r, buf, pos, offs) | e; 1904 } 1905 1906 /* Skip empty request lines. */ 1907 1908 if (buf->buf[pos] == '"') { 1909 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL); 1910 return ROFF_IGN; 1911 } else if (buf->buf[pos] == '\0') 1912 return ROFF_IGN; 1913 1914 /* 1915 * If a scope is open, go to the child handler for that macro, 1916 * as it may want to preprocess before doing anything with it. 1917 */ 1918 1919 if (r->last) { 1920 t = r->last->tok; 1921 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1922 } 1923 1924 r->options &= ~MPARSE_COMMENT; 1925 spos = pos; 1926 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1927 return roff_req_or_macro(r, t, buf, ln, spos, pos, offs); 1928 } 1929 1930 /* 1931 * Handle a new request or macro. 1932 * May be called outside any scope or from inside a conditional scope. 1933 */ 1934 static int 1935 roff_req_or_macro(ROFF_ARGS) { 1936 1937 /* For now, tables ignore most macros and some request. */ 1938 1939 if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS || 1940 tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj || 1941 tok == ROFF_sp)) { 1942 mandoc_msg(MANDOCERR_TBLMACRO, 1943 ln, ppos, "%s", buf->buf + ppos); 1944 if (tok != TOKEN_NONE) 1945 return ROFF_IGN; 1946 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1947 pos++; 1948 while (buf->buf[pos] == ' ') 1949 pos++; 1950 tbl_read(r->tbl, ln, buf->buf, pos); 1951 roff_addtbl(r->man, ln, r->tbl); 1952 return ROFF_IGN; 1953 } 1954 1955 /* For now, let high level macros abort .ce mode. */ 1956 1957 if (roffce_node != NULL && 1958 (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ || 1959 tok == ROFF_TH || tok == ROFF_TS)) { 1960 r->man->last = roffce_node; 1961 r->man->next = ROFF_NEXT_SIBLING; 1962 roffce_lines = 0; 1963 roffce_node = NULL; 1964 } 1965 1966 /* 1967 * This is neither a roff request nor a user-defined macro. 1968 * Let the standard macro set parsers handle it. 1969 */ 1970 1971 if (tok == TOKEN_NONE) 1972 return ROFF_CONT; 1973 1974 /* Execute a roff request or a user-defined macro. */ 1975 1976 return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs); 1977 } 1978 1979 /* 1980 * Internal interface function to tell the roff parser that execution 1981 * of the current macro ended. This is required because macro 1982 * definitions usually do not end with a .return request. 1983 */ 1984 void 1985 roff_userret(struct roff *r) 1986 { 1987 struct mctx *ctx; 1988 int i; 1989 1990 assert(r->mstackpos >= 0); 1991 ctx = r->mstack + r->mstackpos; 1992 for (i = 0; i < ctx->argc; i++) 1993 free(ctx->argv[i]); 1994 ctx->argc = 0; 1995 r->mstackpos--; 1996 } 1997 1998 void 1999 roff_endparse(struct roff *r) 2000 { 2001 if (r->last != NULL) 2002 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, 2003 r->last->col, "%s", roff_name[r->last->tok]); 2004 2005 if (r->eqn != NULL) { 2006 mandoc_msg(MANDOCERR_BLK_NOEND, 2007 r->eqn->node->line, r->eqn->node->pos, "EQ"); 2008 eqn_parse(r->eqn); 2009 r->eqn = NULL; 2010 } 2011 2012 if (r->tbl != NULL) { 2013 tbl_end(r->tbl, 1); 2014 r->tbl = NULL; 2015 } 2016 } 2017 2018 /* 2019 * Parse the request or macro name at buf[*pos]. 2020 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value. 2021 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE. 2022 * As a side effect, set r->current_string to the definition or to NULL. 2023 */ 2024 static enum roff_tok 2025 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 2026 { 2027 char *cp; 2028 const char *mac; 2029 size_t maclen; 2030 int deftype; 2031 enum roff_tok t; 2032 2033 cp = buf + *pos; 2034 2035 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 2036 return TOKEN_NONE; 2037 2038 mac = cp; 2039 maclen = roff_getname(r, &cp, ln, ppos); 2040 2041 deftype = ROFFDEF_USER | ROFFDEF_REN; 2042 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 2043 switch (deftype) { 2044 case ROFFDEF_USER: 2045 t = ROFF_USERDEF; 2046 break; 2047 case ROFFDEF_REN: 2048 t = ROFF_RENAMED; 2049 break; 2050 default: 2051 t = roffhash_find(r->reqtab, mac, maclen); 2052 break; 2053 } 2054 if (t != TOKEN_NONE) 2055 *pos = cp - buf; 2056 else if (deftype == ROFFDEF_UNDEF) { 2057 /* Using an undefined macro defines it to be empty. */ 2058 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 2059 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 2060 } 2061 return t; 2062 } 2063 2064 /* --- handling of request blocks ----------------------------------------- */ 2065 2066 /* 2067 * Close a macro definition block or an "ignore" block. 2068 */ 2069 static int 2070 roff_cblock(ROFF_ARGS) 2071 { 2072 int rr; 2073 2074 if (r->last == NULL) { 2075 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2076 return ROFF_IGN; 2077 } 2078 2079 switch (r->last->tok) { 2080 case ROFF_am: 2081 case ROFF_ami: 2082 case ROFF_de: 2083 case ROFF_dei: 2084 case ROFF_ig: 2085 break; 2086 case ROFF_am1: 2087 case ROFF_de1: 2088 /* Remapped in roff_block(). */ 2089 abort(); 2090 default: 2091 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2092 return ROFF_IGN; 2093 } 2094 2095 roffnode_pop(r); 2096 roffnode_cleanscope(r); 2097 2098 /* 2099 * If a conditional block with braces is still open, 2100 * check for "\}" block end markers. 2101 */ 2102 2103 if (r->last != NULL && r->last->endspan < 0) { 2104 rr = 1; /* If arguments follow "\}", warn about them. */ 2105 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2106 } 2107 2108 if (buf->buf[pos] != '\0') 2109 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 2110 ".. %s", buf->buf + pos); 2111 2112 return ROFF_IGN; 2113 } 2114 2115 /* 2116 * Pop all nodes ending at the end of the current input line. 2117 * Return the number of loops ended. 2118 */ 2119 static int 2120 roffnode_cleanscope(struct roff *r) 2121 { 2122 int inloop; 2123 2124 inloop = 0; 2125 while (r->last != NULL && r->last->endspan > 0) { 2126 if (--r->last->endspan != 0) 2127 break; 2128 inloop += roffnode_pop(r); 2129 } 2130 return inloop; 2131 } 2132 2133 /* 2134 * Handle the closing "\}" of a conditional block. 2135 * Apart from generating warnings, this only pops nodes. 2136 * Return the number of loops ended. 2137 */ 2138 static int 2139 roff_ccond(struct roff *r, int ln, int ppos) 2140 { 2141 if (NULL == r->last) { 2142 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2143 return 0; 2144 } 2145 2146 switch (r->last->tok) { 2147 case ROFF_el: 2148 case ROFF_ie: 2149 case ROFF_if: 2150 case ROFF_while: 2151 break; 2152 default: 2153 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2154 return 0; 2155 } 2156 2157 if (r->last->endspan > -1) { 2158 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2159 return 0; 2160 } 2161 2162 return roffnode_pop(r) + roffnode_cleanscope(r); 2163 } 2164 2165 static int 2166 roff_block(ROFF_ARGS) 2167 { 2168 const char *name, *value; 2169 char *call, *cp, *iname, *rname; 2170 size_t csz, namesz, rsz; 2171 int deftype; 2172 2173 /* Ignore groff compatibility mode for now. */ 2174 2175 if (tok == ROFF_de1) 2176 tok = ROFF_de; 2177 else if (tok == ROFF_dei1) 2178 tok = ROFF_dei; 2179 else if (tok == ROFF_am1) 2180 tok = ROFF_am; 2181 else if (tok == ROFF_ami1) 2182 tok = ROFF_ami; 2183 2184 /* Parse the macro name argument. */ 2185 2186 cp = buf->buf + pos; 2187 if (tok == ROFF_ig) { 2188 iname = NULL; 2189 namesz = 0; 2190 } else { 2191 iname = cp; 2192 namesz = roff_getname(r, &cp, ln, ppos); 2193 iname[namesz] = '\0'; 2194 } 2195 2196 /* Resolve the macro name argument if it is indirect. */ 2197 2198 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2199 deftype = ROFFDEF_USER; 2200 name = roff_getstrn(r, iname, namesz, &deftype); 2201 if (name == NULL) { 2202 mandoc_msg(MANDOCERR_STR_UNDEF, 2203 ln, (int)(iname - buf->buf), 2204 "%.*s", (int)namesz, iname); 2205 namesz = 0; 2206 } else 2207 namesz = strlen(name); 2208 } else 2209 name = iname; 2210 2211 if (namesz == 0 && tok != ROFF_ig) { 2212 mandoc_msg(MANDOCERR_REQ_EMPTY, 2213 ln, ppos, "%s", roff_name[tok]); 2214 return ROFF_IGN; 2215 } 2216 2217 roffnode_push(r, tok, name, ln, ppos); 2218 2219 /* 2220 * At the beginning of a `de' macro, clear the existing string 2221 * with the same name, if there is one. New content will be 2222 * appended from roff_block_text() in multiline mode. 2223 */ 2224 2225 if (tok == ROFF_de || tok == ROFF_dei) { 2226 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2227 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2228 } else if (tok == ROFF_am || tok == ROFF_ami) { 2229 deftype = ROFFDEF_ANY; 2230 value = roff_getstrn(r, iname, namesz, &deftype); 2231 switch (deftype) { /* Before appending, ... */ 2232 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2233 roff_setstrn(&r->strtab, name, namesz, 2234 value, strlen(value), 0); 2235 break; 2236 case ROFFDEF_REN: /* call original standard macro. */ 2237 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2238 (int)strlen(value), value); 2239 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2240 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2241 free(call); 2242 break; 2243 case ROFFDEF_STD: /* rename and call standard macro. */ 2244 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2245 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2246 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2247 (int)rsz, rname); 2248 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2249 free(call); 2250 free(rname); 2251 break; 2252 default: 2253 break; 2254 } 2255 } 2256 2257 if (*cp == '\0') 2258 return ROFF_IGN; 2259 2260 /* Get the custom end marker. */ 2261 2262 iname = cp; 2263 namesz = roff_getname(r, &cp, ln, ppos); 2264 2265 /* Resolve the end marker if it is indirect. */ 2266 2267 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2268 deftype = ROFFDEF_USER; 2269 name = roff_getstrn(r, iname, namesz, &deftype); 2270 if (name == NULL) { 2271 mandoc_msg(MANDOCERR_STR_UNDEF, 2272 ln, (int)(iname - buf->buf), 2273 "%.*s", (int)namesz, iname); 2274 namesz = 0; 2275 } else 2276 namesz = strlen(name); 2277 } else 2278 name = iname; 2279 2280 if (namesz) 2281 r->last->end = mandoc_strndup(name, namesz); 2282 2283 if (*cp != '\0') 2284 mandoc_msg(MANDOCERR_ARG_EXCESS, 2285 ln, pos, ".%s ... %s", roff_name[tok], cp); 2286 2287 return ROFF_IGN; 2288 } 2289 2290 static int 2291 roff_block_sub(ROFF_ARGS) 2292 { 2293 enum roff_tok t; 2294 int i, j; 2295 2296 /* 2297 * If a custom end marker is a user-defined or predefined macro 2298 * or a request, interpret it. 2299 */ 2300 2301 if (r->last->end) { 2302 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2303 if (buf->buf[i] != r->last->end[j]) 2304 break; 2305 2306 if (r->last->end[j] == '\0' && 2307 (buf->buf[i] == '\0' || 2308 buf->buf[i] == ' ' || 2309 buf->buf[i] == '\t')) { 2310 roffnode_pop(r); 2311 roffnode_cleanscope(r); 2312 2313 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2314 i++; 2315 2316 pos = i; 2317 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2318 TOKEN_NONE) 2319 return ROFF_RERUN; 2320 return ROFF_IGN; 2321 } 2322 } 2323 2324 /* Handle the standard end marker. */ 2325 2326 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2327 if (t == ROFF_cblock) 2328 return roff_cblock(r, t, buf, ln, ppos, pos, offs); 2329 2330 /* Not an end marker, so append the line to the block. */ 2331 2332 if (tok != ROFF_ig) 2333 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2334 return ROFF_IGN; 2335 } 2336 2337 static int 2338 roff_block_text(ROFF_ARGS) 2339 { 2340 2341 if (tok != ROFF_ig) 2342 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2343 2344 return ROFF_IGN; 2345 } 2346 2347 /* 2348 * Check for a closing "\}" and handle it. 2349 * In this function, the final "int *offs" argument is used for 2350 * different purposes than elsewhere: 2351 * Input: *offs == 0: caller wants to discard arguments following \} 2352 * *offs == 1: caller wants to preserve text following \} 2353 * Output: *offs = 0: tell caller to discard input line 2354 * *offs = 1: tell caller to use input line 2355 */ 2356 static int 2357 roff_cond_checkend(ROFF_ARGS) 2358 { 2359 char *ep; 2360 int endloop, irc, rr; 2361 2362 irc = ROFF_IGN; 2363 rr = r->last->rule; 2364 endloop = tok != ROFF_while ? ROFF_IGN : 2365 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2366 if (roffnode_cleanscope(r)) 2367 irc |= endloop; 2368 2369 /* 2370 * If "\}" occurs on a macro line without a preceding macro or 2371 * a text line contains nothing else, drop the line completely. 2372 */ 2373 2374 ep = buf->buf + pos; 2375 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0)) 2376 rr = 0; 2377 2378 /* 2379 * The closing delimiter "\}" rewinds the conditional scope 2380 * but is otherwise ignored when interpreting the line. 2381 */ 2382 2383 while ((ep = strchr(ep, '\\')) != NULL) { 2384 switch (ep[1]) { 2385 case '}': 2386 if (ep[2] == '\0') 2387 ep[0] = '\0'; 2388 else if (rr) 2389 ep[1] = '&'; 2390 else 2391 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2392 if (roff_ccond(r, ln, ep - buf->buf)) 2393 irc |= endloop; 2394 break; 2395 case '\0': 2396 ++ep; 2397 break; 2398 default: 2399 ep += 2; 2400 break; 2401 } 2402 } 2403 *offs = rr; 2404 return irc; 2405 } 2406 2407 /* 2408 * Parse and process a request or macro line in conditional scope. 2409 */ 2410 static int 2411 roff_cond_sub(ROFF_ARGS) 2412 { 2413 struct roffnode *bl; 2414 int irc, rr, spos; 2415 enum roff_tok t; 2416 2417 rr = 0; /* If arguments follow "\}", skip them. */ 2418 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2419 spos = pos; 2420 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2421 2422 /* 2423 * Handle requests and macros if the conditional evaluated 2424 * to true or if they are structurally required. 2425 * The .break request is always handled specially. 2426 */ 2427 2428 if (t == ROFF_break) { 2429 if (irc & ROFF_LOOPMASK) 2430 irc = ROFF_IGN | ROFF_LOOPEXIT; 2431 else if (rr) { 2432 for (bl = r->last; bl != NULL; bl = bl->parent) { 2433 bl->rule = 0; 2434 if (bl->tok == ROFF_while) 2435 break; 2436 } 2437 } 2438 } else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) { 2439 irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs); 2440 if (irc & ROFF_WHILE) 2441 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT); 2442 } 2443 return irc; 2444 } 2445 2446 /* 2447 * Parse and process a text line in conditional scope. 2448 */ 2449 static int 2450 roff_cond_text(ROFF_ARGS) 2451 { 2452 int irc, rr; 2453 2454 rr = 1; /* If arguments follow "\}", preserve them. */ 2455 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2456 if (rr) 2457 irc |= ROFF_CONT; 2458 return irc; 2459 } 2460 2461 /* --- handling of numeric and conditional expressions -------------------- */ 2462 2463 /* 2464 * Parse a single signed integer number. Stop at the first non-digit. 2465 * If there is at least one digit, return success and advance the 2466 * parse point, else return failure and let the parse point unchanged. 2467 * Ignore overflows, treat them just like the C language. 2468 */ 2469 static int 2470 roff_getnum(const char *v, int *pos, int *res, int flags) 2471 { 2472 int myres, scaled, n, p; 2473 2474 if (NULL == res) 2475 res = &myres; 2476 2477 p = *pos; 2478 n = v[p] == '-'; 2479 if (n || v[p] == '+') 2480 p++; 2481 2482 if (flags & ROFFNUM_WHITE) 2483 while (isspace((unsigned char)v[p])) 2484 p++; 2485 2486 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2487 *res = 10 * *res + v[p] - '0'; 2488 if (p == *pos + n) 2489 return 0; 2490 2491 if (n) 2492 *res = -*res; 2493 2494 /* Each number may be followed by one optional scaling unit. */ 2495 2496 switch (v[p]) { 2497 case 'f': 2498 scaled = *res * 65536; 2499 break; 2500 case 'i': 2501 scaled = *res * 240; 2502 break; 2503 case 'c': 2504 scaled = *res * 240 / 2.54; 2505 break; 2506 case 'v': 2507 case 'P': 2508 scaled = *res * 40; 2509 break; 2510 case 'm': 2511 case 'n': 2512 scaled = *res * 24; 2513 break; 2514 case 'p': 2515 scaled = *res * 10 / 3; 2516 break; 2517 case 'u': 2518 scaled = *res; 2519 break; 2520 case 'M': 2521 scaled = *res * 6 / 25; 2522 break; 2523 default: 2524 scaled = *res; 2525 p--; 2526 break; 2527 } 2528 if (flags & ROFFNUM_SCALE) 2529 *res = scaled; 2530 2531 *pos = p + 1; 2532 return 1; 2533 } 2534 2535 /* 2536 * Evaluate a string comparison condition. 2537 * The first character is the delimiter. 2538 * Succeed if the string up to its second occurrence 2539 * matches the string up to its third occurence. 2540 * Advance the cursor after the third occurrence 2541 * or lacking that, to the end of the line. 2542 */ 2543 static int 2544 roff_evalstrcond(const char *v, int *pos) 2545 { 2546 const char *s1, *s2, *s3; 2547 int match; 2548 2549 match = 0; 2550 s1 = v + *pos; /* initial delimiter */ 2551 s2 = s1 + 1; /* for scanning the first string */ 2552 s3 = strchr(s2, *s1); /* for scanning the second string */ 2553 2554 if (NULL == s3) /* found no middle delimiter */ 2555 goto out; 2556 2557 while ('\0' != *++s3) { 2558 if (*s2 != *s3) { /* mismatch */ 2559 s3 = strchr(s3, *s1); 2560 break; 2561 } 2562 if (*s3 == *s1) { /* found the final delimiter */ 2563 match = 1; 2564 break; 2565 } 2566 s2++; 2567 } 2568 2569 out: 2570 if (NULL == s3) 2571 s3 = strchr(s2, '\0'); 2572 else if (*s3 != '\0') 2573 s3++; 2574 *pos = s3 - v; 2575 return match; 2576 } 2577 2578 /* 2579 * Evaluate an optionally negated single character, numerical, 2580 * or string condition. 2581 */ 2582 static int 2583 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2584 { 2585 const char *start, *end; 2586 char *cp, *name; 2587 size_t sz; 2588 int deftype, len, number, savepos, istrue, wanttrue; 2589 2590 if ('!' == v[*pos]) { 2591 wanttrue = 0; 2592 (*pos)++; 2593 } else 2594 wanttrue = 1; 2595 2596 switch (v[*pos]) { 2597 case '\0': 2598 return 0; 2599 case 'n': 2600 case 'o': 2601 (*pos)++; 2602 return wanttrue; 2603 case 'e': 2604 case 't': 2605 case 'v': 2606 (*pos)++; 2607 return !wanttrue; 2608 case 'c': 2609 do { 2610 (*pos)++; 2611 } while (v[*pos] == ' '); 2612 2613 /* 2614 * Quirk for groff compatibility: 2615 * The horizontal tab is neither available nor unavailable. 2616 */ 2617 2618 if (v[*pos] == '\t') { 2619 (*pos)++; 2620 return 0; 2621 } 2622 2623 /* Printable ASCII characters are available. */ 2624 2625 if (v[*pos] != '\\') { 2626 (*pos)++; 2627 return wanttrue; 2628 } 2629 2630 end = v + ++*pos; 2631 switch (mandoc_escape(&end, &start, &len)) { 2632 case ESCAPE_SPECIAL: 2633 istrue = mchars_spec2cp(start, len) != -1; 2634 break; 2635 case ESCAPE_UNICODE: 2636 istrue = 1; 2637 break; 2638 case ESCAPE_NUMBERED: 2639 istrue = mchars_num2char(start, len) != -1; 2640 break; 2641 default: 2642 istrue = !wanttrue; 2643 break; 2644 } 2645 *pos = end - v; 2646 return istrue == wanttrue; 2647 case 'd': 2648 case 'r': 2649 cp = v + *pos + 1; 2650 while (*cp == ' ') 2651 cp++; 2652 name = cp; 2653 sz = roff_getname(r, &cp, ln, cp - v); 2654 if (sz == 0) 2655 istrue = 0; 2656 else if (v[*pos] == 'r') 2657 istrue = roff_hasregn(r, name, sz); 2658 else { 2659 deftype = ROFFDEF_ANY; 2660 roff_getstrn(r, name, sz, &deftype); 2661 istrue = !!deftype; 2662 } 2663 *pos = (name + sz) - v; 2664 return istrue == wanttrue; 2665 default: 2666 break; 2667 } 2668 2669 savepos = *pos; 2670 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2671 return (number > 0) == wanttrue; 2672 else if (*pos == savepos) 2673 return roff_evalstrcond(v, pos) == wanttrue; 2674 else 2675 return 0; 2676 } 2677 2678 static int 2679 roff_line_ignore(ROFF_ARGS) 2680 { 2681 2682 return ROFF_IGN; 2683 } 2684 2685 static int 2686 roff_insec(ROFF_ARGS) 2687 { 2688 2689 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2690 return ROFF_IGN; 2691 } 2692 2693 static int 2694 roff_unsupp(ROFF_ARGS) 2695 { 2696 2697 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2698 return ROFF_IGN; 2699 } 2700 2701 static int 2702 roff_cond(ROFF_ARGS) 2703 { 2704 int irc; 2705 2706 roffnode_push(r, tok, NULL, ln, ppos); 2707 2708 /* 2709 * An `.el' has no conditional body: it will consume the value 2710 * of the current rstack entry set in prior `ie' calls or 2711 * defaults to DENY. 2712 * 2713 * If we're not an `el', however, then evaluate the conditional. 2714 */ 2715 2716 r->last->rule = tok == ROFF_el ? 2717 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2718 roff_evalcond(r, ln, buf->buf, &pos); 2719 2720 /* 2721 * An if-else will put the NEGATION of the current evaluated 2722 * conditional into the stack of rules. 2723 */ 2724 2725 if (tok == ROFF_ie) { 2726 if (r->rstackpos + 1 == r->rstacksz) { 2727 r->rstacksz += 16; 2728 r->rstack = mandoc_reallocarray(r->rstack, 2729 r->rstacksz, sizeof(int)); 2730 } 2731 r->rstack[++r->rstackpos] = !r->last->rule; 2732 } 2733 2734 /* If the parent has false as its rule, then so do we. */ 2735 2736 if (r->last->parent && !r->last->parent->rule) 2737 r->last->rule = 0; 2738 2739 /* 2740 * Determine scope. 2741 * If there is nothing on the line after the conditional, 2742 * not even whitespace, use next-line scope. 2743 * Except that .while does not support next-line scope. 2744 */ 2745 2746 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2747 r->last->endspan = 2; 2748 goto out; 2749 } 2750 2751 while (buf->buf[pos] == ' ') 2752 pos++; 2753 2754 /* An opening brace requests multiline scope. */ 2755 2756 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2757 r->last->endspan = -1; 2758 pos += 2; 2759 while (buf->buf[pos] == ' ') 2760 pos++; 2761 goto out; 2762 } 2763 2764 /* 2765 * Anything else following the conditional causes 2766 * single-line scope. Warn if the scope contains 2767 * nothing but trailing whitespace. 2768 */ 2769 2770 if (buf->buf[pos] == '\0') 2771 mandoc_msg(MANDOCERR_COND_EMPTY, 2772 ln, ppos, "%s", roff_name[tok]); 2773 2774 r->last->endspan = 1; 2775 2776 out: 2777 *offs = pos; 2778 irc = ROFF_RERUN; 2779 if (tok == ROFF_while) 2780 irc |= ROFF_WHILE; 2781 return irc; 2782 } 2783 2784 static int 2785 roff_ds(ROFF_ARGS) 2786 { 2787 char *string; 2788 const char *name; 2789 size_t namesz; 2790 2791 /* Ignore groff compatibility mode for now. */ 2792 2793 if (tok == ROFF_ds1) 2794 tok = ROFF_ds; 2795 else if (tok == ROFF_as1) 2796 tok = ROFF_as; 2797 2798 /* 2799 * The first word is the name of the string. 2800 * If it is empty or terminated by an escape sequence, 2801 * abort the `ds' request without defining anything. 2802 */ 2803 2804 name = string = buf->buf + pos; 2805 if (*name == '\0') 2806 return ROFF_IGN; 2807 2808 namesz = roff_getname(r, &string, ln, pos); 2809 switch (name[namesz]) { 2810 case '\\': 2811 return ROFF_IGN; 2812 case '\t': 2813 string = buf->buf + pos + namesz; 2814 break; 2815 default: 2816 break; 2817 } 2818 2819 /* Read past the initial double-quote, if any. */ 2820 if (*string == '"') 2821 string++; 2822 2823 /* The rest is the value. */ 2824 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2825 ROFF_as == tok); 2826 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2827 return ROFF_IGN; 2828 } 2829 2830 /* 2831 * Parse a single operator, one or two characters long. 2832 * If the operator is recognized, return success and advance the 2833 * parse point, else return failure and let the parse point unchanged. 2834 */ 2835 static int 2836 roff_getop(const char *v, int *pos, char *res) 2837 { 2838 2839 *res = v[*pos]; 2840 2841 switch (*res) { 2842 case '+': 2843 case '-': 2844 case '*': 2845 case '/': 2846 case '%': 2847 case '&': 2848 case ':': 2849 break; 2850 case '<': 2851 switch (v[*pos + 1]) { 2852 case '=': 2853 *res = 'l'; 2854 (*pos)++; 2855 break; 2856 case '>': 2857 *res = '!'; 2858 (*pos)++; 2859 break; 2860 case '?': 2861 *res = 'i'; 2862 (*pos)++; 2863 break; 2864 default: 2865 break; 2866 } 2867 break; 2868 case '>': 2869 switch (v[*pos + 1]) { 2870 case '=': 2871 *res = 'g'; 2872 (*pos)++; 2873 break; 2874 case '?': 2875 *res = 'a'; 2876 (*pos)++; 2877 break; 2878 default: 2879 break; 2880 } 2881 break; 2882 case '=': 2883 if ('=' == v[*pos + 1]) 2884 (*pos)++; 2885 break; 2886 default: 2887 return 0; 2888 } 2889 (*pos)++; 2890 2891 return *res; 2892 } 2893 2894 /* 2895 * Evaluate either a parenthesized numeric expression 2896 * or a single signed integer number. 2897 */ 2898 static int 2899 roff_evalpar(struct roff *r, int ln, 2900 const char *v, int *pos, int *res, int flags) 2901 { 2902 2903 if ('(' != v[*pos]) 2904 return roff_getnum(v, pos, res, flags); 2905 2906 (*pos)++; 2907 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2908 return 0; 2909 2910 /* 2911 * Omission of the closing parenthesis 2912 * is an error in validation mode, 2913 * but ignored in evaluation mode. 2914 */ 2915 2916 if (')' == v[*pos]) 2917 (*pos)++; 2918 else if (NULL == res) 2919 return 0; 2920 2921 return 1; 2922 } 2923 2924 /* 2925 * Evaluate a complete numeric expression. 2926 * Proceed left to right, there is no concept of precedence. 2927 */ 2928 static int 2929 roff_evalnum(struct roff *r, int ln, const char *v, 2930 int *pos, int *res, int flags) 2931 { 2932 int mypos, operand2; 2933 char operator; 2934 2935 if (NULL == pos) { 2936 mypos = 0; 2937 pos = &mypos; 2938 } 2939 2940 if (flags & ROFFNUM_WHITE) 2941 while (isspace((unsigned char)v[*pos])) 2942 (*pos)++; 2943 2944 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2945 return 0; 2946 2947 while (1) { 2948 if (flags & ROFFNUM_WHITE) 2949 while (isspace((unsigned char)v[*pos])) 2950 (*pos)++; 2951 2952 if ( ! roff_getop(v, pos, &operator)) 2953 break; 2954 2955 if (flags & ROFFNUM_WHITE) 2956 while (isspace((unsigned char)v[*pos])) 2957 (*pos)++; 2958 2959 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2960 return 0; 2961 2962 if (flags & ROFFNUM_WHITE) 2963 while (isspace((unsigned char)v[*pos])) 2964 (*pos)++; 2965 2966 if (NULL == res) 2967 continue; 2968 2969 switch (operator) { 2970 case '+': 2971 *res += operand2; 2972 break; 2973 case '-': 2974 *res -= operand2; 2975 break; 2976 case '*': 2977 *res *= operand2; 2978 break; 2979 case '/': 2980 if (operand2 == 0) { 2981 mandoc_msg(MANDOCERR_DIVZERO, 2982 ln, *pos, "%s", v); 2983 *res = 0; 2984 break; 2985 } 2986 *res /= operand2; 2987 break; 2988 case '%': 2989 if (operand2 == 0) { 2990 mandoc_msg(MANDOCERR_DIVZERO, 2991 ln, *pos, "%s", v); 2992 *res = 0; 2993 break; 2994 } 2995 *res %= operand2; 2996 break; 2997 case '<': 2998 *res = *res < operand2; 2999 break; 3000 case '>': 3001 *res = *res > operand2; 3002 break; 3003 case 'l': 3004 *res = *res <= operand2; 3005 break; 3006 case 'g': 3007 *res = *res >= operand2; 3008 break; 3009 case '=': 3010 *res = *res == operand2; 3011 break; 3012 case '!': 3013 *res = *res != operand2; 3014 break; 3015 case '&': 3016 *res = *res && operand2; 3017 break; 3018 case ':': 3019 *res = *res || operand2; 3020 break; 3021 case 'i': 3022 if (operand2 < *res) 3023 *res = operand2; 3024 break; 3025 case 'a': 3026 if (operand2 > *res) 3027 *res = operand2; 3028 break; 3029 default: 3030 abort(); 3031 } 3032 } 3033 return 1; 3034 } 3035 3036 /* --- register management ------------------------------------------------ */ 3037 3038 void 3039 roff_setreg(struct roff *r, const char *name, int val, char sign) 3040 { 3041 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 3042 } 3043 3044 static void 3045 roff_setregn(struct roff *r, const char *name, size_t len, 3046 int val, char sign, int step) 3047 { 3048 struct roffreg *reg; 3049 3050 /* Search for an existing register with the same name. */ 3051 reg = r->regtab; 3052 3053 while (reg != NULL && (reg->key.sz != len || 3054 strncmp(reg->key.p, name, len) != 0)) 3055 reg = reg->next; 3056 3057 if (NULL == reg) { 3058 /* Create a new register. */ 3059 reg = mandoc_malloc(sizeof(struct roffreg)); 3060 reg->key.p = mandoc_strndup(name, len); 3061 reg->key.sz = len; 3062 reg->val = 0; 3063 reg->step = 0; 3064 reg->next = r->regtab; 3065 r->regtab = reg; 3066 } 3067 3068 if ('+' == sign) 3069 reg->val += val; 3070 else if ('-' == sign) 3071 reg->val -= val; 3072 else 3073 reg->val = val; 3074 if (step != INT_MIN) 3075 reg->step = step; 3076 } 3077 3078 /* 3079 * Handle some predefined read-only number registers. 3080 * For now, return -1 if the requested register is not predefined; 3081 * in case a predefined read-only register having the value -1 3082 * were to turn up, another special value would have to be chosen. 3083 */ 3084 static int 3085 roff_getregro(const struct roff *r, const char *name) 3086 { 3087 3088 switch (*name) { 3089 case '$': /* Number of arguments of the last macro evaluated. */ 3090 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 3091 case 'A': /* ASCII approximation mode is always off. */ 3092 return 0; 3093 case 'g': /* Groff compatibility mode is always on. */ 3094 return 1; 3095 case 'H': /* Fixed horizontal resolution. */ 3096 return 24; 3097 case 'j': /* Always adjust left margin only. */ 3098 return 0; 3099 case 'T': /* Some output device is always defined. */ 3100 return 1; 3101 case 'V': /* Fixed vertical resolution. */ 3102 return 40; 3103 default: 3104 return -1; 3105 } 3106 } 3107 3108 int 3109 roff_getreg(struct roff *r, const char *name) 3110 { 3111 return roff_getregn(r, name, strlen(name), '\0'); 3112 } 3113 3114 static int 3115 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3116 { 3117 struct roffreg *reg; 3118 int val; 3119 3120 if ('.' == name[0] && 2 == len) { 3121 val = roff_getregro(r, name + 1); 3122 if (-1 != val) 3123 return val; 3124 } 3125 3126 for (reg = r->regtab; reg; reg = reg->next) { 3127 if (len == reg->key.sz && 3128 0 == strncmp(name, reg->key.p, len)) { 3129 switch (sign) { 3130 case '+': 3131 reg->val += reg->step; 3132 break; 3133 case '-': 3134 reg->val -= reg->step; 3135 break; 3136 default: 3137 break; 3138 } 3139 return reg->val; 3140 } 3141 } 3142 3143 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3144 return 0; 3145 } 3146 3147 static int 3148 roff_hasregn(const struct roff *r, const char *name, size_t len) 3149 { 3150 struct roffreg *reg; 3151 int val; 3152 3153 if ('.' == name[0] && 2 == len) { 3154 val = roff_getregro(r, name + 1); 3155 if (-1 != val) 3156 return 1; 3157 } 3158 3159 for (reg = r->regtab; reg; reg = reg->next) 3160 if (len == reg->key.sz && 3161 0 == strncmp(name, reg->key.p, len)) 3162 return 1; 3163 3164 return 0; 3165 } 3166 3167 static void 3168 roff_freereg(struct roffreg *reg) 3169 { 3170 struct roffreg *old_reg; 3171 3172 while (NULL != reg) { 3173 free(reg->key.p); 3174 old_reg = reg; 3175 reg = reg->next; 3176 free(old_reg); 3177 } 3178 } 3179 3180 static int 3181 roff_nr(ROFF_ARGS) 3182 { 3183 char *key, *val, *step; 3184 size_t keysz; 3185 int iv, is, len; 3186 char sign; 3187 3188 key = val = buf->buf + pos; 3189 if (*key == '\0') 3190 return ROFF_IGN; 3191 3192 keysz = roff_getname(r, &val, ln, pos); 3193 if (key[keysz] == '\\' || key[keysz] == '\t') 3194 return ROFF_IGN; 3195 3196 sign = *val; 3197 if (sign == '+' || sign == '-') 3198 val++; 3199 3200 len = 0; 3201 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3202 return ROFF_IGN; 3203 3204 step = val + len; 3205 while (isspace((unsigned char)*step)) 3206 step++; 3207 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3208 is = INT_MIN; 3209 3210 roff_setregn(r, key, keysz, iv, sign, is); 3211 return ROFF_IGN; 3212 } 3213 3214 static int 3215 roff_rr(ROFF_ARGS) 3216 { 3217 struct roffreg *reg, **prev; 3218 char *name, *cp; 3219 size_t namesz; 3220 3221 name = cp = buf->buf + pos; 3222 if (*name == '\0') 3223 return ROFF_IGN; 3224 namesz = roff_getname(r, &cp, ln, pos); 3225 name[namesz] = '\0'; 3226 3227 prev = &r->regtab; 3228 while (1) { 3229 reg = *prev; 3230 if (reg == NULL || !strcmp(name, reg->key.p)) 3231 break; 3232 prev = ®->next; 3233 } 3234 if (reg != NULL) { 3235 *prev = reg->next; 3236 free(reg->key.p); 3237 free(reg); 3238 } 3239 return ROFF_IGN; 3240 } 3241 3242 /* --- handler functions for roff requests -------------------------------- */ 3243 3244 static int 3245 roff_rm(ROFF_ARGS) 3246 { 3247 const char *name; 3248 char *cp; 3249 size_t namesz; 3250 3251 cp = buf->buf + pos; 3252 while (*cp != '\0') { 3253 name = cp; 3254 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3255 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3256 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3257 if (name[namesz] == '\\' || name[namesz] == '\t') 3258 break; 3259 } 3260 return ROFF_IGN; 3261 } 3262 3263 static int 3264 roff_it(ROFF_ARGS) 3265 { 3266 int iv; 3267 3268 /* Parse the number of lines. */ 3269 3270 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3271 mandoc_msg(MANDOCERR_IT_NONUM, 3272 ln, ppos, "%s", buf->buf + 1); 3273 return ROFF_IGN; 3274 } 3275 3276 while (isspace((unsigned char)buf->buf[pos])) 3277 pos++; 3278 3279 /* 3280 * Arm the input line trap. 3281 * Special-casing "an-trap" is an ugly workaround to cope 3282 * with DocBook stupidly fiddling with man(7) internals. 3283 */ 3284 3285 roffit_lines = iv; 3286 roffit_macro = mandoc_strdup(iv != 1 || 3287 strcmp(buf->buf + pos, "an-trap") ? 3288 buf->buf + pos : "br"); 3289 return ROFF_IGN; 3290 } 3291 3292 static int 3293 roff_Dd(ROFF_ARGS) 3294 { 3295 int mask; 3296 enum roff_tok t, te; 3297 3298 switch (tok) { 3299 case ROFF_Dd: 3300 tok = MDOC_Dd; 3301 te = MDOC_MAX; 3302 if (r->format == 0) 3303 r->format = MPARSE_MDOC; 3304 mask = MPARSE_MDOC | MPARSE_QUICK; 3305 break; 3306 case ROFF_TH: 3307 tok = MAN_TH; 3308 te = MAN_MAX; 3309 if (r->format == 0) 3310 r->format = MPARSE_MAN; 3311 mask = MPARSE_QUICK; 3312 break; 3313 default: 3314 abort(); 3315 } 3316 if ((r->options & mask) == 0) 3317 for (t = tok; t < te; t++) 3318 roff_setstr(r, roff_name[t], NULL, 0); 3319 return ROFF_CONT; 3320 } 3321 3322 static int 3323 roff_TE(ROFF_ARGS) 3324 { 3325 r->man->flags &= ~ROFF_NONOFILL; 3326 if (r->tbl == NULL) { 3327 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3328 return ROFF_IGN; 3329 } 3330 if (tbl_end(r->tbl, 0) == 0) { 3331 r->tbl = NULL; 3332 free(buf->buf); 3333 buf->buf = mandoc_strdup(".sp"); 3334 buf->sz = 4; 3335 *offs = 0; 3336 return ROFF_REPARSE; 3337 } 3338 r->tbl = NULL; 3339 return ROFF_IGN; 3340 } 3341 3342 static int 3343 roff_T_(ROFF_ARGS) 3344 { 3345 3346 if (NULL == r->tbl) 3347 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3348 else 3349 tbl_restart(ln, ppos, r->tbl); 3350 3351 return ROFF_IGN; 3352 } 3353 3354 /* 3355 * Handle in-line equation delimiters. 3356 */ 3357 static int 3358 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3359 { 3360 char *cp1, *cp2; 3361 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3362 3363 /* 3364 * Outside equations, look for an opening delimiter. 3365 * If we are inside an equation, we already know it is 3366 * in-line, or this function wouldn't have been called; 3367 * so look for a closing delimiter. 3368 */ 3369 3370 cp1 = buf->buf + pos; 3371 cp2 = strchr(cp1, r->eqn == NULL ? 3372 r->last_eqn->odelim : r->last_eqn->cdelim); 3373 if (cp2 == NULL) 3374 return ROFF_CONT; 3375 3376 *cp2++ = '\0'; 3377 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3378 3379 /* Handle preceding text, protecting whitespace. */ 3380 3381 if (*buf->buf != '\0') { 3382 if (r->eqn == NULL) 3383 bef_pr = "\\&"; 3384 bef_nl = "\n"; 3385 } 3386 3387 /* 3388 * Prepare replacing the delimiter with an equation macro 3389 * and drop leading white space from the equation. 3390 */ 3391 3392 if (r->eqn == NULL) { 3393 while (*cp2 == ' ') 3394 cp2++; 3395 mac = ".EQ"; 3396 } else 3397 mac = ".EN"; 3398 3399 /* Handle following text, protecting whitespace. */ 3400 3401 if (*cp2 != '\0') { 3402 aft_nl = "\n"; 3403 if (r->eqn != NULL) 3404 aft_pr = "\\&"; 3405 } 3406 3407 /* Do the actual replacement. */ 3408 3409 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3410 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3411 free(buf->buf); 3412 buf->buf = cp1; 3413 3414 /* Toggle the in-line state of the eqn subsystem. */ 3415 3416 r->eqn_inline = r->eqn == NULL; 3417 return ROFF_REPARSE; 3418 } 3419 3420 static int 3421 roff_EQ(ROFF_ARGS) 3422 { 3423 struct roff_node *n; 3424 3425 if (r->man->meta.macroset == MACROSET_MAN) 3426 man_breakscope(r->man, ROFF_EQ); 3427 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3428 if (ln > r->man->last->line) 3429 n->flags |= NODE_LINE; 3430 n->eqn = eqn_box_new(); 3431 roff_node_append(r->man, n); 3432 r->man->next = ROFF_NEXT_SIBLING; 3433 3434 assert(r->eqn == NULL); 3435 if (r->last_eqn == NULL) 3436 r->last_eqn = eqn_alloc(); 3437 else 3438 eqn_reset(r->last_eqn); 3439 r->eqn = r->last_eqn; 3440 r->eqn->node = n; 3441 3442 if (buf->buf[pos] != '\0') 3443 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3444 ".EQ %s", buf->buf + pos); 3445 3446 return ROFF_IGN; 3447 } 3448 3449 static int 3450 roff_EN(ROFF_ARGS) 3451 { 3452 if (r->eqn != NULL) { 3453 eqn_parse(r->eqn); 3454 r->eqn = NULL; 3455 } else 3456 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3457 if (buf->buf[pos] != '\0') 3458 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3459 "EN %s", buf->buf + pos); 3460 return ROFF_IGN; 3461 } 3462 3463 static int 3464 roff_TS(ROFF_ARGS) 3465 { 3466 if (r->tbl != NULL) { 3467 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3468 tbl_end(r->tbl, 0); 3469 } 3470 r->man->flags |= ROFF_NONOFILL; 3471 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3472 if (r->last_tbl == NULL) 3473 r->first_tbl = r->tbl; 3474 r->last_tbl = r->tbl; 3475 return ROFF_IGN; 3476 } 3477 3478 static int 3479 roff_noarg(ROFF_ARGS) 3480 { 3481 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3482 man_breakscope(r->man, tok); 3483 if (tok == ROFF_brp) 3484 tok = ROFF_br; 3485 roff_elem_alloc(r->man, ln, ppos, tok); 3486 if (buf->buf[pos] != '\0') 3487 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3488 "%s %s", roff_name[tok], buf->buf + pos); 3489 if (tok == ROFF_nf) 3490 r->man->flags |= ROFF_NOFILL; 3491 else if (tok == ROFF_fi) 3492 r->man->flags &= ~ROFF_NOFILL; 3493 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3494 r->man->next = ROFF_NEXT_SIBLING; 3495 return ROFF_IGN; 3496 } 3497 3498 static int 3499 roff_onearg(ROFF_ARGS) 3500 { 3501 struct roff_node *n; 3502 char *cp; 3503 int npos; 3504 3505 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3506 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3507 tok == ROFF_ti)) 3508 man_breakscope(r->man, tok); 3509 3510 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3511 r->man->last = roffce_node; 3512 r->man->next = ROFF_NEXT_SIBLING; 3513 } 3514 3515 roff_elem_alloc(r->man, ln, ppos, tok); 3516 n = r->man->last; 3517 3518 cp = buf->buf + pos; 3519 if (*cp != '\0') { 3520 while (*cp != '\0' && *cp != ' ') 3521 cp++; 3522 while (*cp == ' ') 3523 *cp++ = '\0'; 3524 if (*cp != '\0') 3525 mandoc_msg(MANDOCERR_ARG_EXCESS, 3526 ln, (int)(cp - buf->buf), 3527 "%s ... %s", roff_name[tok], cp); 3528 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3529 } 3530 3531 if (tok == ROFF_ce || tok == ROFF_rj) { 3532 if (r->man->last->type == ROFFT_ELEM) { 3533 roff_word_alloc(r->man, ln, pos, "1"); 3534 r->man->last->flags |= NODE_NOSRC; 3535 } 3536 npos = 0; 3537 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3538 &roffce_lines, 0) == 0) { 3539 mandoc_msg(MANDOCERR_CE_NONUM, 3540 ln, pos, "ce %s", buf->buf + pos); 3541 roffce_lines = 1; 3542 } 3543 if (roffce_lines < 1) { 3544 r->man->last = r->man->last->parent; 3545 roffce_node = NULL; 3546 roffce_lines = 0; 3547 } else 3548 roffce_node = r->man->last->parent; 3549 } else { 3550 n->flags |= NODE_VALID | NODE_ENDED; 3551 r->man->last = n; 3552 } 3553 n->flags |= NODE_LINE; 3554 r->man->next = ROFF_NEXT_SIBLING; 3555 return ROFF_IGN; 3556 } 3557 3558 static int 3559 roff_manyarg(ROFF_ARGS) 3560 { 3561 struct roff_node *n; 3562 char *sp, *ep; 3563 3564 roff_elem_alloc(r->man, ln, ppos, tok); 3565 n = r->man->last; 3566 3567 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3568 while (*ep != '\0' && *ep != ' ') 3569 ep++; 3570 while (*ep == ' ') 3571 *ep++ = '\0'; 3572 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3573 } 3574 3575 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3576 r->man->last = n; 3577 r->man->next = ROFF_NEXT_SIBLING; 3578 return ROFF_IGN; 3579 } 3580 3581 static int 3582 roff_als(ROFF_ARGS) 3583 { 3584 char *oldn, *newn, *end, *value; 3585 size_t oldsz, newsz, valsz; 3586 3587 newn = oldn = buf->buf + pos; 3588 if (*newn == '\0') 3589 return ROFF_IGN; 3590 3591 newsz = roff_getname(r, &oldn, ln, pos); 3592 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3593 return ROFF_IGN; 3594 3595 end = oldn; 3596 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3597 if (oldsz == 0) 3598 return ROFF_IGN; 3599 3600 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3601 (int)oldsz, oldn); 3602 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3603 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3604 free(value); 3605 return ROFF_IGN; 3606 } 3607 3608 /* 3609 * The .break request only makes sense inside conditionals, 3610 * and that case is already handled in roff_cond_sub(). 3611 */ 3612 static int 3613 roff_break(ROFF_ARGS) 3614 { 3615 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3616 return ROFF_IGN; 3617 } 3618 3619 static int 3620 roff_cc(ROFF_ARGS) 3621 { 3622 const char *p; 3623 3624 p = buf->buf + pos; 3625 3626 if (*p == '\0' || (r->control = *p++) == '.') 3627 r->control = '\0'; 3628 3629 if (*p != '\0') 3630 mandoc_msg(MANDOCERR_ARG_EXCESS, 3631 ln, p - buf->buf, "cc ... %s", p); 3632 3633 return ROFF_IGN; 3634 } 3635 3636 static int 3637 roff_char(ROFF_ARGS) 3638 { 3639 const char *p, *kp, *vp; 3640 size_t ksz, vsz; 3641 int font; 3642 3643 /* Parse the character to be replaced. */ 3644 3645 kp = buf->buf + pos; 3646 p = kp + 1; 3647 if (*kp == '\0' || (*kp == '\\' && 3648 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3649 (*p != ' ' && *p != '\0')) { 3650 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3651 return ROFF_IGN; 3652 } 3653 ksz = p - kp; 3654 while (*p == ' ') 3655 p++; 3656 3657 /* 3658 * If the replacement string contains a font escape sequence, 3659 * we have to restore the font at the end. 3660 */ 3661 3662 vp = p; 3663 vsz = strlen(p); 3664 font = 0; 3665 while (*p != '\0') { 3666 if (*p++ != '\\') 3667 continue; 3668 switch (mandoc_escape(&p, NULL, NULL)) { 3669 case ESCAPE_FONT: 3670 case ESCAPE_FONTROMAN: 3671 case ESCAPE_FONTITALIC: 3672 case ESCAPE_FONTBOLD: 3673 case ESCAPE_FONTBI: 3674 case ESCAPE_FONTCR: 3675 case ESCAPE_FONTCB: 3676 case ESCAPE_FONTCI: 3677 case ESCAPE_FONTPREV: 3678 font++; 3679 break; 3680 default: 3681 break; 3682 } 3683 } 3684 if (font > 1) 3685 mandoc_msg(MANDOCERR_CHAR_FONT, 3686 ln, (int)(vp - buf->buf), "%s", vp); 3687 3688 /* 3689 * Approximate the effect of .char using the .tr tables. 3690 * XXX In groff, .char and .tr interact differently. 3691 */ 3692 3693 if (ksz == 1) { 3694 if (r->xtab == NULL) 3695 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3696 assert((unsigned int)*kp < 128); 3697 free(r->xtab[(int)*kp].p); 3698 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3699 "%s%s", vp, font ? "\fP" : ""); 3700 } else { 3701 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3702 if (font) 3703 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3704 } 3705 return ROFF_IGN; 3706 } 3707 3708 static int 3709 roff_ec(ROFF_ARGS) 3710 { 3711 const char *p; 3712 3713 p = buf->buf + pos; 3714 if (*p == '\0') 3715 r->escape = '\\'; 3716 else { 3717 r->escape = *p; 3718 if (*++p != '\0') 3719 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3720 (int)(p - buf->buf), "ec ... %s", p); 3721 } 3722 return ROFF_IGN; 3723 } 3724 3725 static int 3726 roff_eo(ROFF_ARGS) 3727 { 3728 r->escape = '\0'; 3729 if (buf->buf[pos] != '\0') 3730 mandoc_msg(MANDOCERR_ARG_SKIP, 3731 ln, pos, "eo %s", buf->buf + pos); 3732 return ROFF_IGN; 3733 } 3734 3735 static int 3736 roff_mc(ROFF_ARGS) 3737 { 3738 struct roff_node *n; 3739 char *cp; 3740 3741 /* Parse the first argument. */ 3742 3743 cp = buf->buf + pos; 3744 if (*cp != '\0') 3745 cp++; 3746 if (buf->buf[pos] == '\\') { 3747 switch (mandoc_escape((const char **)&cp, NULL, NULL)) { 3748 case ESCAPE_SPECIAL: 3749 case ESCAPE_UNICODE: 3750 case ESCAPE_NUMBERED: 3751 break; 3752 default: 3753 *cp = '\0'; 3754 mandoc_msg(MANDOCERR_MC_ESC, ln, pos, 3755 "mc %s", buf->buf + pos); 3756 buf->buf[pos] = '\0'; 3757 break; 3758 } 3759 } 3760 3761 /* Ignore additional arguments. */ 3762 3763 while (*cp == ' ') 3764 *cp++ = '\0'; 3765 if (*cp != '\0') { 3766 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf), 3767 "mc ... %s", cp); 3768 *cp = '\0'; 3769 } 3770 3771 /* Create the .mc node. */ 3772 3773 roff_elem_alloc(r->man, ln, ppos, tok); 3774 n = r->man->last; 3775 if (buf->buf[pos] != '\0') 3776 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3777 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3778 r->man->last = n; 3779 r->man->next = ROFF_NEXT_SIBLING; 3780 return ROFF_IGN; 3781 } 3782 3783 static int 3784 roff_nop(ROFF_ARGS) 3785 { 3786 while (buf->buf[pos] == ' ') 3787 pos++; 3788 *offs = pos; 3789 return ROFF_RERUN; 3790 } 3791 3792 static int 3793 roff_tr(ROFF_ARGS) 3794 { 3795 const char *p, *first, *second; 3796 size_t fsz, ssz; 3797 enum mandoc_esc esc; 3798 3799 p = buf->buf + pos; 3800 3801 if (*p == '\0') { 3802 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3803 return ROFF_IGN; 3804 } 3805 3806 while (*p != '\0') { 3807 fsz = ssz = 1; 3808 3809 first = p++; 3810 if (*first == '\\') { 3811 esc = mandoc_escape(&p, NULL, NULL); 3812 if (esc == ESCAPE_ERROR) { 3813 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3814 (int)(p - buf->buf), "%s", first); 3815 return ROFF_IGN; 3816 } 3817 fsz = (size_t)(p - first); 3818 } 3819 3820 second = p++; 3821 if (*second == '\\') { 3822 esc = mandoc_escape(&p, NULL, NULL); 3823 if (esc == ESCAPE_ERROR) { 3824 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3825 (int)(p - buf->buf), "%s", second); 3826 return ROFF_IGN; 3827 } 3828 ssz = (size_t)(p - second); 3829 } else if (*second == '\0') { 3830 mandoc_msg(MANDOCERR_TR_ODD, ln, 3831 (int)(first - buf->buf), "tr %s", first); 3832 second = " "; 3833 p--; 3834 } 3835 3836 if (fsz > 1) { 3837 roff_setstrn(&r->xmbtab, first, fsz, 3838 second, ssz, 0); 3839 continue; 3840 } 3841 3842 if (r->xtab == NULL) 3843 r->xtab = mandoc_calloc(128, 3844 sizeof(struct roffstr)); 3845 3846 free(r->xtab[(int)*first].p); 3847 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3848 r->xtab[(int)*first].sz = ssz; 3849 } 3850 3851 return ROFF_IGN; 3852 } 3853 3854 /* 3855 * Implementation of the .return request. 3856 * There is no need to call roff_userret() from here. 3857 * The read module will call that after rewinding the reader stack 3858 * to the place from where the current macro was called. 3859 */ 3860 static int 3861 roff_return(ROFF_ARGS) 3862 { 3863 if (r->mstackpos >= 0) 3864 return ROFF_IGN | ROFF_USERRET; 3865 3866 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3867 return ROFF_IGN; 3868 } 3869 3870 static int 3871 roff_rn(ROFF_ARGS) 3872 { 3873 const char *value; 3874 char *oldn, *newn, *end; 3875 size_t oldsz, newsz; 3876 int deftype; 3877 3878 oldn = newn = buf->buf + pos; 3879 if (*oldn == '\0') 3880 return ROFF_IGN; 3881 3882 oldsz = roff_getname(r, &newn, ln, pos); 3883 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3884 return ROFF_IGN; 3885 3886 end = newn; 3887 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3888 if (newsz == 0) 3889 return ROFF_IGN; 3890 3891 deftype = ROFFDEF_ANY; 3892 value = roff_getstrn(r, oldn, oldsz, &deftype); 3893 switch (deftype) { 3894 case ROFFDEF_USER: 3895 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3896 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3897 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3898 break; 3899 case ROFFDEF_PRE: 3900 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3901 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3902 break; 3903 case ROFFDEF_REN: 3904 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3905 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3906 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3907 break; 3908 case ROFFDEF_STD: 3909 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3910 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3911 break; 3912 default: 3913 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3914 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3915 break; 3916 } 3917 return ROFF_IGN; 3918 } 3919 3920 static int 3921 roff_shift(ROFF_ARGS) 3922 { 3923 struct mctx *ctx; 3924 int argpos, levels, i; 3925 3926 argpos = pos; 3927 levels = 1; 3928 if (buf->buf[pos] != '\0' && 3929 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3930 mandoc_msg(MANDOCERR_CE_NONUM, 3931 ln, pos, "shift %s", buf->buf + pos); 3932 levels = 1; 3933 } 3934 if (r->mstackpos < 0) { 3935 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3936 return ROFF_IGN; 3937 } 3938 ctx = r->mstack + r->mstackpos; 3939 if (levels > ctx->argc) { 3940 mandoc_msg(MANDOCERR_SHIFT, 3941 ln, argpos, "%d, but max is %d", levels, ctx->argc); 3942 levels = ctx->argc; 3943 } 3944 if (levels < 0) { 3945 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels); 3946 levels = 0; 3947 } 3948 if (levels == 0) 3949 return ROFF_IGN; 3950 for (i = 0; i < levels; i++) 3951 free(ctx->argv[i]); 3952 ctx->argc -= levels; 3953 for (i = 0; i < ctx->argc; i++) 3954 ctx->argv[i] = ctx->argv[i + levels]; 3955 return ROFF_IGN; 3956 } 3957 3958 static int 3959 roff_so(ROFF_ARGS) 3960 { 3961 char *name, *cp; 3962 3963 name = buf->buf + pos; 3964 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3965 3966 /* 3967 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3968 * opening anything that's not in our cwd or anything beneath 3969 * it. Thus, explicitly disallow traversing up the file-system 3970 * or using absolute paths. 3971 */ 3972 3973 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3974 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3975 buf->sz = mandoc_asprintf(&cp, 3976 ".sp\nSee the file %s.\n.sp", name) + 1; 3977 free(buf->buf); 3978 buf->buf = cp; 3979 *offs = 0; 3980 return ROFF_REPARSE; 3981 } 3982 3983 *offs = pos; 3984 return ROFF_SO; 3985 } 3986 3987 /* --- user defined strings and macros ------------------------------------ */ 3988 3989 static int 3990 roff_userdef(ROFF_ARGS) 3991 { 3992 struct mctx *ctx; 3993 char *arg, *ap, *dst, *src; 3994 size_t sz; 3995 3996 /* If the macro is empty, ignore it altogether. */ 3997 3998 if (*r->current_string == '\0') 3999 return ROFF_IGN; 4000 4001 /* Initialize a new macro stack context. */ 4002 4003 if (++r->mstackpos == r->mstacksz) { 4004 r->mstack = mandoc_recallocarray(r->mstack, 4005 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 4006 r->mstacksz += 8; 4007 } 4008 ctx = r->mstack + r->mstackpos; 4009 ctx->argc = 0; 4010 4011 /* 4012 * Collect pointers to macro argument strings, 4013 * NUL-terminating them and escaping quotes. 4014 */ 4015 4016 src = buf->buf + pos; 4017 while (*src != '\0') { 4018 if (ctx->argc == ctx->argsz) { 4019 ctx->argsz += 8; 4020 ctx->argv = mandoc_reallocarray(ctx->argv, 4021 ctx->argsz, sizeof(*ctx->argv)); 4022 } 4023 arg = roff_getarg(r, &src, ln, &pos); 4024 sz = 1; /* For the terminating NUL. */ 4025 for (ap = arg; *ap != '\0'; ap++) 4026 sz += *ap == '"' ? 4 : 1; 4027 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 4028 for (ap = arg; *ap != '\0'; ap++) { 4029 if (*ap == '"') { 4030 memcpy(dst, "\\(dq", 4); 4031 dst += 4; 4032 } else 4033 *dst++ = *ap; 4034 } 4035 *dst = '\0'; 4036 free(arg); 4037 } 4038 4039 /* Replace the macro invocation by the macro definition. */ 4040 4041 free(buf->buf); 4042 buf->buf = mandoc_strdup(r->current_string); 4043 buf->sz = strlen(buf->buf) + 1; 4044 *offs = 0; 4045 4046 return buf->buf[buf->sz - 2] == '\n' ? 4047 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 4048 } 4049 4050 /* 4051 * Calling a high-level macro that was renamed with .rn. 4052 * r->current_string has already been set up by roff_parse(). 4053 */ 4054 static int 4055 roff_renamed(ROFF_ARGS) 4056 { 4057 char *nbuf; 4058 4059 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 4060 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 4061 free(buf->buf); 4062 buf->buf = nbuf; 4063 *offs = 0; 4064 return ROFF_CONT; 4065 } 4066 4067 /* 4068 * Measure the length in bytes of the roff identifier at *cpp 4069 * and advance the pointer to the next word. 4070 */ 4071 static size_t 4072 roff_getname(struct roff *r, char **cpp, int ln, int pos) 4073 { 4074 char *name, *cp; 4075 size_t namesz; 4076 4077 name = *cpp; 4078 if (*name == '\0') 4079 return 0; 4080 4081 /* Advance cp to the byte after the end of the name. */ 4082 4083 for (cp = name; 1; cp++) { 4084 namesz = cp - name; 4085 if (*cp == '\0') 4086 break; 4087 if (*cp == ' ' || *cp == '\t') { 4088 cp++; 4089 break; 4090 } 4091 if (*cp != '\\') 4092 continue; 4093 if (cp[1] == '{' || cp[1] == '}') 4094 break; 4095 if (*++cp == '\\') 4096 continue; 4097 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 4098 "%.*s", (int)(cp - name + 1), name); 4099 mandoc_escape((const char **)&cp, NULL, NULL); 4100 break; 4101 } 4102 4103 /* Read past spaces. */ 4104 4105 while (*cp == ' ') 4106 cp++; 4107 4108 *cpp = cp; 4109 return namesz; 4110 } 4111 4112 /* 4113 * Store *string into the user-defined string called *name. 4114 * To clear an existing entry, call with (*r, *name, NULL, 0). 4115 * append == 0: replace mode 4116 * append == 1: single-line append mode 4117 * append == 2: multiline append mode, append '\n' after each call 4118 */ 4119 static void 4120 roff_setstr(struct roff *r, const char *name, const char *string, 4121 int append) 4122 { 4123 size_t namesz; 4124 4125 namesz = strlen(name); 4126 roff_setstrn(&r->strtab, name, namesz, string, 4127 string ? strlen(string) : 0, append); 4128 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 4129 } 4130 4131 static void 4132 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 4133 const char *string, size_t stringsz, int append) 4134 { 4135 struct roffkv *n; 4136 char *c; 4137 int i; 4138 size_t oldch, newch; 4139 4140 /* Search for an existing string with the same name. */ 4141 n = *r; 4142 4143 while (n && (namesz != n->key.sz || 4144 strncmp(n->key.p, name, namesz))) 4145 n = n->next; 4146 4147 if (NULL == n) { 4148 /* Create a new string table entry. */ 4149 n = mandoc_malloc(sizeof(struct roffkv)); 4150 n->key.p = mandoc_strndup(name, namesz); 4151 n->key.sz = namesz; 4152 n->val.p = NULL; 4153 n->val.sz = 0; 4154 n->next = *r; 4155 *r = n; 4156 } else if (0 == append) { 4157 free(n->val.p); 4158 n->val.p = NULL; 4159 n->val.sz = 0; 4160 } 4161 4162 if (NULL == string) 4163 return; 4164 4165 /* 4166 * One additional byte for the '\n' in multiline mode, 4167 * and one for the terminating '\0'. 4168 */ 4169 newch = stringsz + (1 < append ? 2u : 1u); 4170 4171 if (NULL == n->val.p) { 4172 n->val.p = mandoc_malloc(newch); 4173 *n->val.p = '\0'; 4174 oldch = 0; 4175 } else { 4176 oldch = n->val.sz; 4177 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4178 } 4179 4180 /* Skip existing content in the destination buffer. */ 4181 c = n->val.p + (int)oldch; 4182 4183 /* Append new content to the destination buffer. */ 4184 i = 0; 4185 while (i < (int)stringsz) { 4186 /* 4187 * Rudimentary roff copy mode: 4188 * Handle escaped backslashes. 4189 */ 4190 if ('\\' == string[i] && '\\' == string[i + 1]) 4191 i++; 4192 *c++ = string[i++]; 4193 } 4194 4195 /* Append terminating bytes. */ 4196 if (1 < append) 4197 *c++ = '\n'; 4198 4199 *c = '\0'; 4200 n->val.sz = (int)(c - n->val.p); 4201 } 4202 4203 static const char * 4204 roff_getstrn(struct roff *r, const char *name, size_t len, 4205 int *deftype) 4206 { 4207 const struct roffkv *n; 4208 int found, i; 4209 enum roff_tok tok; 4210 4211 found = 0; 4212 for (n = r->strtab; n != NULL; n = n->next) { 4213 if (strncmp(name, n->key.p, len) != 0 || 4214 n->key.p[len] != '\0' || n->val.p == NULL) 4215 continue; 4216 if (*deftype & ROFFDEF_USER) { 4217 *deftype = ROFFDEF_USER; 4218 return n->val.p; 4219 } else { 4220 found = 1; 4221 break; 4222 } 4223 } 4224 for (n = r->rentab; n != NULL; n = n->next) { 4225 if (strncmp(name, n->key.p, len) != 0 || 4226 n->key.p[len] != '\0' || n->val.p == NULL) 4227 continue; 4228 if (*deftype & ROFFDEF_REN) { 4229 *deftype = ROFFDEF_REN; 4230 return n->val.p; 4231 } else { 4232 found = 1; 4233 break; 4234 } 4235 } 4236 for (i = 0; i < PREDEFS_MAX; i++) { 4237 if (strncmp(name, predefs[i].name, len) != 0 || 4238 predefs[i].name[len] != '\0') 4239 continue; 4240 if (*deftype & ROFFDEF_PRE) { 4241 *deftype = ROFFDEF_PRE; 4242 return predefs[i].str; 4243 } else { 4244 found = 1; 4245 break; 4246 } 4247 } 4248 if (r->man->meta.macroset != MACROSET_MAN) { 4249 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4250 if (strncmp(name, roff_name[tok], len) != 0 || 4251 roff_name[tok][len] != '\0') 4252 continue; 4253 if (*deftype & ROFFDEF_STD) { 4254 *deftype = ROFFDEF_STD; 4255 return NULL; 4256 } else { 4257 found = 1; 4258 break; 4259 } 4260 } 4261 } 4262 if (r->man->meta.macroset != MACROSET_MDOC) { 4263 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4264 if (strncmp(name, roff_name[tok], len) != 0 || 4265 roff_name[tok][len] != '\0') 4266 continue; 4267 if (*deftype & ROFFDEF_STD) { 4268 *deftype = ROFFDEF_STD; 4269 return NULL; 4270 } else { 4271 found = 1; 4272 break; 4273 } 4274 } 4275 } 4276 4277 if (found == 0 && *deftype != ROFFDEF_ANY) { 4278 if (*deftype & ROFFDEF_REN) { 4279 /* 4280 * This might still be a request, 4281 * so do not treat it as undefined yet. 4282 */ 4283 *deftype = ROFFDEF_UNDEF; 4284 return NULL; 4285 } 4286 4287 /* Using an undefined string defines it to be empty. */ 4288 4289 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4290 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4291 } 4292 4293 *deftype = 0; 4294 return NULL; 4295 } 4296 4297 static void 4298 roff_freestr(struct roffkv *r) 4299 { 4300 struct roffkv *n, *nn; 4301 4302 for (n = r; n; n = nn) { 4303 free(n->key.p); 4304 free(n->val.p); 4305 nn = n->next; 4306 free(n); 4307 } 4308 } 4309 4310 /* --- accessors and utility functions ------------------------------------ */ 4311 4312 /* 4313 * Duplicate an input string, making the appropriate character 4314 * conversations (as stipulated by `tr') along the way. 4315 * Returns a heap-allocated string with all the replacements made. 4316 */ 4317 char * 4318 roff_strdup(const struct roff *r, const char *p) 4319 { 4320 const struct roffkv *cp; 4321 char *res; 4322 const char *pp; 4323 size_t ssz, sz; 4324 enum mandoc_esc esc; 4325 4326 if (NULL == r->xmbtab && NULL == r->xtab) 4327 return mandoc_strdup(p); 4328 else if ('\0' == *p) 4329 return mandoc_strdup(""); 4330 4331 /* 4332 * Step through each character looking for term matches 4333 * (remember that a `tr' can be invoked with an escape, which is 4334 * a glyph but the escape is multi-character). 4335 * We only do this if the character hash has been initialised 4336 * and the string is >0 length. 4337 */ 4338 4339 res = NULL; 4340 ssz = 0; 4341 4342 while ('\0' != *p) { 4343 assert((unsigned int)*p < 128); 4344 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4345 sz = r->xtab[(int)*p].sz; 4346 res = mandoc_realloc(res, ssz + sz + 1); 4347 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4348 ssz += sz; 4349 p++; 4350 continue; 4351 } else if ('\\' != *p) { 4352 res = mandoc_realloc(res, ssz + 2); 4353 res[ssz++] = *p++; 4354 continue; 4355 } 4356 4357 /* Search for term matches. */ 4358 for (cp = r->xmbtab; cp; cp = cp->next) 4359 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4360 break; 4361 4362 if (NULL != cp) { 4363 /* 4364 * A match has been found. 4365 * Append the match to the array and move 4366 * forward by its keysize. 4367 */ 4368 res = mandoc_realloc(res, 4369 ssz + cp->val.sz + 1); 4370 memcpy(res + ssz, cp->val.p, cp->val.sz); 4371 ssz += cp->val.sz; 4372 p += (int)cp->key.sz; 4373 continue; 4374 } 4375 4376 /* 4377 * Handle escapes carefully: we need to copy 4378 * over just the escape itself, or else we might 4379 * do replacements within the escape itself. 4380 * Make sure to pass along the bogus string. 4381 */ 4382 pp = p++; 4383 esc = mandoc_escape(&p, NULL, NULL); 4384 if (ESCAPE_ERROR == esc) { 4385 sz = strlen(pp); 4386 res = mandoc_realloc(res, ssz + sz + 1); 4387 memcpy(res + ssz, pp, sz); 4388 break; 4389 } 4390 /* 4391 * We bail out on bad escapes. 4392 * No need to warn: we already did so when 4393 * roff_expand() was called. 4394 */ 4395 sz = (int)(p - pp); 4396 res = mandoc_realloc(res, ssz + sz + 1); 4397 memcpy(res + ssz, pp, sz); 4398 ssz += sz; 4399 } 4400 4401 res[(int)ssz] = '\0'; 4402 return res; 4403 } 4404 4405 int 4406 roff_getformat(const struct roff *r) 4407 { 4408 4409 return r->format; 4410 } 4411 4412 /* 4413 * Find out whether a line is a macro line or not. 4414 * If it is, adjust the current position and return one; if it isn't, 4415 * return zero and don't change the current position. 4416 * If the control character has been set with `.cc', then let that grain 4417 * precedence. 4418 * This is slighly contrary to groff, where using the non-breaking 4419 * control character when `cc' has been invoked will cause the 4420 * non-breaking macro contents to be printed verbatim. 4421 */ 4422 int 4423 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4424 { 4425 int pos; 4426 4427 pos = *ppos; 4428 4429 if (r->control != '\0' && cp[pos] == r->control) 4430 pos++; 4431 else if (r->control != '\0') 4432 return 0; 4433 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4434 pos += 2; 4435 else if ('.' == cp[pos] || '\'' == cp[pos]) 4436 pos++; 4437 else 4438 return 0; 4439 4440 while (' ' == cp[pos] || '\t' == cp[pos]) 4441 pos++; 4442 4443 *ppos = pos; 4444 return 1; 4445 } 4446