/* $OpenBSD: roff.c,v 1.248 2020/08/27 12:58:00 schwarze Exp $ */
/*
 * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Implementation of the roff(7) parser for mandoc(1).
 */
#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "mandoc.h"
#include "roff.h"
#include "mandoc_parse.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "tbl_parse.h"
#include "eqn_parse.h"

/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Each type is a distinct bit so that several can be combined into
 * a mask, as ROFFDEF_ANY does.  Bit 0 is not assigned to any type.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */

/* --- data types --------------------------------------------------------- */

/*
 * An incredibly-simple string buffer.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};

/*
 * A key-value roffstr pair as part of a singly-linked list.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};

/*
 * A single number register as part of a singly-linked list.
 * NOTE(review): presumably key is the register name, val its current
 * value and step its auto-increment amount -- confirm against
 * roff_setregn() and roff_getregn().
 */
struct	roffreg {
	struct roffstr	 key;
	int		 val;
	int		 step;
	struct roffreg	*next; /* next in list */
};

/*
 * Association of request and macro names with token IDs.
 * Objects of this type are the entries of the hash tables built by
 * roffhash_alloc(), which allocates each one with room for the name
 * including its terminating NUL byte.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token the name maps to */
	char		 name[]; /* name stored inline; hash key */
};

/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * NOTE(review): presumably argv/argc hold the arguments of the macro
 * call and argsz the allocated capacity of argv -- confirm against
 * roff_userdef().  The argv arrays are released in roff_free().
 */
struct	mctx {
	char		**argv;
	int		 argc;
	int		 argsz;
};

/*
 * Complete state of one roff parser instance.
 * Allocated by roff_alloc(), recycled by roff_reset(),
 * and destroyed by roff_free().
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack; -1 if empty */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack; -1 if empty */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};

/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointer;
 * see roffnode_push() and roffnode_pop().
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};

/*
 * Common parameter list of all request handler functions.
 * Being a macro, it keeps the many handler prototypes and
 * definitions in this file in sync.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature shared by every handler stored in struct roffmac. */
typedef	int (*roffproc)(ROFF_ARGS);

/*
 * The handlers associated with one request; the roffs[] table
 * holds one of these per token.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* bit set of ROFFMAC_* values */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};

struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer followed by a comma.
 * NOTE(review): presumably invoked by the lines of "predefs.in",
 * which is included into the predefs[] initializer -- confirm.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },

/* --- function prototypes ------------------------------------------------ */

static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_checkend(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void
roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);

/* --- constant data ------------------------------------------------------ */

#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/*
 * Names of all requests and macros, indexed by token number:
 * roffhash_alloc() reads roff_name[tok] for each enum roff_tok value.
 * NULL entries mark tokens without a name; roffhash_alloc() skips them.
 * The order of the entries must match the token numbering, and the
 * leading part must also match the order of the roffs[] table below.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,	/* NULL: ROFF_MAX in roffs[] */
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	/*
	 * The two NULL slots after "." line up with the roff_renamed
	 * and roff_userdef entries at the end of roffs[]; those tokens
	 * have no fixed name.
	 * NOTE(review): "text" presumably sits at index TOKEN_NONE,
	 * followed by the mdoc(7) names, a NULL separator, and the
	 * man(7) names -- confirm against enum roff_tok in roff.h.
	 */
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the name table above. */
const	char *const *roff_name = __roff_name;

/*
 * Handlers for all roff requests, one entry per token below TOKEN_NONE,
 * in the same order as the leading part of __roff_name[].
 * The fields of each entry are proc, text, sub, and flags;
 * see struct roffmac.  The trailing comment names the request.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },
/* wh */ 611 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 612 { roff_insec, NULL, NULL, 0 }, /* write */ 613 { roff_insec, NULL, NULL, 0 }, /* writec */ 614 { roff_insec, NULL, NULL, 0 }, /* writem */ 615 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 616 { roff_cblock, NULL, NULL, 0 }, /* . */ 617 { roff_renamed, NULL, NULL, 0 }, 618 { roff_userdef, NULL, NULL, 0 } 619 }; 620 621 /* Array of injected predefined strings. */ 622 #define PREDEFS_MAX 38 623 static const struct predef predefs[PREDEFS_MAX] = { 624 #include "predefs.in" 625 }; 626 627 static int roffce_lines; /* number of input lines to center */ 628 static struct roff_node *roffce_node; /* active request */ 629 static int roffit_lines; /* number of lines to delay */ 630 static char *roffit_macro; /* nil-terminated macro line */ 631 632 633 /* --- request table ------------------------------------------------------ */ 634 635 struct ohash * 636 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 637 { 638 struct ohash *htab; 639 struct roffreq *req; 640 enum roff_tok tok; 641 size_t sz; 642 unsigned int slot; 643 644 htab = mandoc_malloc(sizeof(*htab)); 645 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 646 647 for (tok = mintok; tok < maxtok; tok++) { 648 if (roff_name[tok] == NULL) 649 continue; 650 sz = strlen(roff_name[tok]); 651 req = mandoc_malloc(sizeof(*req) + sz + 1); 652 req->tok = tok; 653 memcpy(req->name, roff_name[tok], sz + 1); 654 slot = ohash_qlookup(htab, req->name); 655 ohash_insert(htab, slot, req); 656 } 657 return htab; 658 } 659 660 void 661 roffhash_free(struct ohash *htab) 662 { 663 struct roffreq *req; 664 unsigned int slot; 665 666 if (htab == NULL) 667 return; 668 for (req = ohash_first(htab, &slot); req != NULL; 669 req = ohash_next(htab, &slot)) 670 free(req); 671 ohash_delete(htab); 672 free(htab); 673 } 674 675 enum roff_tok 676 roffhash_find(struct ohash *htab, const char *name, size_t sz) 677 { 678 struct roffreq *req; 679 
const char *end; 680 681 if (sz) { 682 end = name + sz; 683 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 684 } else 685 req = ohash_find(htab, ohash_qlookup(htab, name)); 686 return req == NULL ? TOKEN_NONE : req->tok; 687 } 688 689 /* --- stack of request blocks -------------------------------------------- */ 690 691 /* 692 * Pop the current node off of the stack of roff instructions currently 693 * pending. Return 1 if it is a loop or 0 otherwise. 694 */ 695 static int 696 roffnode_pop(struct roff *r) 697 { 698 struct roffnode *p; 699 int inloop; 700 701 p = r->last; 702 inloop = p->tok == ROFF_while; 703 r->last = p->parent; 704 free(p->name); 705 free(p->end); 706 free(p); 707 return inloop; 708 } 709 710 /* 711 * Push a roff node onto the instruction stack. This must later be 712 * removed with roffnode_pop(). 713 */ 714 static void 715 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 716 int line, int col) 717 { 718 struct roffnode *p; 719 720 p = mandoc_calloc(1, sizeof(struct roffnode)); 721 p->tok = tok; 722 if (name) 723 p->name = mandoc_strdup(name); 724 p->parent = r->last; 725 p->line = line; 726 p->col = col; 727 p->rule = p->parent ? 
p->parent->rule : 0; 728 729 r->last = p; 730 } 731 732 /* --- roff parser state data management ---------------------------------- */ 733 734 static void 735 roff_free1(struct roff *r) 736 { 737 int i; 738 739 tbl_free(r->first_tbl); 740 r->first_tbl = r->last_tbl = r->tbl = NULL; 741 742 eqn_free(r->last_eqn); 743 r->last_eqn = r->eqn = NULL; 744 745 while (r->mstackpos >= 0) 746 roff_userret(r); 747 748 while (r->last) 749 roffnode_pop(r); 750 751 free (r->rstack); 752 r->rstack = NULL; 753 r->rstacksz = 0; 754 r->rstackpos = -1; 755 756 roff_freereg(r->regtab); 757 r->regtab = NULL; 758 759 roff_freestr(r->strtab); 760 roff_freestr(r->rentab); 761 roff_freestr(r->xmbtab); 762 r->strtab = r->rentab = r->xmbtab = NULL; 763 764 if (r->xtab) 765 for (i = 0; i < 128; i++) 766 free(r->xtab[i].p); 767 free(r->xtab); 768 r->xtab = NULL; 769 } 770 771 void 772 roff_reset(struct roff *r) 773 { 774 roff_free1(r); 775 r->options |= MPARSE_COMMENT; 776 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 777 r->control = '\0'; 778 r->escape = '\\'; 779 roffce_lines = 0; 780 roffce_node = NULL; 781 roffit_lines = 0; 782 roffit_macro = NULL; 783 } 784 785 void 786 roff_free(struct roff *r) 787 { 788 int i; 789 790 roff_free1(r); 791 for (i = 0; i < r->mstacksz; i++) 792 free(r->mstack[i].argv); 793 free(r->mstack); 794 roffhash_free(r->reqtab); 795 free(r); 796 } 797 798 struct roff * 799 roff_alloc(int options) 800 { 801 struct roff *r; 802 803 r = mandoc_calloc(1, sizeof(struct roff)); 804 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 805 r->options = options | MPARSE_COMMENT; 806 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 807 r->mstackpos = -1; 808 r->rstackpos = -1; 809 r->escape = '\\'; 810 return r; 811 } 812 813 /* --- syntax tree state data management ---------------------------------- */ 814 815 static void 816 roff_man_free1(struct roff_man *man) 817 { 818 if (man->meta.first != NULL) 819 roff_node_delete(man, man->meta.first); 820 free(man->meta.msec); 821 
free(man->meta.vol); 822 free(man->meta.os); 823 free(man->meta.arch); 824 free(man->meta.title); 825 free(man->meta.name); 826 free(man->meta.date); 827 free(man->meta.sodest); 828 } 829 830 void 831 roff_state_reset(struct roff_man *man) 832 { 833 man->last = man->meta.first; 834 man->last_es = NULL; 835 man->flags = 0; 836 man->lastsec = man->lastnamed = SEC_NONE; 837 man->next = ROFF_NEXT_CHILD; 838 roff_setreg(man->roff, "nS", 0, '='); 839 } 840 841 static void 842 roff_man_alloc1(struct roff_man *man) 843 { 844 memset(&man->meta, 0, sizeof(man->meta)); 845 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 846 man->meta.first->type = ROFFT_ROOT; 847 man->meta.macroset = MACROSET_NONE; 848 roff_state_reset(man); 849 } 850 851 void 852 roff_man_reset(struct roff_man *man) 853 { 854 roff_man_free1(man); 855 roff_man_alloc1(man); 856 } 857 858 void 859 roff_man_free(struct roff_man *man) 860 { 861 roff_man_free1(man); 862 free(man); 863 } 864 865 struct roff_man * 866 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 867 { 868 struct roff_man *man; 869 870 man = mandoc_calloc(1, sizeof(*man)); 871 man->roff = roff; 872 man->os_s = os_s; 873 man->quick = quick; 874 roff_man_alloc1(man); 875 roff->man = man; 876 return man; 877 } 878 879 /* --- syntax tree handling ----------------------------------------------- */ 880 881 struct roff_node * 882 roff_node_alloc(struct roff_man *man, int line, int pos, 883 enum roff_type type, int tok) 884 { 885 struct roff_node *n; 886 887 n = mandoc_calloc(1, sizeof(*n)); 888 n->line = line; 889 n->pos = pos; 890 n->tok = tok; 891 n->type = type; 892 n->sec = man->lastsec; 893 894 if (man->flags & MDOC_SYNOPSIS) 895 n->flags |= NODE_SYNPRETTY; 896 else 897 n->flags &= ~NODE_SYNPRETTY; 898 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 899 n->flags |= NODE_NOFILL; 900 else 901 n->flags &= ~NODE_NOFILL; 902 if (man->flags & MDOC_NEWLINE) 903 n->flags |= NODE_LINE; 904 man->flags &= 
~MDOC_NEWLINE; 905 906 return n; 907 } 908 909 void 910 roff_node_append(struct roff_man *man, struct roff_node *n) 911 { 912 913 switch (man->next) { 914 case ROFF_NEXT_SIBLING: 915 if (man->last->next != NULL) { 916 n->next = man->last->next; 917 man->last->next->prev = n; 918 } else 919 man->last->parent->last = n; 920 man->last->next = n; 921 n->prev = man->last; 922 n->parent = man->last->parent; 923 break; 924 case ROFF_NEXT_CHILD: 925 if (man->last->child != NULL) { 926 n->next = man->last->child; 927 man->last->child->prev = n; 928 } else 929 man->last->last = n; 930 man->last->child = n; 931 n->parent = man->last; 932 break; 933 default: 934 abort(); 935 } 936 man->last = n; 937 938 switch (n->type) { 939 case ROFFT_HEAD: 940 n->parent->head = n; 941 break; 942 case ROFFT_BODY: 943 if (n->end != ENDBODY_NOT) 944 return; 945 n->parent->body = n; 946 break; 947 case ROFFT_TAIL: 948 n->parent->tail = n; 949 break; 950 default: 951 return; 952 } 953 954 /* 955 * Copy over the normalised-data pointer of our parent. Not 956 * everybody has one, but copying a null pointer is fine. 
957 */ 958 959 n->norm = n->parent->norm; 960 assert(n->parent->type == ROFFT_BLOCK); 961 } 962 963 void 964 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 965 { 966 struct roff_node *n; 967 968 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 969 n->string = roff_strdup(man->roff, word); 970 roff_node_append(man, n); 971 n->flags |= NODE_VALID | NODE_ENDED; 972 man->next = ROFF_NEXT_SIBLING; 973 } 974 975 void 976 roff_word_append(struct roff_man *man, const char *word) 977 { 978 struct roff_node *n; 979 char *addstr, *newstr; 980 981 n = man->last; 982 addstr = roff_strdup(man->roff, word); 983 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 984 free(addstr); 985 free(n->string); 986 n->string = newstr; 987 man->next = ROFF_NEXT_SIBLING; 988 } 989 990 void 991 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 992 { 993 struct roff_node *n; 994 995 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 996 roff_node_append(man, n); 997 man->next = ROFF_NEXT_CHILD; 998 } 999 1000 struct roff_node * 1001 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 1002 { 1003 struct roff_node *n; 1004 1005 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1006 roff_node_append(man, n); 1007 man->next = ROFF_NEXT_CHILD; 1008 return n; 1009 } 1010 1011 struct roff_node * 1012 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1013 { 1014 struct roff_node *n; 1015 1016 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1017 roff_node_append(man, n); 1018 man->next = ROFF_NEXT_CHILD; 1019 return n; 1020 } 1021 1022 struct roff_node * 1023 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1024 { 1025 struct roff_node *n; 1026 1027 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1028 roff_node_append(man, n); 1029 man->next = ROFF_NEXT_CHILD; 1030 return n; 1031 } 1032 1033 static void 1034 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1035 { 
1036 struct roff_node *n; 1037 struct tbl_span *span; 1038 1039 if (man->meta.macroset == MACROSET_MAN) 1040 man_breakscope(man, ROFF_TS); 1041 while ((span = tbl_span(tbl)) != NULL) { 1042 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1043 n->span = span; 1044 roff_node_append(man, n); 1045 n->flags |= NODE_VALID | NODE_ENDED; 1046 man->next = ROFF_NEXT_SIBLING; 1047 } 1048 } 1049 1050 void 1051 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1052 { 1053 1054 /* Adjust siblings. */ 1055 1056 if (n->prev) 1057 n->prev->next = n->next; 1058 if (n->next) 1059 n->next->prev = n->prev; 1060 1061 /* Adjust parent. */ 1062 1063 if (n->parent != NULL) { 1064 if (n->parent->child == n) 1065 n->parent->child = n->next; 1066 if (n->parent->last == n) 1067 n->parent->last = n->prev; 1068 } 1069 1070 /* Adjust parse point. */ 1071 1072 if (man == NULL) 1073 return; 1074 if (man->last == n) { 1075 if (n->prev == NULL) { 1076 man->last = n->parent; 1077 man->next = ROFF_NEXT_CHILD; 1078 } else { 1079 man->last = n->prev; 1080 man->next = ROFF_NEXT_SIBLING; 1081 } 1082 } 1083 if (man->meta.first == n) 1084 man->meta.first = NULL; 1085 } 1086 1087 void 1088 roff_node_relink(struct roff_man *man, struct roff_node *n) 1089 { 1090 roff_node_unlink(man, n); 1091 n->prev = n->next = NULL; 1092 roff_node_append(man, n); 1093 } 1094 1095 void 1096 roff_node_free(struct roff_node *n) 1097 { 1098 1099 if (n->args != NULL) 1100 mdoc_argv_free(n->args); 1101 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1102 free(n->norm); 1103 eqn_box_free(n->eqn); 1104 free(n->string); 1105 free(n->tag); 1106 free(n); 1107 } 1108 1109 void 1110 roff_node_delete(struct roff_man *man, struct roff_node *n) 1111 { 1112 1113 while (n->child != NULL) 1114 roff_node_delete(man, n->child); 1115 roff_node_unlink(man, n); 1116 roff_node_free(n); 1117 } 1118 1119 int 1120 roff_node_transparent(struct roff_node *n) 1121 { 1122 if (n == NULL) 1123 return 0; 1124 if (n->type == 
ROFFT_COMMENT || n->flags & NODE_NOPRT) 1125 return 1; 1126 return roff_tok_transparent(n->tok); 1127 } 1128 1129 int 1130 roff_tok_transparent(enum roff_tok tok) 1131 { 1132 switch (tok) { 1133 case ROFF_ft: 1134 case ROFF_ll: 1135 case ROFF_mc: 1136 case ROFF_po: 1137 case ROFF_ta: 1138 case MDOC_Db: 1139 case MDOC_Es: 1140 case MDOC_Sm: 1141 case MDOC_Tg: 1142 case MAN_DT: 1143 case MAN_UC: 1144 case MAN_PD: 1145 case MAN_AT: 1146 return 1; 1147 default: 1148 return 0; 1149 } 1150 } 1151 1152 struct roff_node * 1153 roff_node_child(struct roff_node *n) 1154 { 1155 for (n = n->child; roff_node_transparent(n); n = n->next) 1156 continue; 1157 return n; 1158 } 1159 1160 struct roff_node * 1161 roff_node_prev(struct roff_node *n) 1162 { 1163 do { 1164 n = n->prev; 1165 } while (roff_node_transparent(n)); 1166 return n; 1167 } 1168 1169 struct roff_node * 1170 roff_node_next(struct roff_node *n) 1171 { 1172 do { 1173 n = n->next; 1174 } while (roff_node_transparent(n)); 1175 return n; 1176 } 1177 1178 void 1179 deroff(char **dest, const struct roff_node *n) 1180 { 1181 char *cp; 1182 size_t sz; 1183 1184 if (n->string == NULL) { 1185 for (n = n->child; n != NULL; n = n->next) 1186 deroff(dest, n); 1187 return; 1188 } 1189 1190 /* Skip leading whitespace. */ 1191 1192 for (cp = n->string; *cp != '\0'; cp++) { 1193 if (cp[0] == '\\' && cp[1] != '\0' && 1194 strchr(" %&0^|~", cp[1]) != NULL) 1195 cp++; 1196 else if ( ! isspace((unsigned char)*cp)) 1197 break; 1198 } 1199 1200 /* Skip trailing backslash. */ 1201 1202 sz = strlen(cp); 1203 if (sz > 0 && cp[sz - 1] == '\\') 1204 sz--; 1205 1206 /* Skip trailing whitespace. */ 1207 1208 for (; sz; sz--) 1209 if ( ! isspace((unsigned char)cp[sz-1])) 1210 break; 1211 1212 /* Skip empty strings. 
*/ 1213 1214 if (sz == 0) 1215 return; 1216 1217 if (*dest == NULL) { 1218 *dest = mandoc_strndup(cp, sz); 1219 return; 1220 } 1221 1222 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1223 free(*dest); 1224 *dest = cp; 1225 } 1226 1227 /* --- main functions of the roff parser ---------------------------------- */ 1228 1229 /* 1230 * In the current line, expand escape sequences that produce parsable 1231 * input text. Also check the syntax of the remaining escape sequences, 1232 * which typically produce output glyphs or change formatter state. 1233 */ 1234 static int 1235 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1236 { 1237 struct mctx *ctx; /* current macro call context */ 1238 char ubuf[24]; /* buffer to print the number */ 1239 struct roff_node *n; /* used for header comments */ 1240 const char *start; /* start of the string to process */ 1241 char *stesc; /* start of an escape sequence ('\\') */ 1242 const char *esct; /* type of esccape sequence */ 1243 char *ep; /* end of comment string */ 1244 const char *stnam; /* start of the name, after "[(*" */ 1245 const char *cp; /* end of the name, e.g. 
before ']' */ 1246 const char *res; /* the string to be substituted */ 1247 char *nbuf; /* new buffer to copy buf->buf to */ 1248 size_t maxl; /* expected length of the escape name */ 1249 size_t naml; /* actual length of the escape name */ 1250 size_t asz; /* length of the replacement */ 1251 size_t rsz; /* length of the rest of the string */ 1252 int inaml; /* length returned from mandoc_escape() */ 1253 int expand_count; /* to avoid infinite loops */ 1254 int npos; /* position in numeric expression */ 1255 int arg_complete; /* argument not interrupted by eol */ 1256 int quote_args; /* true for \\$@, false for \\$* */ 1257 int done; /* no more input available */ 1258 int deftype; /* type of definition to paste */ 1259 int rcsid; /* kind of RCS id seen */ 1260 enum mandocerr err; /* for escape sequence problems */ 1261 char sign; /* increment number register */ 1262 char term; /* character terminating the escape */ 1263 1264 /* Search forward for comments. */ 1265 1266 done = 0; 1267 start = buf->buf + pos; 1268 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { 1269 if (stesc[0] != newesc || stesc[1] == '\0') 1270 continue; 1271 stesc++; 1272 if (*stesc != '"' && *stesc != '#') 1273 continue; 1274 1275 /* Comment found, look for RCS id. */ 1276 1277 rcsid = 0; 1278 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { 1279 rcsid = 1 << MANDOC_OS_OPENBSD; 1280 cp += 8; 1281 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { 1282 rcsid = 1 << MANDOC_OS_NETBSD; 1283 cp += 7; 1284 } 1285 if (cp != NULL && 1286 isalnum((unsigned char)*cp) == 0 && 1287 strchr(cp, '$') != NULL) { 1288 if (r->man->meta.rcsids & rcsid) 1289 mandoc_msg(MANDOCERR_RCS_REP, ln, 1290 (int)(stesc - buf->buf) + 1, 1291 "%s", stesc + 1); 1292 r->man->meta.rcsids |= rcsid; 1293 } 1294 1295 /* Handle trailing whitespace. 
*/ 1296 1297 ep = strchr(stesc--, '\0') - 1; 1298 if (*ep == '\n') { 1299 done = 1; 1300 ep--; 1301 } 1302 if (*ep == ' ' || *ep == '\t') 1303 mandoc_msg(MANDOCERR_SPACE_EOL, 1304 ln, (int)(ep - buf->buf), NULL); 1305 1306 /* 1307 * Save comments preceding the title macro 1308 * in the syntax tree. 1309 */ 1310 1311 if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) { 1312 while (*ep == ' ' || *ep == '\t') 1313 ep--; 1314 ep[1] = '\0'; 1315 n = roff_node_alloc(r->man, 1316 ln, stesc + 1 - buf->buf, 1317 ROFFT_COMMENT, TOKEN_NONE); 1318 n->string = mandoc_strdup(stesc + 2); 1319 roff_node_append(r->man, n); 1320 n->flags |= NODE_VALID | NODE_ENDED; 1321 r->man->next = ROFF_NEXT_SIBLING; 1322 } 1323 1324 /* Line continuation with comment. */ 1325 1326 if (stesc[1] == '#') { 1327 *stesc = '\0'; 1328 return ROFF_IGN | ROFF_APPEND; 1329 } 1330 1331 /* Discard normal comments. */ 1332 1333 while (stesc > start && stesc[-1] == ' ' && 1334 (stesc == start + 1 || stesc[-2] != '\\')) 1335 stesc--; 1336 *stesc = '\0'; 1337 break; 1338 } 1339 if (stesc == start) 1340 return ROFF_CONT; 1341 stesc--; 1342 1343 /* Notice the end of the input. */ 1344 1345 if (*stesc == '\n') { 1346 *stesc-- = '\0'; 1347 done = 1; 1348 } 1349 1350 expand_count = 0; 1351 while (stesc >= start) { 1352 if (*stesc != newesc) { 1353 1354 /* 1355 * If we have a non-standard escape character, 1356 * escape literal backslashes because all 1357 * processing in subsequent functions uses 1358 * the standard escaping rules. 1359 */ 1360 1361 if (newesc != ASCII_ESC && *stesc == '\\') { 1362 *stesc = '\0'; 1363 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", 1364 buf->buf, stesc + 1) + 1; 1365 start = nbuf + pos; 1366 stesc = nbuf + (stesc - buf->buf); 1367 free(buf->buf); 1368 buf->buf = nbuf; 1369 } 1370 1371 /* Search backwards for the next escape. */ 1372 1373 stesc--; 1374 continue; 1375 } 1376 1377 /* If it is escaped, skip it. 
*/ 1378 1379 for (cp = stesc - 1; cp >= start; cp--) 1380 if (*cp != r->escape) 1381 break; 1382 1383 if ((stesc - cp) % 2 == 0) { 1384 while (stesc > cp) 1385 *stesc-- = '\\'; 1386 continue; 1387 } else if (stesc[1] != '\0') { 1388 *stesc = '\\'; 1389 } else { 1390 *stesc-- = '\0'; 1391 if (done) 1392 continue; 1393 else 1394 return ROFF_IGN | ROFF_APPEND; 1395 } 1396 1397 /* Decide whether to expand or to check only. */ 1398 1399 term = '\0'; 1400 cp = stesc + 1; 1401 if (*cp == 'E') 1402 cp++; 1403 esct = cp; 1404 switch (*esct) { 1405 case '*': 1406 case '$': 1407 res = NULL; 1408 break; 1409 case 'B': 1410 case 'w': 1411 term = cp[1]; 1412 /* FALLTHROUGH */ 1413 case 'n': 1414 sign = cp[1]; 1415 if (sign == '+' || sign == '-') 1416 cp++; 1417 res = ubuf; 1418 break; 1419 default: 1420 err = MANDOCERR_OK; 1421 switch(mandoc_escape(&cp, &stnam, &inaml)) { 1422 case ESCAPE_SPECIAL: 1423 if (mchars_spec2cp(stnam, inaml) >= 0) 1424 break; 1425 /* FALLTHROUGH */ 1426 case ESCAPE_ERROR: 1427 err = MANDOCERR_ESC_BAD; 1428 break; 1429 case ESCAPE_UNDEF: 1430 err = MANDOCERR_ESC_UNDEF; 1431 break; 1432 case ESCAPE_UNSUPP: 1433 err = MANDOCERR_ESC_UNSUPP; 1434 break; 1435 default: 1436 break; 1437 } 1438 if (err != MANDOCERR_OK) 1439 mandoc_msg(err, ln, (int)(stesc - buf->buf), 1440 "%.*s", (int)(cp - stesc), stesc); 1441 stesc--; 1442 continue; 1443 } 1444 1445 if (EXPAND_LIMIT < ++expand_count) { 1446 mandoc_msg(MANDOCERR_ROFFLOOP, 1447 ln, (int)(stesc - buf->buf), NULL); 1448 return ROFF_IGN; 1449 } 1450 1451 /* 1452 * The third character decides the length 1453 * of the name of the string or register. 1454 * Save a pointer to the name. 
1455 */ 1456 1457 if (term == '\0') { 1458 switch (*++cp) { 1459 case '\0': 1460 maxl = 0; 1461 break; 1462 case '(': 1463 cp++; 1464 maxl = 2; 1465 break; 1466 case '[': 1467 cp++; 1468 term = ']'; 1469 maxl = 0; 1470 break; 1471 default: 1472 maxl = 1; 1473 break; 1474 } 1475 } else { 1476 cp += 2; 1477 maxl = 0; 1478 } 1479 stnam = cp; 1480 1481 /* Advance to the end of the name. */ 1482 1483 naml = 0; 1484 arg_complete = 1; 1485 while (maxl == 0 || naml < maxl) { 1486 if (*cp == '\0') { 1487 mandoc_msg(MANDOCERR_ESC_BAD, ln, 1488 (int)(stesc - buf->buf), "%s", stesc); 1489 arg_complete = 0; 1490 break; 1491 } 1492 if (maxl == 0 && *cp == term) { 1493 cp++; 1494 break; 1495 } 1496 if (*cp++ != '\\' || *esct != 'w') { 1497 naml++; 1498 continue; 1499 } 1500 switch (mandoc_escape(&cp, NULL, NULL)) { 1501 case ESCAPE_SPECIAL: 1502 case ESCAPE_UNICODE: 1503 case ESCAPE_NUMBERED: 1504 case ESCAPE_UNDEF: 1505 case ESCAPE_OVERSTRIKE: 1506 naml++; 1507 break; 1508 default: 1509 break; 1510 } 1511 } 1512 1513 /* 1514 * Retrieve the replacement string; if it is 1515 * undefined, resume searching for escapes. 1516 */ 1517 1518 switch (*esct) { 1519 case '*': 1520 if (arg_complete) { 1521 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1522 res = roff_getstrn(r, stnam, naml, &deftype); 1523 1524 /* 1525 * If not overriden, let \*(.T 1526 * through to the formatters. 1527 */ 1528 1529 if (res == NULL && naml == 2 && 1530 stnam[0] == '.' && stnam[1] == 'T') { 1531 roff_setstrn(&r->strtab, 1532 ".T", 2, NULL, 0, 0); 1533 stesc--; 1534 continue; 1535 } 1536 } 1537 break; 1538 case '$': 1539 if (r->mstackpos < 0) { 1540 mandoc_msg(MANDOCERR_ARG_UNDEF, ln, 1541 (int)(stesc - buf->buf), "%.3s", stesc); 1542 break; 1543 } 1544 ctx = r->mstack + r->mstackpos; 1545 npos = esct[1] - '1'; 1546 if (npos >= 0 && npos <= 8) { 1547 res = npos < ctx->argc ? 
1548 ctx->argv[npos] : ""; 1549 break; 1550 } 1551 if (esct[1] == '*') 1552 quote_args = 0; 1553 else if (esct[1] == '@') 1554 quote_args = 1; 1555 else { 1556 mandoc_msg(MANDOCERR_ARG_NONUM, ln, 1557 (int)(stesc - buf->buf), "%.3s", stesc); 1558 break; 1559 } 1560 asz = 0; 1561 for (npos = 0; npos < ctx->argc; npos++) { 1562 if (npos) 1563 asz++; /* blank */ 1564 if (quote_args) 1565 asz += 2; /* quotes */ 1566 asz += strlen(ctx->argv[npos]); 1567 } 1568 if (asz != 3) { 1569 rsz = buf->sz - (stesc - buf->buf) - 3; 1570 if (asz < 3) 1571 memmove(stesc + asz, stesc + 3, rsz); 1572 buf->sz += asz - 3; 1573 nbuf = mandoc_realloc(buf->buf, buf->sz); 1574 start = nbuf + pos; 1575 stesc = nbuf + (stesc - buf->buf); 1576 buf->buf = nbuf; 1577 if (asz > 3) 1578 memmove(stesc + asz, stesc + 3, rsz); 1579 } 1580 for (npos = 0; npos < ctx->argc; npos++) { 1581 if (npos) 1582 *stesc++ = ' '; 1583 if (quote_args) 1584 *stesc++ = '"'; 1585 cp = ctx->argv[npos]; 1586 while (*cp != '\0') 1587 *stesc++ = *cp++; 1588 if (quote_args) 1589 *stesc++ = '"'; 1590 } 1591 continue; 1592 case 'B': 1593 npos = 0; 1594 ubuf[0] = arg_complete && 1595 roff_evalnum(r, ln, stnam, &npos, 1596 NULL, ROFFNUM_SCALE) && 1597 stnam + npos + 1 == cp ? '1' : '0'; 1598 ubuf[1] = '\0'; 1599 break; 1600 case 'n': 1601 if (arg_complete) 1602 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1603 roff_getregn(r, stnam, naml, sign)); 1604 else 1605 ubuf[0] = '\0'; 1606 break; 1607 case 'w': 1608 /* use even incomplete args */ 1609 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1610 24 * (int)naml); 1611 break; 1612 } 1613 1614 if (res == NULL) { 1615 if (*esct == '*') 1616 mandoc_msg(MANDOCERR_STR_UNDEF, 1617 ln, (int)(stesc - buf->buf), 1618 "%.*s", (int)naml, stnam); 1619 res = ""; 1620 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1621 mandoc_msg(MANDOCERR_ROFFLOOP, 1622 ln, (int)(stesc - buf->buf), NULL); 1623 return ROFF_IGN; 1624 } 1625 1626 /* Replace the escape sequence by the string. 
*/ 1627 1628 *stesc = '\0'; 1629 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1630 buf->buf, res, cp) + 1; 1631 1632 /* Prepare for the next replacement. */ 1633 1634 start = nbuf + pos; 1635 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1636 free(buf->buf); 1637 buf->buf = nbuf; 1638 } 1639 return ROFF_CONT; 1640 } 1641 1642 /* 1643 * Parse a quoted or unquoted roff-style request or macro argument. 1644 * Return a pointer to the parsed argument, which is either the original 1645 * pointer or advanced by one byte in case the argument is quoted. 1646 * NUL-terminate the argument in place. 1647 * Collapse pairs of quotes inside quoted arguments. 1648 * Advance the argument pointer to the next argument, 1649 * or to the NUL byte terminating the argument line. 1650 */ 1651 char * 1652 roff_getarg(struct roff *r, char **cpp, int ln, int *pos) 1653 { 1654 struct buf buf; 1655 char *cp, *start; 1656 int newesc, pairs, quoted, white; 1657 1658 /* Quoting can only start with a new word. */ 1659 start = *cpp; 1660 quoted = 0; 1661 if ('"' == *start) { 1662 quoted = 1; 1663 start++; 1664 } 1665 1666 newesc = pairs = white = 0; 1667 for (cp = start; '\0' != *cp; cp++) { 1668 1669 /* 1670 * Move the following text left 1671 * after quoted quotes and after "\\" and "\t". 1672 */ 1673 if (pairs) 1674 cp[-pairs] = cp[0]; 1675 1676 if ('\\' == cp[0]) { 1677 /* 1678 * In copy mode, translate double to single 1679 * backslashes and backslash-t to literal tabs. 1680 */ 1681 switch (cp[1]) { 1682 case 'a': 1683 case 't': 1684 cp[-pairs] = '\t'; 1685 pairs++; 1686 cp++; 1687 break; 1688 case '\\': 1689 newesc = 1; 1690 cp[-pairs] = ASCII_ESC; 1691 pairs++; 1692 cp++; 1693 break; 1694 case ' ': 1695 /* Skip escaped blanks. */ 1696 if (0 == quoted) 1697 cp++; 1698 break; 1699 default: 1700 break; 1701 } 1702 } else if (0 == quoted) { 1703 if (' ' == cp[0]) { 1704 /* Unescaped blanks end unquoted args. 
*/ 1705 white = 1; 1706 break; 1707 } 1708 } else if ('"' == cp[0]) { 1709 if ('"' == cp[1]) { 1710 /* Quoted quotes collapse. */ 1711 pairs++; 1712 cp++; 1713 } else { 1714 /* Unquoted quotes end quoted args. */ 1715 quoted = 2; 1716 break; 1717 } 1718 } 1719 } 1720 1721 /* Quoted argument without a closing quote. */ 1722 if (1 == quoted) 1723 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1724 1725 /* NUL-terminate this argument and move to the next one. */ 1726 if (pairs) 1727 cp[-pairs] = '\0'; 1728 if ('\0' != *cp) { 1729 *cp++ = '\0'; 1730 while (' ' == *cp) 1731 cp++; 1732 } 1733 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1734 *cpp = cp; 1735 1736 if ('\0' == *cp && (white || ' ' == cp[-1])) 1737 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1738 1739 start = mandoc_strdup(start); 1740 if (newesc == 0) 1741 return start; 1742 1743 buf.buf = start; 1744 buf.sz = strlen(start) + 1; 1745 buf.next = NULL; 1746 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { 1747 free(buf.buf); 1748 buf.buf = mandoc_strdup(""); 1749 } 1750 return buf.buf; 1751 } 1752 1753 1754 /* 1755 * Process text streams. 1756 */ 1757 static int 1758 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1759 { 1760 size_t sz; 1761 const char *start; 1762 char *p; 1763 int isz; 1764 enum mandoc_esc esc; 1765 1766 /* Spring the input line trap. */ 1767 1768 if (roffit_lines == 1) { 1769 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1770 free(buf->buf); 1771 buf->buf = p; 1772 buf->sz = isz + 1; 1773 *offs = 0; 1774 free(roffit_macro); 1775 roffit_lines = 0; 1776 return ROFF_REPARSE; 1777 } else if (roffit_lines > 1) 1778 --roffit_lines; 1779 1780 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1781 if (roffce_lines < 1) { 1782 r->man->last = roffce_node; 1783 r->man->next = ROFF_NEXT_SIBLING; 1784 roffce_lines = 0; 1785 roffce_node = NULL; 1786 } else 1787 roffce_lines--; 1788 } 1789 1790 /* Convert all breakable hyphens into ASCII_HYPH. 
*/ 1791 1792 start = p = buf->buf + pos; 1793 1794 while (*p != '\0') { 1795 sz = strcspn(p, "-\\"); 1796 p += sz; 1797 1798 if (*p == '\0') 1799 break; 1800 1801 if (*p == '\\') { 1802 /* Skip over escapes. */ 1803 p++; 1804 esc = mandoc_escape((const char **)&p, NULL, NULL); 1805 if (esc == ESCAPE_ERROR) 1806 break; 1807 while (*p == '-') 1808 p++; 1809 continue; 1810 } else if (p == start) { 1811 p++; 1812 continue; 1813 } 1814 1815 if (isalpha((unsigned char)p[-1]) && 1816 isalpha((unsigned char)p[1])) 1817 *p = ASCII_HYPH; 1818 p++; 1819 } 1820 return ROFF_CONT; 1821 } 1822 1823 int 1824 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) 1825 { 1826 enum roff_tok t; 1827 int e; 1828 int pos; /* parse point */ 1829 int spos; /* saved parse point for messages */ 1830 int ppos; /* original offset in buf->buf */ 1831 int ctl; /* macro line (boolean) */ 1832 1833 ppos = pos = *offs; 1834 1835 /* Handle in-line equation delimiters. */ 1836 1837 if (r->tbl == NULL && 1838 r->last_eqn != NULL && r->last_eqn->delim && 1839 (r->eqn == NULL || r->eqn_inline)) { 1840 e = roff_eqndelim(r, buf, pos); 1841 if (e == ROFF_REPARSE) 1842 return e; 1843 assert(e == ROFF_CONT); 1844 } 1845 1846 /* Expand some escape sequences. */ 1847 1848 e = roff_expand(r, buf, ln, pos, r->escape); 1849 if ((e & ROFF_MASK) == ROFF_IGN) 1850 return e; 1851 assert(e == ROFF_CONT); 1852 1853 ctl = roff_getcontrol(r, buf->buf, &pos); 1854 1855 /* 1856 * First, if a scope is open and we're not a macro, pass the 1857 * text through the macro's filter. 1858 * Equations process all content themselves. 1859 * Tables process almost all content themselves, but we want 1860 * to warn about macros before passing it there. 1861 */ 1862 1863 if (r->last != NULL && ! 
ctl) { 1864 t = r->last->tok; 1865 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1866 if ((e & ROFF_MASK) == ROFF_IGN) 1867 return e; 1868 e &= ~ROFF_MASK; 1869 } else 1870 e = ROFF_IGN; 1871 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1872 eqn_read(r->eqn, buf->buf + ppos); 1873 return e; 1874 } 1875 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1876 tbl_read(r->tbl, ln, buf->buf, ppos); 1877 roff_addtbl(r->man, ln, r->tbl); 1878 return e; 1879 } 1880 if ( ! ctl) { 1881 r->options &= ~MPARSE_COMMENT; 1882 return roff_parsetext(r, buf, pos, offs) | e; 1883 } 1884 1885 /* Skip empty request lines. */ 1886 1887 if (buf->buf[pos] == '"') { 1888 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL); 1889 return ROFF_IGN; 1890 } else if (buf->buf[pos] == '\0') 1891 return ROFF_IGN; 1892 1893 /* 1894 * If a scope is open, go to the child handler for that macro, 1895 * as it may want to preprocess before doing anything with it. 1896 * Don't do so if an equation is open. 1897 */ 1898 1899 if (r->last) { 1900 t = r->last->tok; 1901 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1902 } 1903 1904 /* No scope is open. This is a new request or macro. */ 1905 1906 r->options &= ~MPARSE_COMMENT; 1907 spos = pos; 1908 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1909 1910 /* Tables ignore most macros. */ 1911 1912 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS || 1913 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) { 1914 mandoc_msg(MANDOCERR_TBLMACRO, 1915 ln, pos, "%s", buf->buf + spos); 1916 if (t != TOKEN_NONE) 1917 return ROFF_IGN; 1918 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1919 pos++; 1920 while (buf->buf[pos] == ' ') 1921 pos++; 1922 tbl_read(r->tbl, ln, buf->buf, pos); 1923 roff_addtbl(r->man, ln, r->tbl); 1924 return ROFF_IGN; 1925 } 1926 1927 /* For now, let high level macros abort .ce mode. 
*/ 1928 1929 if (ctl && roffce_node != NULL && 1930 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 1931 t == ROFF_TH || t == ROFF_TS)) { 1932 r->man->last = roffce_node; 1933 r->man->next = ROFF_NEXT_SIBLING; 1934 roffce_lines = 0; 1935 roffce_node = NULL; 1936 } 1937 1938 /* 1939 * This is neither a roff request nor a user-defined macro. 1940 * Let the standard macro set parsers handle it. 1941 */ 1942 1943 if (t == TOKEN_NONE) 1944 return ROFF_CONT; 1945 1946 /* Execute a roff request or a user defined macro. */ 1947 1948 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); 1949 } 1950 1951 /* 1952 * Internal interface function to tell the roff parser that execution 1953 * of the current macro ended. This is required because macro 1954 * definitions usually do not end with a .return request. 1955 */ 1956 void 1957 roff_userret(struct roff *r) 1958 { 1959 struct mctx *ctx; 1960 int i; 1961 1962 assert(r->mstackpos >= 0); 1963 ctx = r->mstack + r->mstackpos; 1964 for (i = 0; i < ctx->argc; i++) 1965 free(ctx->argv[i]); 1966 ctx->argc = 0; 1967 r->mstackpos--; 1968 } 1969 1970 void 1971 roff_endparse(struct roff *r) 1972 { 1973 if (r->last != NULL) 1974 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, 1975 r->last->col, "%s", roff_name[r->last->tok]); 1976 1977 if (r->eqn != NULL) { 1978 mandoc_msg(MANDOCERR_BLK_NOEND, 1979 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1980 eqn_parse(r->eqn); 1981 r->eqn = NULL; 1982 } 1983 1984 if (r->tbl != NULL) { 1985 tbl_end(r->tbl, 1); 1986 r->tbl = NULL; 1987 } 1988 } 1989 1990 /* 1991 * Parse a roff node's type from the input buffer. This must be in the 1992 * form of ".foo xxx" in the usual way. 
1993 */ 1994 static enum roff_tok 1995 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1996 { 1997 char *cp; 1998 const char *mac; 1999 size_t maclen; 2000 int deftype; 2001 enum roff_tok t; 2002 2003 cp = buf + *pos; 2004 2005 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 2006 return TOKEN_NONE; 2007 2008 mac = cp; 2009 maclen = roff_getname(r, &cp, ln, ppos); 2010 2011 deftype = ROFFDEF_USER | ROFFDEF_REN; 2012 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 2013 switch (deftype) { 2014 case ROFFDEF_USER: 2015 t = ROFF_USERDEF; 2016 break; 2017 case ROFFDEF_REN: 2018 t = ROFF_RENAMED; 2019 break; 2020 default: 2021 t = roffhash_find(r->reqtab, mac, maclen); 2022 break; 2023 } 2024 if (t != TOKEN_NONE) 2025 *pos = cp - buf; 2026 else if (deftype == ROFFDEF_UNDEF) { 2027 /* Using an undefined macro defines it to be empty. */ 2028 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 2029 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 2030 } 2031 return t; 2032 } 2033 2034 /* --- handling of request blocks ----------------------------------------- */ 2035 2036 /* 2037 * Close a macro definition block or an "ignore" block. 2038 */ 2039 static int 2040 roff_cblock(ROFF_ARGS) 2041 { 2042 int rr; 2043 2044 if (r->last == NULL) { 2045 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2046 return ROFF_IGN; 2047 } 2048 2049 switch (r->last->tok) { 2050 case ROFF_am: 2051 case ROFF_ami: 2052 case ROFF_de: 2053 case ROFF_dei: 2054 case ROFF_ig: 2055 break; 2056 case ROFF_am1: 2057 case ROFF_de1: 2058 /* Remapped in roff_block(). */ 2059 abort(); 2060 default: 2061 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2062 return ROFF_IGN; 2063 } 2064 2065 roffnode_pop(r); 2066 roffnode_cleanscope(r); 2067 2068 /* 2069 * If a conditional block with braces is still open, 2070 * check for "\}" block end markers. 2071 */ 2072 2073 if (r->last != NULL && r->last->endspan < 0) { 2074 rr = 1; /* If arguments follow "\}", warn about them. 
*/ 2075 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2076 } 2077 2078 if (buf->buf[pos] != '\0') 2079 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 2080 ".. %s", buf->buf + pos); 2081 2082 return ROFF_IGN; 2083 } 2084 2085 /* 2086 * Pop all nodes ending at the end of the current input line. 2087 * Return the number of loops ended. 2088 */ 2089 static int 2090 roffnode_cleanscope(struct roff *r) 2091 { 2092 int inloop; 2093 2094 inloop = 0; 2095 while (r->last != NULL && r->last->endspan > 0) { 2096 if (--r->last->endspan != 0) 2097 break; 2098 inloop += roffnode_pop(r); 2099 } 2100 return inloop; 2101 } 2102 2103 /* 2104 * Handle the closing "\}" of a conditional block. 2105 * Apart from generating warnings, this only pops nodes. 2106 * Return the number of loops ended. 2107 */ 2108 static int 2109 roff_ccond(struct roff *r, int ln, int ppos) 2110 { 2111 if (NULL == r->last) { 2112 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2113 return 0; 2114 } 2115 2116 switch (r->last->tok) { 2117 case ROFF_el: 2118 case ROFF_ie: 2119 case ROFF_if: 2120 case ROFF_while: 2121 break; 2122 default: 2123 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2124 return 0; 2125 } 2126 2127 if (r->last->endspan > -1) { 2128 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2129 return 0; 2130 } 2131 2132 return roffnode_pop(r) + roffnode_cleanscope(r); 2133 } 2134 2135 static int 2136 roff_block(ROFF_ARGS) 2137 { 2138 const char *name, *value; 2139 char *call, *cp, *iname, *rname; 2140 size_t csz, namesz, rsz; 2141 int deftype; 2142 2143 /* Ignore groff compatibility mode for now. */ 2144 2145 if (tok == ROFF_de1) 2146 tok = ROFF_de; 2147 else if (tok == ROFF_dei1) 2148 tok = ROFF_dei; 2149 else if (tok == ROFF_am1) 2150 tok = ROFF_am; 2151 else if (tok == ROFF_ami1) 2152 tok = ROFF_ami; 2153 2154 /* Parse the macro name argument. 
*/ 2155 2156 cp = buf->buf + pos; 2157 if (tok == ROFF_ig) { 2158 iname = NULL; 2159 namesz = 0; 2160 } else { 2161 iname = cp; 2162 namesz = roff_getname(r, &cp, ln, ppos); 2163 iname[namesz] = '\0'; 2164 } 2165 2166 /* Resolve the macro name argument if it is indirect. */ 2167 2168 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2169 deftype = ROFFDEF_USER; 2170 name = roff_getstrn(r, iname, namesz, &deftype); 2171 if (name == NULL) { 2172 mandoc_msg(MANDOCERR_STR_UNDEF, 2173 ln, (int)(iname - buf->buf), 2174 "%.*s", (int)namesz, iname); 2175 namesz = 0; 2176 } else 2177 namesz = strlen(name); 2178 } else 2179 name = iname; 2180 2181 if (namesz == 0 && tok != ROFF_ig) { 2182 mandoc_msg(MANDOCERR_REQ_EMPTY, 2183 ln, ppos, "%s", roff_name[tok]); 2184 return ROFF_IGN; 2185 } 2186 2187 roffnode_push(r, tok, name, ln, ppos); 2188 2189 /* 2190 * At the beginning of a `de' macro, clear the existing string 2191 * with the same name, if there is one. New content will be 2192 * appended from roff_block_text() in multiline mode. 2193 */ 2194 2195 if (tok == ROFF_de || tok == ROFF_dei) { 2196 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2197 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2198 } else if (tok == ROFF_am || tok == ROFF_ami) { 2199 deftype = ROFFDEF_ANY; 2200 value = roff_getstrn(r, iname, namesz, &deftype); 2201 switch (deftype) { /* Before appending, ... */ 2202 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2203 roff_setstrn(&r->strtab, name, namesz, 2204 value, strlen(value), 0); 2205 break; 2206 case ROFFDEF_REN: /* call original standard macro. */ 2207 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2208 (int)strlen(value), value); 2209 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2210 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2211 free(call); 2212 break; 2213 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2214 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2215 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2216 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2217 (int)rsz, rname); 2218 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2219 free(call); 2220 free(rname); 2221 break; 2222 default: 2223 break; 2224 } 2225 } 2226 2227 if (*cp == '\0') 2228 return ROFF_IGN; 2229 2230 /* Get the custom end marker. */ 2231 2232 iname = cp; 2233 namesz = roff_getname(r, &cp, ln, ppos); 2234 2235 /* Resolve the end marker if it is indirect. */ 2236 2237 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2238 deftype = ROFFDEF_USER; 2239 name = roff_getstrn(r, iname, namesz, &deftype); 2240 if (name == NULL) { 2241 mandoc_msg(MANDOCERR_STR_UNDEF, 2242 ln, (int)(iname - buf->buf), 2243 "%.*s", (int)namesz, iname); 2244 namesz = 0; 2245 } else 2246 namesz = strlen(name); 2247 } else 2248 name = iname; 2249 2250 if (namesz) 2251 r->last->end = mandoc_strndup(name, namesz); 2252 2253 if (*cp != '\0') 2254 mandoc_msg(MANDOCERR_ARG_EXCESS, 2255 ln, pos, ".%s ... %s", roff_name[tok], cp); 2256 2257 return ROFF_IGN; 2258 } 2259 2260 static int 2261 roff_block_sub(ROFF_ARGS) 2262 { 2263 enum roff_tok t; 2264 int i, j; 2265 2266 /* 2267 * First check whether a custom macro exists at this level. If 2268 * it does, then check against it. This is some of groff's 2269 * stranger behaviours. If we encountered a custom end-scope 2270 * tag and that tag also happens to be a "real" macro, then we 2271 * need to try interpreting it again as a real macro. If it's 2272 * not, then return ignore. Else continue. 
2273 */ 2274 2275 if (r->last->end) { 2276 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2277 if (buf->buf[i] != r->last->end[j]) 2278 break; 2279 2280 if (r->last->end[j] == '\0' && 2281 (buf->buf[i] == '\0' || 2282 buf->buf[i] == ' ' || 2283 buf->buf[i] == '\t')) { 2284 roffnode_pop(r); 2285 roffnode_cleanscope(r); 2286 2287 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2288 i++; 2289 2290 pos = i; 2291 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2292 TOKEN_NONE) 2293 return ROFF_RERUN; 2294 return ROFF_IGN; 2295 } 2296 } 2297 2298 /* 2299 * If we have no custom end-query or lookup failed, then try 2300 * pulling it out of the hashtable. 2301 */ 2302 2303 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2304 2305 if (t != ROFF_cblock) { 2306 if (tok != ROFF_ig) 2307 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2308 return ROFF_IGN; 2309 } 2310 2311 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2312 } 2313 2314 static int 2315 roff_block_text(ROFF_ARGS) 2316 { 2317 2318 if (tok != ROFF_ig) 2319 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2320 2321 return ROFF_IGN; 2322 } 2323 2324 /* 2325 * Check for a closing "\}" and handle it. 2326 * In this function, the final "int *offs" argument is used for 2327 * different purposes than elsewhere: 2328 * Input: *offs == 0: caller wants to discard arguments following \} 2329 * *offs == 1: caller wants to preserve text following \} 2330 * Output: *offs = 0: tell caller to discard input line 2331 * *offs = 1: tell caller to use input line 2332 */ 2333 static int 2334 roff_cond_checkend(ROFF_ARGS) 2335 { 2336 char *ep; 2337 int endloop, irc, rr; 2338 2339 irc = ROFF_IGN; 2340 rr = r->last->rule; 2341 endloop = tok != ROFF_while ? ROFF_IGN : 2342 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2343 if (roffnode_cleanscope(r)) 2344 irc |= endloop; 2345 2346 /* 2347 * If "\}" occurs on a macro line without a preceding macro or 2348 * a text line contains nothing else, drop the line completely. 
2349 */ 2350 2351 ep = buf->buf + pos; 2352 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0)) 2353 rr = 0; 2354 2355 /* 2356 * The closing delimiter "\}" rewinds the conditional scope 2357 * but is otherwise ignored when interpreting the line. 2358 */ 2359 2360 while ((ep = strchr(ep, '\\')) != NULL) { 2361 switch (ep[1]) { 2362 case '}': 2363 if (ep[2] == '\0') 2364 ep[0] = '\0'; 2365 else if (rr) 2366 ep[1] = '&'; 2367 else 2368 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2369 if (roff_ccond(r, ln, ep - buf->buf)) 2370 irc |= endloop; 2371 break; 2372 case '\0': 2373 ++ep; 2374 break; 2375 default: 2376 ep += 2; 2377 break; 2378 } 2379 } 2380 *offs = rr; 2381 return irc; 2382 } 2383 2384 /* 2385 * Parse and process a request or macro line in conditional scope. 2386 */ 2387 static int 2388 roff_cond_sub(ROFF_ARGS) 2389 { 2390 struct roffnode *bl; 2391 int irc, rr; 2392 enum roff_tok t; 2393 2394 rr = 0; /* If arguments follow "\}", skip them. */ 2395 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2396 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2397 2398 /* For now, let high level macros abort .ce mode. */ 2399 2400 if (roffce_node != NULL && 2401 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 2402 t == ROFF_TH || t == ROFF_TS)) { 2403 r->man->last = roffce_node; 2404 r->man->next = ROFF_NEXT_SIBLING; 2405 roffce_lines = 0; 2406 roffce_node = NULL; 2407 } 2408 2409 /* 2410 * Fully handle known macros when they are structurally 2411 * required or when the conditional evaluated to true. 2412 */ 2413 2414 if (t == ROFF_break) { 2415 if (irc & ROFF_LOOPMASK) 2416 irc = ROFF_IGN | ROFF_LOOPEXIT; 2417 else if (rr) { 2418 for (bl = r->last; bl != NULL; bl = bl->parent) { 2419 bl->rule = 0; 2420 if (bl->tok == ROFF_while) 2421 break; 2422 } 2423 } 2424 } else if (t != TOKEN_NONE && 2425 (rr || roffs[t].flags & ROFFMAC_STRUCT)) 2426 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2427 else 2428 irc |= rr ? 
ROFF_CONT : ROFF_IGN; 2429 return irc; 2430 } 2431 2432 /* 2433 * Parse and process a text line in conditional scope. 2434 */ 2435 static int 2436 roff_cond_text(ROFF_ARGS) 2437 { 2438 int irc, rr; 2439 2440 rr = 1; /* If arguments follow "\}", preserve them. */ 2441 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2442 if (rr) 2443 irc |= ROFF_CONT; 2444 return irc; 2445 } 2446 2447 /* --- handling of numeric and conditional expressions -------------------- */ 2448 2449 /* 2450 * Parse a single signed integer number. Stop at the first non-digit. 2451 * If there is at least one digit, return success and advance the 2452 * parse point, else return failure and let the parse point unchanged. 2453 * Ignore overflows, treat them just like the C language. 2454 */ 2455 static int 2456 roff_getnum(const char *v, int *pos, int *res, int flags) 2457 { 2458 int myres, scaled, n, p; 2459 2460 if (NULL == res) 2461 res = &myres; 2462 2463 p = *pos; 2464 n = v[p] == '-'; 2465 if (n || v[p] == '+') 2466 p++; 2467 2468 if (flags & ROFFNUM_WHITE) 2469 while (isspace((unsigned char)v[p])) 2470 p++; 2471 2472 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2473 *res = 10 * *res + v[p] - '0'; 2474 if (p == *pos + n) 2475 return 0; 2476 2477 if (n) 2478 *res = -*res; 2479 2480 /* Each number may be followed by one optional scaling unit. 
*/ 2481 2482 switch (v[p]) { 2483 case 'f': 2484 scaled = *res * 65536; 2485 break; 2486 case 'i': 2487 scaled = *res * 240; 2488 break; 2489 case 'c': 2490 scaled = *res * 240 / 2.54; 2491 break; 2492 case 'v': 2493 case 'P': 2494 scaled = *res * 40; 2495 break; 2496 case 'm': 2497 case 'n': 2498 scaled = *res * 24; 2499 break; 2500 case 'p': 2501 scaled = *res * 10 / 3; 2502 break; 2503 case 'u': 2504 scaled = *res; 2505 break; 2506 case 'M': 2507 scaled = *res * 6 / 25; 2508 break; 2509 default: 2510 scaled = *res; 2511 p--; 2512 break; 2513 } 2514 if (flags & ROFFNUM_SCALE) 2515 *res = scaled; 2516 2517 *pos = p + 1; 2518 return 1; 2519 } 2520 2521 /* 2522 * Evaluate a string comparison condition. 2523 * The first character is the delimiter. 2524 * Succeed if the string up to its second occurrence 2525 * matches the string up to its third occurence. 2526 * Advance the cursor after the third occurrence 2527 * or lacking that, to the end of the line. 2528 */ 2529 static int 2530 roff_evalstrcond(const char *v, int *pos) 2531 { 2532 const char *s1, *s2, *s3; 2533 int match; 2534 2535 match = 0; 2536 s1 = v + *pos; /* initial delimiter */ 2537 s2 = s1 + 1; /* for scanning the first string */ 2538 s3 = strchr(s2, *s1); /* for scanning the second string */ 2539 2540 if (NULL == s3) /* found no middle delimiter */ 2541 goto out; 2542 2543 while ('\0' != *++s3) { 2544 if (*s2 != *s3) { /* mismatch */ 2545 s3 = strchr(s3, *s1); 2546 break; 2547 } 2548 if (*s3 == *s1) { /* found the final delimiter */ 2549 match = 1; 2550 break; 2551 } 2552 s2++; 2553 } 2554 2555 out: 2556 if (NULL == s3) 2557 s3 = strchr(s2, '\0'); 2558 else if (*s3 != '\0') 2559 s3++; 2560 *pos = s3 - v; 2561 return match; 2562 } 2563 2564 /* 2565 * Evaluate an optionally negated single character, numerical, 2566 * or string condition. 
2567 */ 2568 static int 2569 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2570 { 2571 const char *start, *end; 2572 char *cp, *name; 2573 size_t sz; 2574 int deftype, len, number, savepos, istrue, wanttrue; 2575 2576 if ('!' == v[*pos]) { 2577 wanttrue = 0; 2578 (*pos)++; 2579 } else 2580 wanttrue = 1; 2581 2582 switch (v[*pos]) { 2583 case '\0': 2584 return 0; 2585 case 'n': 2586 case 'o': 2587 (*pos)++; 2588 return wanttrue; 2589 case 'e': 2590 case 't': 2591 case 'v': 2592 (*pos)++; 2593 return !wanttrue; 2594 case 'c': 2595 do { 2596 (*pos)++; 2597 } while (v[*pos] == ' '); 2598 2599 /* 2600 * Quirk for groff compatibility: 2601 * The horizontal tab is neither available nor unavailable. 2602 */ 2603 2604 if (v[*pos] == '\t') { 2605 (*pos)++; 2606 return 0; 2607 } 2608 2609 /* Printable ASCII characters are available. */ 2610 2611 if (v[*pos] != '\\') { 2612 (*pos)++; 2613 return wanttrue; 2614 } 2615 2616 end = v + ++*pos; 2617 switch (mandoc_escape(&end, &start, &len)) { 2618 case ESCAPE_SPECIAL: 2619 istrue = mchars_spec2cp(start, len) != -1; 2620 break; 2621 case ESCAPE_UNICODE: 2622 istrue = 1; 2623 break; 2624 case ESCAPE_NUMBERED: 2625 istrue = mchars_num2char(start, len) != -1; 2626 break; 2627 default: 2628 istrue = !wanttrue; 2629 break; 2630 } 2631 *pos = end - v; 2632 return istrue == wanttrue; 2633 case 'd': 2634 case 'r': 2635 cp = v + *pos + 1; 2636 while (*cp == ' ') 2637 cp++; 2638 name = cp; 2639 sz = roff_getname(r, &cp, ln, cp - v); 2640 if (sz == 0) 2641 istrue = 0; 2642 else if (v[*pos] == 'r') 2643 istrue = roff_hasregn(r, name, sz); 2644 else { 2645 deftype = ROFFDEF_ANY; 2646 roff_getstrn(r, name, sz, &deftype); 2647 istrue = !!deftype; 2648 } 2649 *pos = (name + sz) - v; 2650 return istrue == wanttrue; 2651 default: 2652 break; 2653 } 2654 2655 savepos = *pos; 2656 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2657 return (number > 0) == wanttrue; 2658 else if (*pos == savepos) 2659 return roff_evalstrcond(v, pos) 
== wanttrue; 2660 else 2661 return 0; 2662 } 2663 2664 static int 2665 roff_line_ignore(ROFF_ARGS) 2666 { 2667 2668 return ROFF_IGN; 2669 } 2670 2671 static int 2672 roff_insec(ROFF_ARGS) 2673 { 2674 2675 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2676 return ROFF_IGN; 2677 } 2678 2679 static int 2680 roff_unsupp(ROFF_ARGS) 2681 { 2682 2683 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2684 return ROFF_IGN; 2685 } 2686 2687 static int 2688 roff_cond(ROFF_ARGS) 2689 { 2690 int irc; 2691 2692 roffnode_push(r, tok, NULL, ln, ppos); 2693 2694 /* 2695 * An `.el' has no conditional body: it will consume the value 2696 * of the current rstack entry set in prior `ie' calls or 2697 * defaults to DENY. 2698 * 2699 * If we're not an `el', however, then evaluate the conditional. 2700 */ 2701 2702 r->last->rule = tok == ROFF_el ? 2703 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2704 roff_evalcond(r, ln, buf->buf, &pos); 2705 2706 /* 2707 * An if-else will put the NEGATION of the current evaluated 2708 * conditional into the stack of rules. 2709 */ 2710 2711 if (tok == ROFF_ie) { 2712 if (r->rstackpos + 1 == r->rstacksz) { 2713 r->rstacksz += 16; 2714 r->rstack = mandoc_reallocarray(r->rstack, 2715 r->rstacksz, sizeof(int)); 2716 } 2717 r->rstack[++r->rstackpos] = !r->last->rule; 2718 } 2719 2720 /* If the parent has false as its rule, then so do we. */ 2721 2722 if (r->last->parent && !r->last->parent->rule) 2723 r->last->rule = 0; 2724 2725 /* 2726 * Determine scope. 2727 * If there is nothing on the line after the conditional, 2728 * not even whitespace, use next-line scope. 2729 * Except that .while does not support next-line scope. 2730 */ 2731 2732 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2733 r->last->endspan = 2; 2734 goto out; 2735 } 2736 2737 while (buf->buf[pos] == ' ') 2738 pos++; 2739 2740 /* An opening brace requests multiline scope. 
*/ 2741 2742 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2743 r->last->endspan = -1; 2744 pos += 2; 2745 while (buf->buf[pos] == ' ') 2746 pos++; 2747 goto out; 2748 } 2749 2750 /* 2751 * Anything else following the conditional causes 2752 * single-line scope. Warn if the scope contains 2753 * nothing but trailing whitespace. 2754 */ 2755 2756 if (buf->buf[pos] == '\0') 2757 mandoc_msg(MANDOCERR_COND_EMPTY, 2758 ln, ppos, "%s", roff_name[tok]); 2759 2760 r->last->endspan = 1; 2761 2762 out: 2763 *offs = pos; 2764 irc = ROFF_RERUN; 2765 if (tok == ROFF_while) 2766 irc |= ROFF_WHILE; 2767 return irc; 2768 } 2769 2770 static int 2771 roff_ds(ROFF_ARGS) 2772 { 2773 char *string; 2774 const char *name; 2775 size_t namesz; 2776 2777 /* Ignore groff compatibility mode for now. */ 2778 2779 if (tok == ROFF_ds1) 2780 tok = ROFF_ds; 2781 else if (tok == ROFF_as1) 2782 tok = ROFF_as; 2783 2784 /* 2785 * The first word is the name of the string. 2786 * If it is empty or terminated by an escape sequence, 2787 * abort the `ds' request without defining anything. 2788 */ 2789 2790 name = string = buf->buf + pos; 2791 if (*name == '\0') 2792 return ROFF_IGN; 2793 2794 namesz = roff_getname(r, &string, ln, pos); 2795 switch (name[namesz]) { 2796 case '\\': 2797 return ROFF_IGN; 2798 case '\t': 2799 string = buf->buf + pos + namesz; 2800 break; 2801 default: 2802 break; 2803 } 2804 2805 /* Read past the initial double-quote, if any. */ 2806 if (*string == '"') 2807 string++; 2808 2809 /* The rest is the value. */ 2810 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2811 ROFF_as == tok); 2812 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2813 return ROFF_IGN; 2814 } 2815 2816 /* 2817 * Parse a single operator, one or two characters long. 2818 * If the operator is recognized, return success and advance the 2819 * parse point, else return failure and let the parse point unchanged. 
2820 */ 2821 static int 2822 roff_getop(const char *v, int *pos, char *res) 2823 { 2824 2825 *res = v[*pos]; 2826 2827 switch (*res) { 2828 case '+': 2829 case '-': 2830 case '*': 2831 case '/': 2832 case '%': 2833 case '&': 2834 case ':': 2835 break; 2836 case '<': 2837 switch (v[*pos + 1]) { 2838 case '=': 2839 *res = 'l'; 2840 (*pos)++; 2841 break; 2842 case '>': 2843 *res = '!'; 2844 (*pos)++; 2845 break; 2846 case '?': 2847 *res = 'i'; 2848 (*pos)++; 2849 break; 2850 default: 2851 break; 2852 } 2853 break; 2854 case '>': 2855 switch (v[*pos + 1]) { 2856 case '=': 2857 *res = 'g'; 2858 (*pos)++; 2859 break; 2860 case '?': 2861 *res = 'a'; 2862 (*pos)++; 2863 break; 2864 default: 2865 break; 2866 } 2867 break; 2868 case '=': 2869 if ('=' == v[*pos + 1]) 2870 (*pos)++; 2871 break; 2872 default: 2873 return 0; 2874 } 2875 (*pos)++; 2876 2877 return *res; 2878 } 2879 2880 /* 2881 * Evaluate either a parenthesized numeric expression 2882 * or a single signed integer number. 2883 */ 2884 static int 2885 roff_evalpar(struct roff *r, int ln, 2886 const char *v, int *pos, int *res, int flags) 2887 { 2888 2889 if ('(' != v[*pos]) 2890 return roff_getnum(v, pos, res, flags); 2891 2892 (*pos)++; 2893 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2894 return 0; 2895 2896 /* 2897 * Omission of the closing parenthesis 2898 * is an error in validation mode, 2899 * but ignored in evaluation mode. 2900 */ 2901 2902 if (')' == v[*pos]) 2903 (*pos)++; 2904 else if (NULL == res) 2905 return 0; 2906 2907 return 1; 2908 } 2909 2910 /* 2911 * Evaluate a complete numeric expression. 2912 * Proceed left to right, there is no concept of precedence. 
2913 */ 2914 static int 2915 roff_evalnum(struct roff *r, int ln, const char *v, 2916 int *pos, int *res, int flags) 2917 { 2918 int mypos, operand2; 2919 char operator; 2920 2921 if (NULL == pos) { 2922 mypos = 0; 2923 pos = &mypos; 2924 } 2925 2926 if (flags & ROFFNUM_WHITE) 2927 while (isspace((unsigned char)v[*pos])) 2928 (*pos)++; 2929 2930 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2931 return 0; 2932 2933 while (1) { 2934 if (flags & ROFFNUM_WHITE) 2935 while (isspace((unsigned char)v[*pos])) 2936 (*pos)++; 2937 2938 if ( ! roff_getop(v, pos, &operator)) 2939 break; 2940 2941 if (flags & ROFFNUM_WHITE) 2942 while (isspace((unsigned char)v[*pos])) 2943 (*pos)++; 2944 2945 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2946 return 0; 2947 2948 if (flags & ROFFNUM_WHITE) 2949 while (isspace((unsigned char)v[*pos])) 2950 (*pos)++; 2951 2952 if (NULL == res) 2953 continue; 2954 2955 switch (operator) { 2956 case '+': 2957 *res += operand2; 2958 break; 2959 case '-': 2960 *res -= operand2; 2961 break; 2962 case '*': 2963 *res *= operand2; 2964 break; 2965 case '/': 2966 if (operand2 == 0) { 2967 mandoc_msg(MANDOCERR_DIVZERO, 2968 ln, *pos, "%s", v); 2969 *res = 0; 2970 break; 2971 } 2972 *res /= operand2; 2973 break; 2974 case '%': 2975 if (operand2 == 0) { 2976 mandoc_msg(MANDOCERR_DIVZERO, 2977 ln, *pos, "%s", v); 2978 *res = 0; 2979 break; 2980 } 2981 *res %= operand2; 2982 break; 2983 case '<': 2984 *res = *res < operand2; 2985 break; 2986 case '>': 2987 *res = *res > operand2; 2988 break; 2989 case 'l': 2990 *res = *res <= operand2; 2991 break; 2992 case 'g': 2993 *res = *res >= operand2; 2994 break; 2995 case '=': 2996 *res = *res == operand2; 2997 break; 2998 case '!': 2999 *res = *res != operand2; 3000 break; 3001 case '&': 3002 *res = *res && operand2; 3003 break; 3004 case ':': 3005 *res = *res || operand2; 3006 break; 3007 case 'i': 3008 if (operand2 < *res) 3009 *res = operand2; 3010 break; 3011 case 'a': 3012 if (operand2 > *res) 3013 *res 
= operand2; 3014 break; 3015 default: 3016 abort(); 3017 } 3018 } 3019 return 1; 3020 } 3021 3022 /* --- register management ------------------------------------------------ */ 3023 3024 void 3025 roff_setreg(struct roff *r, const char *name, int val, char sign) 3026 { 3027 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 3028 } 3029 3030 static void 3031 roff_setregn(struct roff *r, const char *name, size_t len, 3032 int val, char sign, int step) 3033 { 3034 struct roffreg *reg; 3035 3036 /* Search for an existing register with the same name. */ 3037 reg = r->regtab; 3038 3039 while (reg != NULL && (reg->key.sz != len || 3040 strncmp(reg->key.p, name, len) != 0)) 3041 reg = reg->next; 3042 3043 if (NULL == reg) { 3044 /* Create a new register. */ 3045 reg = mandoc_malloc(sizeof(struct roffreg)); 3046 reg->key.p = mandoc_strndup(name, len); 3047 reg->key.sz = len; 3048 reg->val = 0; 3049 reg->step = 0; 3050 reg->next = r->regtab; 3051 r->regtab = reg; 3052 } 3053 3054 if ('+' == sign) 3055 reg->val += val; 3056 else if ('-' == sign) 3057 reg->val -= val; 3058 else 3059 reg->val = val; 3060 if (step != INT_MIN) 3061 reg->step = step; 3062 } 3063 3064 /* 3065 * Handle some predefined read-only number registers. 3066 * For now, return -1 if the requested register is not predefined; 3067 * in case a predefined read-only register having the value -1 3068 * were to turn up, another special value would have to be chosen. 3069 */ 3070 static int 3071 roff_getregro(const struct roff *r, const char *name) 3072 { 3073 3074 switch (*name) { 3075 case '$': /* Number of arguments of the last macro evaluated. */ 3076 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 3077 case 'A': /* ASCII approximation mode is always off. */ 3078 return 0; 3079 case 'g': /* Groff compatibility mode is always on. */ 3080 return 1; 3081 case 'H': /* Fixed horizontal resolution. */ 3082 return 24; 3083 case 'j': /* Always adjust left margin only. 
*/ 3084 return 0; 3085 case 'T': /* Some output device is always defined. */ 3086 return 1; 3087 case 'V': /* Fixed vertical resolution. */ 3088 return 40; 3089 default: 3090 return -1; 3091 } 3092 } 3093 3094 int 3095 roff_getreg(struct roff *r, const char *name) 3096 { 3097 return roff_getregn(r, name, strlen(name), '\0'); 3098 } 3099 3100 static int 3101 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3102 { 3103 struct roffreg *reg; 3104 int val; 3105 3106 if ('.' == name[0] && 2 == len) { 3107 val = roff_getregro(r, name + 1); 3108 if (-1 != val) 3109 return val; 3110 } 3111 3112 for (reg = r->regtab; reg; reg = reg->next) { 3113 if (len == reg->key.sz && 3114 0 == strncmp(name, reg->key.p, len)) { 3115 switch (sign) { 3116 case '+': 3117 reg->val += reg->step; 3118 break; 3119 case '-': 3120 reg->val -= reg->step; 3121 break; 3122 default: 3123 break; 3124 } 3125 return reg->val; 3126 } 3127 } 3128 3129 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3130 return 0; 3131 } 3132 3133 static int 3134 roff_hasregn(const struct roff *r, const char *name, size_t len) 3135 { 3136 struct roffreg *reg; 3137 int val; 3138 3139 if ('.' 
== name[0] && 2 == len) { 3140 val = roff_getregro(r, name + 1); 3141 if (-1 != val) 3142 return 1; 3143 } 3144 3145 for (reg = r->regtab; reg; reg = reg->next) 3146 if (len == reg->key.sz && 3147 0 == strncmp(name, reg->key.p, len)) 3148 return 1; 3149 3150 return 0; 3151 } 3152 3153 static void 3154 roff_freereg(struct roffreg *reg) 3155 { 3156 struct roffreg *old_reg; 3157 3158 while (NULL != reg) { 3159 free(reg->key.p); 3160 old_reg = reg; 3161 reg = reg->next; 3162 free(old_reg); 3163 } 3164 } 3165 3166 static int 3167 roff_nr(ROFF_ARGS) 3168 { 3169 char *key, *val, *step; 3170 size_t keysz; 3171 int iv, is, len; 3172 char sign; 3173 3174 key = val = buf->buf + pos; 3175 if (*key == '\0') 3176 return ROFF_IGN; 3177 3178 keysz = roff_getname(r, &val, ln, pos); 3179 if (key[keysz] == '\\' || key[keysz] == '\t') 3180 return ROFF_IGN; 3181 3182 sign = *val; 3183 if (sign == '+' || sign == '-') 3184 val++; 3185 3186 len = 0; 3187 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3188 return ROFF_IGN; 3189 3190 step = val + len; 3191 while (isspace((unsigned char)*step)) 3192 step++; 3193 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3194 is = INT_MIN; 3195 3196 roff_setregn(r, key, keysz, iv, sign, is); 3197 return ROFF_IGN; 3198 } 3199 3200 static int 3201 roff_rr(ROFF_ARGS) 3202 { 3203 struct roffreg *reg, **prev; 3204 char *name, *cp; 3205 size_t namesz; 3206 3207 name = cp = buf->buf + pos; 3208 if (*name == '\0') 3209 return ROFF_IGN; 3210 namesz = roff_getname(r, &cp, ln, pos); 3211 name[namesz] = '\0'; 3212 3213 prev = &r->regtab; 3214 while (1) { 3215 reg = *prev; 3216 if (reg == NULL || !strcmp(name, reg->key.p)) 3217 break; 3218 prev = ®->next; 3219 } 3220 if (reg != NULL) { 3221 *prev = reg->next; 3222 free(reg->key.p); 3223 free(reg); 3224 } 3225 return ROFF_IGN; 3226 } 3227 3228 /* --- handler functions for roff requests -------------------------------- */ 3229 3230 static int 3231 roff_rm(ROFF_ARGS) 3232 { 3233 const char *name; 3234 
char *cp; 3235 size_t namesz; 3236 3237 cp = buf->buf + pos; 3238 while (*cp != '\0') { 3239 name = cp; 3240 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3241 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3242 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3243 if (name[namesz] == '\\' || name[namesz] == '\t') 3244 break; 3245 } 3246 return ROFF_IGN; 3247 } 3248 3249 static int 3250 roff_it(ROFF_ARGS) 3251 { 3252 int iv; 3253 3254 /* Parse the number of lines. */ 3255 3256 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3257 mandoc_msg(MANDOCERR_IT_NONUM, 3258 ln, ppos, "%s", buf->buf + 1); 3259 return ROFF_IGN; 3260 } 3261 3262 while (isspace((unsigned char)buf->buf[pos])) 3263 pos++; 3264 3265 /* 3266 * Arm the input line trap. 3267 * Special-casing "an-trap" is an ugly workaround to cope 3268 * with DocBook stupidly fiddling with man(7) internals. 3269 */ 3270 3271 roffit_lines = iv; 3272 roffit_macro = mandoc_strdup(iv != 1 || 3273 strcmp(buf->buf + pos, "an-trap") ? 
3274 buf->buf + pos : "br"); 3275 return ROFF_IGN; 3276 } 3277 3278 static int 3279 roff_Dd(ROFF_ARGS) 3280 { 3281 int mask; 3282 enum roff_tok t, te; 3283 3284 switch (tok) { 3285 case ROFF_Dd: 3286 tok = MDOC_Dd; 3287 te = MDOC_MAX; 3288 if (r->format == 0) 3289 r->format = MPARSE_MDOC; 3290 mask = MPARSE_MDOC | MPARSE_QUICK; 3291 break; 3292 case ROFF_TH: 3293 tok = MAN_TH; 3294 te = MAN_MAX; 3295 if (r->format == 0) 3296 r->format = MPARSE_MAN; 3297 mask = MPARSE_QUICK; 3298 break; 3299 default: 3300 abort(); 3301 } 3302 if ((r->options & mask) == 0) 3303 for (t = tok; t < te; t++) 3304 roff_setstr(r, roff_name[t], NULL, 0); 3305 return ROFF_CONT; 3306 } 3307 3308 static int 3309 roff_TE(ROFF_ARGS) 3310 { 3311 r->man->flags &= ~ROFF_NONOFILL; 3312 if (r->tbl == NULL) { 3313 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3314 return ROFF_IGN; 3315 } 3316 if (tbl_end(r->tbl, 0) == 0) { 3317 r->tbl = NULL; 3318 free(buf->buf); 3319 buf->buf = mandoc_strdup(".sp"); 3320 buf->sz = 4; 3321 *offs = 0; 3322 return ROFF_REPARSE; 3323 } 3324 r->tbl = NULL; 3325 return ROFF_IGN; 3326 } 3327 3328 static int 3329 roff_T_(ROFF_ARGS) 3330 { 3331 3332 if (NULL == r->tbl) 3333 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3334 else 3335 tbl_restart(ln, ppos, r->tbl); 3336 3337 return ROFF_IGN; 3338 } 3339 3340 /* 3341 * Handle in-line equation delimiters. 3342 */ 3343 static int 3344 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3345 { 3346 char *cp1, *cp2; 3347 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3348 3349 /* 3350 * Outside equations, look for an opening delimiter. 3351 * If we are inside an equation, we already know it is 3352 * in-line, or this function wouldn't have been called; 3353 * so look for a closing delimiter. 3354 */ 3355 3356 cp1 = buf->buf + pos; 3357 cp2 = strchr(cp1, r->eqn == NULL ? 
3358 r->last_eqn->odelim : r->last_eqn->cdelim); 3359 if (cp2 == NULL) 3360 return ROFF_CONT; 3361 3362 *cp2++ = '\0'; 3363 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3364 3365 /* Handle preceding text, protecting whitespace. */ 3366 3367 if (*buf->buf != '\0') { 3368 if (r->eqn == NULL) 3369 bef_pr = "\\&"; 3370 bef_nl = "\n"; 3371 } 3372 3373 /* 3374 * Prepare replacing the delimiter with an equation macro 3375 * and drop leading white space from the equation. 3376 */ 3377 3378 if (r->eqn == NULL) { 3379 while (*cp2 == ' ') 3380 cp2++; 3381 mac = ".EQ"; 3382 } else 3383 mac = ".EN"; 3384 3385 /* Handle following text, protecting whitespace. */ 3386 3387 if (*cp2 != '\0') { 3388 aft_nl = "\n"; 3389 if (r->eqn != NULL) 3390 aft_pr = "\\&"; 3391 } 3392 3393 /* Do the actual replacement. */ 3394 3395 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3396 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3397 free(buf->buf); 3398 buf->buf = cp1; 3399 3400 /* Toggle the in-line state of the eqn subsystem. 
*/ 3401 3402 r->eqn_inline = r->eqn == NULL; 3403 return ROFF_REPARSE; 3404 } 3405 3406 static int 3407 roff_EQ(ROFF_ARGS) 3408 { 3409 struct roff_node *n; 3410 3411 if (r->man->meta.macroset == MACROSET_MAN) 3412 man_breakscope(r->man, ROFF_EQ); 3413 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3414 if (ln > r->man->last->line) 3415 n->flags |= NODE_LINE; 3416 n->eqn = eqn_box_new(); 3417 roff_node_append(r->man, n); 3418 r->man->next = ROFF_NEXT_SIBLING; 3419 3420 assert(r->eqn == NULL); 3421 if (r->last_eqn == NULL) 3422 r->last_eqn = eqn_alloc(); 3423 else 3424 eqn_reset(r->last_eqn); 3425 r->eqn = r->last_eqn; 3426 r->eqn->node = n; 3427 3428 if (buf->buf[pos] != '\0') 3429 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3430 ".EQ %s", buf->buf + pos); 3431 3432 return ROFF_IGN; 3433 } 3434 3435 static int 3436 roff_EN(ROFF_ARGS) 3437 { 3438 if (r->eqn != NULL) { 3439 eqn_parse(r->eqn); 3440 r->eqn = NULL; 3441 } else 3442 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3443 if (buf->buf[pos] != '\0') 3444 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3445 "EN %s", buf->buf + pos); 3446 return ROFF_IGN; 3447 } 3448 3449 static int 3450 roff_TS(ROFF_ARGS) 3451 { 3452 if (r->tbl != NULL) { 3453 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3454 tbl_end(r->tbl, 0); 3455 } 3456 r->man->flags |= ROFF_NONOFILL; 3457 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3458 if (r->last_tbl == NULL) 3459 r->first_tbl = r->tbl; 3460 r->last_tbl = r->tbl; 3461 return ROFF_IGN; 3462 } 3463 3464 static int 3465 roff_noarg(ROFF_ARGS) 3466 { 3467 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3468 man_breakscope(r->man, tok); 3469 if (tok == ROFF_brp) 3470 tok = ROFF_br; 3471 roff_elem_alloc(r->man, ln, ppos, tok); 3472 if (buf->buf[pos] != '\0') 3473 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3474 "%s %s", roff_name[tok], buf->buf + pos); 3475 if (tok == ROFF_nf) 3476 r->man->flags |= ROFF_NOFILL; 3477 else if (tok == ROFF_fi) 3478 r->man->flags &= ~ROFF_NOFILL; 3479 
r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3480 r->man->next = ROFF_NEXT_SIBLING; 3481 return ROFF_IGN; 3482 } 3483 3484 static int 3485 roff_onearg(ROFF_ARGS) 3486 { 3487 struct roff_node *n; 3488 char *cp; 3489 int npos; 3490 3491 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3492 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3493 tok == ROFF_ti)) 3494 man_breakscope(r->man, tok); 3495 3496 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3497 r->man->last = roffce_node; 3498 r->man->next = ROFF_NEXT_SIBLING; 3499 } 3500 3501 roff_elem_alloc(r->man, ln, ppos, tok); 3502 n = r->man->last; 3503 3504 cp = buf->buf + pos; 3505 if (*cp != '\0') { 3506 while (*cp != '\0' && *cp != ' ') 3507 cp++; 3508 while (*cp == ' ') 3509 *cp++ = '\0'; 3510 if (*cp != '\0') 3511 mandoc_msg(MANDOCERR_ARG_EXCESS, 3512 ln, (int)(cp - buf->buf), 3513 "%s ... %s", roff_name[tok], cp); 3514 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3515 } 3516 3517 if (tok == ROFF_ce || tok == ROFF_rj) { 3518 if (r->man->last->type == ROFFT_ELEM) { 3519 roff_word_alloc(r->man, ln, pos, "1"); 3520 r->man->last->flags |= NODE_NOSRC; 3521 } 3522 npos = 0; 3523 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3524 &roffce_lines, 0) == 0) { 3525 mandoc_msg(MANDOCERR_CE_NONUM, 3526 ln, pos, "ce %s", buf->buf + pos); 3527 roffce_lines = 1; 3528 } 3529 if (roffce_lines < 1) { 3530 r->man->last = r->man->last->parent; 3531 roffce_node = NULL; 3532 roffce_lines = 0; 3533 } else 3534 roffce_node = r->man->last->parent; 3535 } else { 3536 n->flags |= NODE_VALID | NODE_ENDED; 3537 r->man->last = n; 3538 } 3539 n->flags |= NODE_LINE; 3540 r->man->next = ROFF_NEXT_SIBLING; 3541 return ROFF_IGN; 3542 } 3543 3544 static int 3545 roff_manyarg(ROFF_ARGS) 3546 { 3547 struct roff_node *n; 3548 char *sp, *ep; 3549 3550 roff_elem_alloc(r->man, ln, ppos, tok); 3551 n = r->man->last; 3552 3553 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3554 while (*ep != '\0' && 
*ep != ' ') 3555 ep++; 3556 while (*ep == ' ') 3557 *ep++ = '\0'; 3558 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3559 } 3560 3561 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3562 r->man->last = n; 3563 r->man->next = ROFF_NEXT_SIBLING; 3564 return ROFF_IGN; 3565 } 3566 3567 static int 3568 roff_als(ROFF_ARGS) 3569 { 3570 char *oldn, *newn, *end, *value; 3571 size_t oldsz, newsz, valsz; 3572 3573 newn = oldn = buf->buf + pos; 3574 if (*newn == '\0') 3575 return ROFF_IGN; 3576 3577 newsz = roff_getname(r, &oldn, ln, pos); 3578 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3579 return ROFF_IGN; 3580 3581 end = oldn; 3582 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3583 if (oldsz == 0) 3584 return ROFF_IGN; 3585 3586 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3587 (int)oldsz, oldn); 3588 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3589 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3590 free(value); 3591 return ROFF_IGN; 3592 } 3593 3594 /* 3595 * The .break request only makes sense inside conditionals, 3596 * and that case is already handled in roff_cond_sub(). 3597 */ 3598 static int 3599 roff_break(ROFF_ARGS) 3600 { 3601 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3602 return ROFF_IGN; 3603 } 3604 3605 static int 3606 roff_cc(ROFF_ARGS) 3607 { 3608 const char *p; 3609 3610 p = buf->buf + pos; 3611 3612 if (*p == '\0' || (r->control = *p++) == '.') 3613 r->control = '\0'; 3614 3615 if (*p != '\0') 3616 mandoc_msg(MANDOCERR_ARG_EXCESS, 3617 ln, p - buf->buf, "cc ... %s", p); 3618 3619 return ROFF_IGN; 3620 } 3621 3622 static int 3623 roff_char(ROFF_ARGS) 3624 { 3625 const char *p, *kp, *vp; 3626 size_t ksz, vsz; 3627 int font; 3628 3629 /* Parse the character to be replaced. 
*/ 3630 3631 kp = buf->buf + pos; 3632 p = kp + 1; 3633 if (*kp == '\0' || (*kp == '\\' && 3634 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3635 (*p != ' ' && *p != '\0')) { 3636 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3637 return ROFF_IGN; 3638 } 3639 ksz = p - kp; 3640 while (*p == ' ') 3641 p++; 3642 3643 /* 3644 * If the replacement string contains a font escape sequence, 3645 * we have to restore the font at the end. 3646 */ 3647 3648 vp = p; 3649 vsz = strlen(p); 3650 font = 0; 3651 while (*p != '\0') { 3652 if (*p++ != '\\') 3653 continue; 3654 switch (mandoc_escape(&p, NULL, NULL)) { 3655 case ESCAPE_FONT: 3656 case ESCAPE_FONTROMAN: 3657 case ESCAPE_FONTITALIC: 3658 case ESCAPE_FONTBOLD: 3659 case ESCAPE_FONTBI: 3660 case ESCAPE_FONTCW: 3661 case ESCAPE_FONTPREV: 3662 font++; 3663 break; 3664 default: 3665 break; 3666 } 3667 } 3668 if (font > 1) 3669 mandoc_msg(MANDOCERR_CHAR_FONT, 3670 ln, (int)(vp - buf->buf), "%s", vp); 3671 3672 /* 3673 * Approximate the effect of .char using the .tr tables. 3674 * XXX In groff, .char and .tr interact differently. 3675 */ 3676 3677 if (ksz == 1) { 3678 if (r->xtab == NULL) 3679 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3680 assert((unsigned int)*kp < 128); 3681 free(r->xtab[(int)*kp].p); 3682 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3683 "%s%s", vp, font ? "\fP" : ""); 3684 } else { 3685 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3686 if (font) 3687 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3688 } 3689 return ROFF_IGN; 3690 } 3691 3692 static int 3693 roff_ec(ROFF_ARGS) 3694 { 3695 const char *p; 3696 3697 p = buf->buf + pos; 3698 if (*p == '\0') 3699 r->escape = '\\'; 3700 else { 3701 r->escape = *p; 3702 if (*++p != '\0') 3703 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3704 (int)(p - buf->buf), "ec ... 
%s", p); 3705 } 3706 return ROFF_IGN; 3707 } 3708 3709 static int 3710 roff_eo(ROFF_ARGS) 3711 { 3712 r->escape = '\0'; 3713 if (buf->buf[pos] != '\0') 3714 mandoc_msg(MANDOCERR_ARG_SKIP, 3715 ln, pos, "eo %s", buf->buf + pos); 3716 return ROFF_IGN; 3717 } 3718 3719 static int 3720 roff_nop(ROFF_ARGS) 3721 { 3722 while (buf->buf[pos] == ' ') 3723 pos++; 3724 *offs = pos; 3725 return ROFF_RERUN; 3726 } 3727 3728 static int 3729 roff_tr(ROFF_ARGS) 3730 { 3731 const char *p, *first, *second; 3732 size_t fsz, ssz; 3733 enum mandoc_esc esc; 3734 3735 p = buf->buf + pos; 3736 3737 if (*p == '\0') { 3738 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3739 return ROFF_IGN; 3740 } 3741 3742 while (*p != '\0') { 3743 fsz = ssz = 1; 3744 3745 first = p++; 3746 if (*first == '\\') { 3747 esc = mandoc_escape(&p, NULL, NULL); 3748 if (esc == ESCAPE_ERROR) { 3749 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3750 (int)(p - buf->buf), "%s", first); 3751 return ROFF_IGN; 3752 } 3753 fsz = (size_t)(p - first); 3754 } 3755 3756 second = p++; 3757 if (*second == '\\') { 3758 esc = mandoc_escape(&p, NULL, NULL); 3759 if (esc == ESCAPE_ERROR) { 3760 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3761 (int)(p - buf->buf), "%s", second); 3762 return ROFF_IGN; 3763 } 3764 ssz = (size_t)(p - second); 3765 } else if (*second == '\0') { 3766 mandoc_msg(MANDOCERR_TR_ODD, ln, 3767 (int)(first - buf->buf), "tr %s", first); 3768 second = " "; 3769 p--; 3770 } 3771 3772 if (fsz > 1) { 3773 roff_setstrn(&r->xmbtab, first, fsz, 3774 second, ssz, 0); 3775 continue; 3776 } 3777 3778 if (r->xtab == NULL) 3779 r->xtab = mandoc_calloc(128, 3780 sizeof(struct roffstr)); 3781 3782 free(r->xtab[(int)*first].p); 3783 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3784 r->xtab[(int)*first].sz = ssz; 3785 } 3786 3787 return ROFF_IGN; 3788 } 3789 3790 /* 3791 * Implementation of the .return request. 3792 * There is no need to call roff_userret() from here. 
3793 * The read module will call that after rewinding the reader stack 3794 * to the place from where the current macro was called. 3795 */ 3796 static int 3797 roff_return(ROFF_ARGS) 3798 { 3799 if (r->mstackpos >= 0) 3800 return ROFF_IGN | ROFF_USERRET; 3801 3802 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3803 return ROFF_IGN; 3804 } 3805 3806 static int 3807 roff_rn(ROFF_ARGS) 3808 { 3809 const char *value; 3810 char *oldn, *newn, *end; 3811 size_t oldsz, newsz; 3812 int deftype; 3813 3814 oldn = newn = buf->buf + pos; 3815 if (*oldn == '\0') 3816 return ROFF_IGN; 3817 3818 oldsz = roff_getname(r, &newn, ln, pos); 3819 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3820 return ROFF_IGN; 3821 3822 end = newn; 3823 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3824 if (newsz == 0) 3825 return ROFF_IGN; 3826 3827 deftype = ROFFDEF_ANY; 3828 value = roff_getstrn(r, oldn, oldsz, &deftype); 3829 switch (deftype) { 3830 case ROFFDEF_USER: 3831 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3832 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3833 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3834 break; 3835 case ROFFDEF_PRE: 3836 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3837 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3838 break; 3839 case ROFFDEF_REN: 3840 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3841 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3842 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3843 break; 3844 case ROFFDEF_STD: 3845 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3846 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3847 break; 3848 default: 3849 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3850 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3851 break; 3852 } 3853 return ROFF_IGN; 3854 } 3855 3856 static int 3857 roff_shift(ROFF_ARGS) 3858 { 3859 struct mctx *ctx; 3860 int levels, i; 3861 3862 levels = 1; 
3863 if (buf->buf[pos] != '\0' && 3864 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3865 mandoc_msg(MANDOCERR_CE_NONUM, 3866 ln, pos, "shift %s", buf->buf + pos); 3867 levels = 1; 3868 } 3869 if (r->mstackpos < 0) { 3870 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3871 return ROFF_IGN; 3872 } 3873 ctx = r->mstack + r->mstackpos; 3874 if (levels > ctx->argc) { 3875 mandoc_msg(MANDOCERR_SHIFT, 3876 ln, pos, "%d, but max is %d", levels, ctx->argc); 3877 levels = ctx->argc; 3878 } 3879 if (levels == 0) 3880 return ROFF_IGN; 3881 for (i = 0; i < levels; i++) 3882 free(ctx->argv[i]); 3883 ctx->argc -= levels; 3884 for (i = 0; i < ctx->argc; i++) 3885 ctx->argv[i] = ctx->argv[i + levels]; 3886 return ROFF_IGN; 3887 } 3888 3889 static int 3890 roff_so(ROFF_ARGS) 3891 { 3892 char *name, *cp; 3893 3894 name = buf->buf + pos; 3895 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3896 3897 /* 3898 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3899 * opening anything that's not in our cwd or anything beneath 3900 * it. Thus, explicitly disallow traversing up the file-system 3901 * or using absolute paths. 3902 */ 3903 3904 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3905 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3906 buf->sz = mandoc_asprintf(&cp, 3907 ".sp\nSee the file %s.\n.sp", name) + 1; 3908 free(buf->buf); 3909 buf->buf = cp; 3910 *offs = 0; 3911 return ROFF_REPARSE; 3912 } 3913 3914 *offs = pos; 3915 return ROFF_SO; 3916 } 3917 3918 /* --- user defined strings and macros ------------------------------------ */ 3919 3920 static int 3921 roff_userdef(ROFF_ARGS) 3922 { 3923 struct mctx *ctx; 3924 char *arg, *ap, *dst, *src; 3925 size_t sz; 3926 3927 /* If the macro is empty, ignore it altogether. */ 3928 3929 if (*r->current_string == '\0') 3930 return ROFF_IGN; 3931 3932 /* Initialize a new macro stack context. 
*/ 3933 3934 if (++r->mstackpos == r->mstacksz) { 3935 r->mstack = mandoc_recallocarray(r->mstack, 3936 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3937 r->mstacksz += 8; 3938 } 3939 ctx = r->mstack + r->mstackpos; 3940 ctx->argsz = 0; 3941 ctx->argc = 0; 3942 ctx->argv = NULL; 3943 3944 /* 3945 * Collect pointers to macro argument strings, 3946 * NUL-terminating them and escaping quotes. 3947 */ 3948 3949 src = buf->buf + pos; 3950 while (*src != '\0') { 3951 if (ctx->argc == ctx->argsz) { 3952 ctx->argsz += 8; 3953 ctx->argv = mandoc_reallocarray(ctx->argv, 3954 ctx->argsz, sizeof(*ctx->argv)); 3955 } 3956 arg = roff_getarg(r, &src, ln, &pos); 3957 sz = 1; /* For the terminating NUL. */ 3958 for (ap = arg; *ap != '\0'; ap++) 3959 sz += *ap == '"' ? 4 : 1; 3960 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3961 for (ap = arg; *ap != '\0'; ap++) { 3962 if (*ap == '"') { 3963 memcpy(dst, "\\(dq", 4); 3964 dst += 4; 3965 } else 3966 *dst++ = *ap; 3967 } 3968 *dst = '\0'; 3969 free(arg); 3970 } 3971 3972 /* Replace the macro invocation by the macro definition. */ 3973 3974 free(buf->buf); 3975 buf->buf = mandoc_strdup(r->current_string); 3976 buf->sz = strlen(buf->buf) + 1; 3977 *offs = 0; 3978 3979 return buf->buf[buf->sz - 2] == '\n' ? 3980 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3981 } 3982 3983 /* 3984 * Calling a high-level macro that was renamed with .rn. 3985 * r->current_string has already been set up by roff_parse(). 3986 */ 3987 static int 3988 roff_renamed(ROFF_ARGS) 3989 { 3990 char *nbuf; 3991 3992 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3993 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 3994 free(buf->buf); 3995 buf->buf = nbuf; 3996 *offs = 0; 3997 return ROFF_CONT; 3998 } 3999 4000 /* 4001 * Measure the length in bytes of the roff identifier at *cpp 4002 * and advance the pointer to the next word. 
4003 */ 4004 static size_t 4005 roff_getname(struct roff *r, char **cpp, int ln, int pos) 4006 { 4007 char *name, *cp; 4008 size_t namesz; 4009 4010 name = *cpp; 4011 if (*name == '\0') 4012 return 0; 4013 4014 /* Advance cp to the byte after the end of the name. */ 4015 4016 for (cp = name; 1; cp++) { 4017 namesz = cp - name; 4018 if (*cp == '\0') 4019 break; 4020 if (*cp == ' ' || *cp == '\t') { 4021 cp++; 4022 break; 4023 } 4024 if (*cp != '\\') 4025 continue; 4026 if (cp[1] == '{' || cp[1] == '}') 4027 break; 4028 if (*++cp == '\\') 4029 continue; 4030 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 4031 "%.*s", (int)(cp - name + 1), name); 4032 mandoc_escape((const char **)&cp, NULL, NULL); 4033 break; 4034 } 4035 4036 /* Read past spaces. */ 4037 4038 while (*cp == ' ') 4039 cp++; 4040 4041 *cpp = cp; 4042 return namesz; 4043 } 4044 4045 /* 4046 * Store *string into the user-defined string called *name. 4047 * To clear an existing entry, call with (*r, *name, NULL, 0). 4048 * append == 0: replace mode 4049 * append == 1: single-line append mode 4050 * append == 2: multiline append mode, append '\n' after each call 4051 */ 4052 static void 4053 roff_setstr(struct roff *r, const char *name, const char *string, 4054 int append) 4055 { 4056 size_t namesz; 4057 4058 namesz = strlen(name); 4059 roff_setstrn(&r->strtab, name, namesz, string, 4060 string ? strlen(string) : 0, append); 4061 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 4062 } 4063 4064 static void 4065 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 4066 const char *string, size_t stringsz, int append) 4067 { 4068 struct roffkv *n; 4069 char *c; 4070 int i; 4071 size_t oldch, newch; 4072 4073 /* Search for an existing string with the same name. */ 4074 n = *r; 4075 4076 while (n && (namesz != n->key.sz || 4077 strncmp(n->key.p, name, namesz))) 4078 n = n->next; 4079 4080 if (NULL == n) { 4081 /* Create a new string table entry. 
*/ 4082 n = mandoc_malloc(sizeof(struct roffkv)); 4083 n->key.p = mandoc_strndup(name, namesz); 4084 n->key.sz = namesz; 4085 n->val.p = NULL; 4086 n->val.sz = 0; 4087 n->next = *r; 4088 *r = n; 4089 } else if (0 == append) { 4090 free(n->val.p); 4091 n->val.p = NULL; 4092 n->val.sz = 0; 4093 } 4094 4095 if (NULL == string) 4096 return; 4097 4098 /* 4099 * One additional byte for the '\n' in multiline mode, 4100 * and one for the terminating '\0'. 4101 */ 4102 newch = stringsz + (1 < append ? 2u : 1u); 4103 4104 if (NULL == n->val.p) { 4105 n->val.p = mandoc_malloc(newch); 4106 *n->val.p = '\0'; 4107 oldch = 0; 4108 } else { 4109 oldch = n->val.sz; 4110 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4111 } 4112 4113 /* Skip existing content in the destination buffer. */ 4114 c = n->val.p + (int)oldch; 4115 4116 /* Append new content to the destination buffer. */ 4117 i = 0; 4118 while (i < (int)stringsz) { 4119 /* 4120 * Rudimentary roff copy mode: 4121 * Handle escaped backslashes. 4122 */ 4123 if ('\\' == string[i] && '\\' == string[i + 1]) 4124 i++; 4125 *c++ = string[i++]; 4126 } 4127 4128 /* Append terminating bytes. 
*/ 4129 if (1 < append) 4130 *c++ = '\n'; 4131 4132 *c = '\0'; 4133 n->val.sz = (int)(c - n->val.p); 4134 } 4135 4136 static const char * 4137 roff_getstrn(struct roff *r, const char *name, size_t len, 4138 int *deftype) 4139 { 4140 const struct roffkv *n; 4141 int found, i; 4142 enum roff_tok tok; 4143 4144 found = 0; 4145 for (n = r->strtab; n != NULL; n = n->next) { 4146 if (strncmp(name, n->key.p, len) != 0 || 4147 n->key.p[len] != '\0' || n->val.p == NULL) 4148 continue; 4149 if (*deftype & ROFFDEF_USER) { 4150 *deftype = ROFFDEF_USER; 4151 return n->val.p; 4152 } else { 4153 found = 1; 4154 break; 4155 } 4156 } 4157 for (n = r->rentab; n != NULL; n = n->next) { 4158 if (strncmp(name, n->key.p, len) != 0 || 4159 n->key.p[len] != '\0' || n->val.p == NULL) 4160 continue; 4161 if (*deftype & ROFFDEF_REN) { 4162 *deftype = ROFFDEF_REN; 4163 return n->val.p; 4164 } else { 4165 found = 1; 4166 break; 4167 } 4168 } 4169 for (i = 0; i < PREDEFS_MAX; i++) { 4170 if (strncmp(name, predefs[i].name, len) != 0 || 4171 predefs[i].name[len] != '\0') 4172 continue; 4173 if (*deftype & ROFFDEF_PRE) { 4174 *deftype = ROFFDEF_PRE; 4175 return predefs[i].str; 4176 } else { 4177 found = 1; 4178 break; 4179 } 4180 } 4181 if (r->man->meta.macroset != MACROSET_MAN) { 4182 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4183 if (strncmp(name, roff_name[tok], len) != 0 || 4184 roff_name[tok][len] != '\0') 4185 continue; 4186 if (*deftype & ROFFDEF_STD) { 4187 *deftype = ROFFDEF_STD; 4188 return NULL; 4189 } else { 4190 found = 1; 4191 break; 4192 } 4193 } 4194 } 4195 if (r->man->meta.macroset != MACROSET_MDOC) { 4196 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4197 if (strncmp(name, roff_name[tok], len) != 0 || 4198 roff_name[tok][len] != '\0') 4199 continue; 4200 if (*deftype & ROFFDEF_STD) { 4201 *deftype = ROFFDEF_STD; 4202 return NULL; 4203 } else { 4204 found = 1; 4205 break; 4206 } 4207 } 4208 } 4209 4210 if (found == 0 && *deftype != ROFFDEF_ANY) { 4211 if (*deftype & ROFFDEF_REN) { 
4212 /* 4213 * This might still be a request, 4214 * so do not treat it as undefined yet. 4215 */ 4216 *deftype = ROFFDEF_UNDEF; 4217 return NULL; 4218 } 4219 4220 /* Using an undefined string defines it to be empty. */ 4221 4222 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4223 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4224 } 4225 4226 *deftype = 0; 4227 return NULL; 4228 } 4229 4230 static void 4231 roff_freestr(struct roffkv *r) 4232 { 4233 struct roffkv *n, *nn; 4234 4235 for (n = r; n; n = nn) { 4236 free(n->key.p); 4237 free(n->val.p); 4238 nn = n->next; 4239 free(n); 4240 } 4241 } 4242 4243 /* --- accessors and utility functions ------------------------------------ */ 4244 4245 /* 4246 * Duplicate an input string, making the appropriate character 4247 * conversations (as stipulated by `tr') along the way. 4248 * Returns a heap-allocated string with all the replacements made. 4249 */ 4250 char * 4251 roff_strdup(const struct roff *r, const char *p) 4252 { 4253 const struct roffkv *cp; 4254 char *res; 4255 const char *pp; 4256 size_t ssz, sz; 4257 enum mandoc_esc esc; 4258 4259 if (NULL == r->xmbtab && NULL == r->xtab) 4260 return mandoc_strdup(p); 4261 else if ('\0' == *p) 4262 return mandoc_strdup(""); 4263 4264 /* 4265 * Step through each character looking for term matches 4266 * (remember that a `tr' can be invoked with an escape, which is 4267 * a glyph but the escape is multi-character). 4268 * We only do this if the character hash has been initialised 4269 * and the string is >0 length. 
4270 */ 4271 4272 res = NULL; 4273 ssz = 0; 4274 4275 while ('\0' != *p) { 4276 assert((unsigned int)*p < 128); 4277 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4278 sz = r->xtab[(int)*p].sz; 4279 res = mandoc_realloc(res, ssz + sz + 1); 4280 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4281 ssz += sz; 4282 p++; 4283 continue; 4284 } else if ('\\' != *p) { 4285 res = mandoc_realloc(res, ssz + 2); 4286 res[ssz++] = *p++; 4287 continue; 4288 } 4289 4290 /* Search for term matches. */ 4291 for (cp = r->xmbtab; cp; cp = cp->next) 4292 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4293 break; 4294 4295 if (NULL != cp) { 4296 /* 4297 * A match has been found. 4298 * Append the match to the array and move 4299 * forward by its keysize. 4300 */ 4301 res = mandoc_realloc(res, 4302 ssz + cp->val.sz + 1); 4303 memcpy(res + ssz, cp->val.p, cp->val.sz); 4304 ssz += cp->val.sz; 4305 p += (int)cp->key.sz; 4306 continue; 4307 } 4308 4309 /* 4310 * Handle escapes carefully: we need to copy 4311 * over just the escape itself, or else we might 4312 * do replacements within the escape itself. 4313 * Make sure to pass along the bogus string. 4314 */ 4315 pp = p++; 4316 esc = mandoc_escape(&p, NULL, NULL); 4317 if (ESCAPE_ERROR == esc) { 4318 sz = strlen(pp); 4319 res = mandoc_realloc(res, ssz + sz + 1); 4320 memcpy(res + ssz, pp, sz); 4321 break; 4322 } 4323 /* 4324 * We bail out on bad escapes. 4325 * No need to warn: we already did so when 4326 * roff_expand() was called. 4327 */ 4328 sz = (int)(p - pp); 4329 res = mandoc_realloc(res, ssz + sz + 1); 4330 memcpy(res + ssz, pp, sz); 4331 ssz += sz; 4332 } 4333 4334 res[(int)ssz] = '\0'; 4335 return res; 4336 } 4337 4338 int 4339 roff_getformat(const struct roff *r) 4340 { 4341 4342 return r->format; 4343 } 4344 4345 /* 4346 * Find out whether a line is a macro line or not. 4347 * If it is, adjust the current position and return one; if it isn't, 4348 * return zero and don't change the current position. 
4349 * If the control character has been set with `.cc', then let that grain 4350 * precedence. 4351 * This is slighly contrary to groff, where using the non-breaking 4352 * control character when `cc' has been invoked will cause the 4353 * non-breaking macro contents to be printed verbatim. 4354 */ 4355 int 4356 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4357 { 4358 int pos; 4359 4360 pos = *ppos; 4361 4362 if (r->control != '\0' && cp[pos] == r->control) 4363 pos++; 4364 else if (r->control != '\0') 4365 return 0; 4366 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4367 pos += 2; 4368 else if ('.' == cp[pos] || '\'' == cp[pos]) 4369 pos++; 4370 else 4371 return 0; 4372 4373 while (' ' == cp[pos] || '\t' == cp[pos]) 4374 pos++; 4375 4376 *ppos = pos; 4377 return 1; 4378 } 4379