/* $OpenBSD: roff.c,v 1.243 2020/02/27 21:38:27 schwarze Exp $ */
/*
 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "mandoc.h"
#include "roff.h"
#include "mandoc_parse.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "tbl_parse.h"
#include "eqn_parse.h"

/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * These are single-bit flags, so they can be combined into a mask;
 * ROFFDEF_ANY is the union of the four "defined" kinds and
 * deliberately excludes ROFFDEF_UNDEF.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */

/* --- data types --------------------------------------------------------- */

/*
 * An incredibly-simple string buffer.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};

/*
 * A key-value roffstr pair as part of a singly-linked list.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};

/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current value */
	int		 step; /* NOTE(review): presumably the auto-increment
				* step - confirm against roff_setregn() */
	struct roffreg	*next; /* next in list */
};

/*
 * Association of request and macro names with token IDs.
 * The name is a flexible array member: roffhash_alloc() allocates
 * each entry with room for the name and its terminating NUL, and
 * the hash table is keyed on the offset of this member.
 */
struct	roffreq {
	enum roff_tok	 tok;
	char		 name[];
};

/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 */
struct	mctx {
	char		**argv; /* argument vector; freed in roff_free() */
	int		 argc; /* NOTE(review): presumably the number of
				* arguments in use - confirm */
	int		 argsz; /* NOTE(review): presumably the allocated
				 * capacity of argv - confirm */
};

/*
 * The complete state of one roff parser instance.
 * Created by roff_alloc(), reset between files by roff_reset(),
 * and destroyed by roff_free().
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};

/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through "parent"; they are created by
 * roffnode_push() and released by roffnode_pop().
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};

/*
 * Parameter list shared by every request handler.
 * It is spliced into handler prototypes and definitions,
 * so all handlers have the roffproc signature below.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameters. */
typedef	int (*roffproc)(ROFF_ARGS);

/*
 * The set of handlers for one request; one entry per row of roffs[].
 * Handlers that do not apply are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};

/*
 * A predefined string: input name and the text that replaces it.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Expands to one struct predef initializer, including the trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },

/* --- function prototypes ------------------------------------------------ */

static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t,
int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);

/* --- constant data ------------------------------------------------------ */

#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/*
 * Names of all requests and macros.
 * roffhash_alloc() indexes this array by token value and skips
 * NULL entries, so the order of the strings is significant and
 * must not be changed independently of the token numbering.
 * The first part lists the roff requests in the same order as
 * the rows of roffs[] below.
 * NOTE(review): the later groups look like the mdoc(7) and man(7)
 * macro names - confirm against enum roff_tok in roff.h.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the name table for use outside this initializer. */
const	char *const *roff_name = __roff_name;

/*
 * Handlers for every roff request, one row per request.
 * The trailing comment on each row names the request the row
 * belongs to; the rows follow the order of the request names
 * at the start of __roff_name[], so rows must never be inserted,
 * removed, or reordered on their own.
 * NOTE(review): presumably indexed by enum roff_tok - confirm
 * against the users of roffs[] further down in this file.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub,
ROFFMAC_STRUCT }, /*while*/ 609 { roff_insec, NULL, NULL, 0 }, /* write */ 610 { roff_insec, NULL, NULL, 0 }, /* writec */ 611 { roff_insec, NULL, NULL, 0 }, /* writem */ 612 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 613 { roff_cblock, NULL, NULL, 0 }, /* . */ 614 { roff_renamed, NULL, NULL, 0 }, 615 { roff_userdef, NULL, NULL, 0 } 616 }; 617 618 /* Array of injected predefined strings. */ 619 #define PREDEFS_MAX 38 620 static const struct predef predefs[PREDEFS_MAX] = { 621 #include "predefs.in" 622 }; 623 624 static int roffce_lines; /* number of input lines to center */ 625 static struct roff_node *roffce_node; /* active request */ 626 static int roffit_lines; /* number of lines to delay */ 627 static char *roffit_macro; /* nil-terminated macro line */ 628 629 630 /* --- request table ------------------------------------------------------ */ 631 632 struct ohash * 633 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 634 { 635 struct ohash *htab; 636 struct roffreq *req; 637 enum roff_tok tok; 638 size_t sz; 639 unsigned int slot; 640 641 htab = mandoc_malloc(sizeof(*htab)); 642 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 643 644 for (tok = mintok; tok < maxtok; tok++) { 645 if (roff_name[tok] == NULL) 646 continue; 647 sz = strlen(roff_name[tok]); 648 req = mandoc_malloc(sizeof(*req) + sz + 1); 649 req->tok = tok; 650 memcpy(req->name, roff_name[tok], sz + 1); 651 slot = ohash_qlookup(htab, req->name); 652 ohash_insert(htab, slot, req); 653 } 654 return htab; 655 } 656 657 void 658 roffhash_free(struct ohash *htab) 659 { 660 struct roffreq *req; 661 unsigned int slot; 662 663 if (htab == NULL) 664 return; 665 for (req = ohash_first(htab, &slot); req != NULL; 666 req = ohash_next(htab, &slot)) 667 free(req); 668 ohash_delete(htab); 669 free(htab); 670 } 671 672 enum roff_tok 673 roffhash_find(struct ohash *htab, const char *name, size_t sz) 674 { 675 struct roffreq *req; 676 const char *end; 677 678 if (sz) { 679 end = name + sz; 
680 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 681 } else 682 req = ohash_find(htab, ohash_qlookup(htab, name)); 683 return req == NULL ? TOKEN_NONE : req->tok; 684 } 685 686 /* --- stack of request blocks -------------------------------------------- */ 687 688 /* 689 * Pop the current node off of the stack of roff instructions currently 690 * pending. Return 1 if it is a loop or 0 otherwise. 691 */ 692 static int 693 roffnode_pop(struct roff *r) 694 { 695 struct roffnode *p; 696 int inloop; 697 698 p = r->last; 699 inloop = p->tok == ROFF_while; 700 r->last = p->parent; 701 free(p->name); 702 free(p->end); 703 free(p); 704 return inloop; 705 } 706 707 /* 708 * Push a roff node onto the instruction stack. This must later be 709 * removed with roffnode_pop(). 710 */ 711 static void 712 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 713 int line, int col) 714 { 715 struct roffnode *p; 716 717 p = mandoc_calloc(1, sizeof(struct roffnode)); 718 p->tok = tok; 719 if (name) 720 p->name = mandoc_strdup(name); 721 p->parent = r->last; 722 p->line = line; 723 p->col = col; 724 p->rule = p->parent ? 
p->parent->rule : 0; 725 726 r->last = p; 727 } 728 729 /* --- roff parser state data management ---------------------------------- */ 730 731 static void 732 roff_free1(struct roff *r) 733 { 734 int i; 735 736 tbl_free(r->first_tbl); 737 r->first_tbl = r->last_tbl = r->tbl = NULL; 738 739 eqn_free(r->last_eqn); 740 r->last_eqn = r->eqn = NULL; 741 742 while (r->mstackpos >= 0) 743 roff_userret(r); 744 745 while (r->last) 746 roffnode_pop(r); 747 748 free (r->rstack); 749 r->rstack = NULL; 750 r->rstacksz = 0; 751 r->rstackpos = -1; 752 753 roff_freereg(r->regtab); 754 r->regtab = NULL; 755 756 roff_freestr(r->strtab); 757 roff_freestr(r->rentab); 758 roff_freestr(r->xmbtab); 759 r->strtab = r->rentab = r->xmbtab = NULL; 760 761 if (r->xtab) 762 for (i = 0; i < 128; i++) 763 free(r->xtab[i].p); 764 free(r->xtab); 765 r->xtab = NULL; 766 } 767 768 void 769 roff_reset(struct roff *r) 770 { 771 roff_free1(r); 772 r->options |= MPARSE_COMMENT; 773 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 774 r->control = '\0'; 775 r->escape = '\\'; 776 roffce_lines = 0; 777 roffce_node = NULL; 778 roffit_lines = 0; 779 roffit_macro = NULL; 780 } 781 782 void 783 roff_free(struct roff *r) 784 { 785 int i; 786 787 roff_free1(r); 788 for (i = 0; i < r->mstacksz; i++) 789 free(r->mstack[i].argv); 790 free(r->mstack); 791 roffhash_free(r->reqtab); 792 free(r); 793 } 794 795 struct roff * 796 roff_alloc(int options) 797 { 798 struct roff *r; 799 800 r = mandoc_calloc(1, sizeof(struct roff)); 801 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 802 r->options = options | MPARSE_COMMENT; 803 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 804 r->mstackpos = -1; 805 r->rstackpos = -1; 806 r->escape = '\\'; 807 return r; 808 } 809 810 /* --- syntax tree state data management ---------------------------------- */ 811 812 static void 813 roff_man_free1(struct roff_man *man) 814 { 815 if (man->meta.first != NULL) 816 roff_node_delete(man, man->meta.first); 817 free(man->meta.msec); 818 
free(man->meta.vol); 819 free(man->meta.os); 820 free(man->meta.arch); 821 free(man->meta.title); 822 free(man->meta.name); 823 free(man->meta.date); 824 free(man->meta.sodest); 825 } 826 827 void 828 roff_state_reset(struct roff_man *man) 829 { 830 man->last = man->meta.first; 831 man->last_es = NULL; 832 man->flags = 0; 833 man->lastsec = man->lastnamed = SEC_NONE; 834 man->next = ROFF_NEXT_CHILD; 835 roff_setreg(man->roff, "nS", 0, '='); 836 } 837 838 static void 839 roff_man_alloc1(struct roff_man *man) 840 { 841 memset(&man->meta, 0, sizeof(man->meta)); 842 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 843 man->meta.first->type = ROFFT_ROOT; 844 man->meta.macroset = MACROSET_NONE; 845 roff_state_reset(man); 846 } 847 848 void 849 roff_man_reset(struct roff_man *man) 850 { 851 roff_man_free1(man); 852 roff_man_alloc1(man); 853 } 854 855 void 856 roff_man_free(struct roff_man *man) 857 { 858 roff_man_free1(man); 859 free(man); 860 } 861 862 struct roff_man * 863 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 864 { 865 struct roff_man *man; 866 867 man = mandoc_calloc(1, sizeof(*man)); 868 man->roff = roff; 869 man->os_s = os_s; 870 man->quick = quick; 871 roff_man_alloc1(man); 872 roff->man = man; 873 return man; 874 } 875 876 /* --- syntax tree handling ----------------------------------------------- */ 877 878 struct roff_node * 879 roff_node_alloc(struct roff_man *man, int line, int pos, 880 enum roff_type type, int tok) 881 { 882 struct roff_node *n; 883 884 n = mandoc_calloc(1, sizeof(*n)); 885 n->line = line; 886 n->pos = pos; 887 n->tok = tok; 888 n->type = type; 889 n->sec = man->lastsec; 890 891 if (man->flags & MDOC_SYNOPSIS) 892 n->flags |= NODE_SYNPRETTY; 893 else 894 n->flags &= ~NODE_SYNPRETTY; 895 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 896 n->flags |= NODE_NOFILL; 897 else 898 n->flags &= ~NODE_NOFILL; 899 if (man->flags & MDOC_NEWLINE) 900 n->flags |= NODE_LINE; 901 man->flags &= 
~MDOC_NEWLINE; 902 903 return n; 904 } 905 906 void 907 roff_node_append(struct roff_man *man, struct roff_node *n) 908 { 909 910 switch (man->next) { 911 case ROFF_NEXT_SIBLING: 912 if (man->last->next != NULL) { 913 n->next = man->last->next; 914 man->last->next->prev = n; 915 } else 916 man->last->parent->last = n; 917 man->last->next = n; 918 n->prev = man->last; 919 n->parent = man->last->parent; 920 break; 921 case ROFF_NEXT_CHILD: 922 if (man->last->child != NULL) { 923 n->next = man->last->child; 924 man->last->child->prev = n; 925 } else 926 man->last->last = n; 927 man->last->child = n; 928 n->parent = man->last; 929 break; 930 default: 931 abort(); 932 } 933 man->last = n; 934 935 switch (n->type) { 936 case ROFFT_HEAD: 937 n->parent->head = n; 938 break; 939 case ROFFT_BODY: 940 if (n->end != ENDBODY_NOT) 941 return; 942 n->parent->body = n; 943 break; 944 case ROFFT_TAIL: 945 n->parent->tail = n; 946 break; 947 default: 948 return; 949 } 950 951 /* 952 * Copy over the normalised-data pointer of our parent. Not 953 * everybody has one, but copying a null pointer is fine. 
954 */ 955 956 n->norm = n->parent->norm; 957 assert(n->parent->type == ROFFT_BLOCK); 958 } 959 960 void 961 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 962 { 963 struct roff_node *n; 964 965 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 966 n->string = roff_strdup(man->roff, word); 967 roff_node_append(man, n); 968 n->flags |= NODE_VALID | NODE_ENDED; 969 man->next = ROFF_NEXT_SIBLING; 970 } 971 972 void 973 roff_word_append(struct roff_man *man, const char *word) 974 { 975 struct roff_node *n; 976 char *addstr, *newstr; 977 978 n = man->last; 979 addstr = roff_strdup(man->roff, word); 980 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 981 free(addstr); 982 free(n->string); 983 n->string = newstr; 984 man->next = ROFF_NEXT_SIBLING; 985 } 986 987 void 988 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 989 { 990 struct roff_node *n; 991 992 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 993 roff_node_append(man, n); 994 man->next = ROFF_NEXT_CHILD; 995 } 996 997 struct roff_node * 998 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 999 { 1000 struct roff_node *n; 1001 1002 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1003 roff_node_append(man, n); 1004 man->next = ROFF_NEXT_CHILD; 1005 return n; 1006 } 1007 1008 struct roff_node * 1009 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1010 { 1011 struct roff_node *n; 1012 1013 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1014 roff_node_append(man, n); 1015 man->next = ROFF_NEXT_CHILD; 1016 return n; 1017 } 1018 1019 struct roff_node * 1020 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1021 { 1022 struct roff_node *n; 1023 1024 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1025 roff_node_append(man, n); 1026 man->next = ROFF_NEXT_CHILD; 1027 return n; 1028 } 1029 1030 static void 1031 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1032 { 
1033 struct roff_node *n; 1034 struct tbl_span *span; 1035 1036 if (man->meta.macroset == MACROSET_MAN) 1037 man_breakscope(man, ROFF_TS); 1038 while ((span = tbl_span(tbl)) != NULL) { 1039 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1040 n->span = span; 1041 roff_node_append(man, n); 1042 n->flags |= NODE_VALID | NODE_ENDED; 1043 man->next = ROFF_NEXT_SIBLING; 1044 } 1045 } 1046 1047 void 1048 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1049 { 1050 1051 /* Adjust siblings. */ 1052 1053 if (n->prev) 1054 n->prev->next = n->next; 1055 if (n->next) 1056 n->next->prev = n->prev; 1057 1058 /* Adjust parent. */ 1059 1060 if (n->parent != NULL) { 1061 if (n->parent->child == n) 1062 n->parent->child = n->next; 1063 if (n->parent->last == n) 1064 n->parent->last = n->prev; 1065 } 1066 1067 /* Adjust parse point. */ 1068 1069 if (man == NULL) 1070 return; 1071 if (man->last == n) { 1072 if (n->prev == NULL) { 1073 man->last = n->parent; 1074 man->next = ROFF_NEXT_CHILD; 1075 } else { 1076 man->last = n->prev; 1077 man->next = ROFF_NEXT_SIBLING; 1078 } 1079 } 1080 if (man->meta.first == n) 1081 man->meta.first = NULL; 1082 } 1083 1084 void 1085 roff_node_relink(struct roff_man *man, struct roff_node *n) 1086 { 1087 roff_node_unlink(man, n); 1088 n->prev = n->next = NULL; 1089 roff_node_append(man, n); 1090 } 1091 1092 void 1093 roff_node_free(struct roff_node *n) 1094 { 1095 1096 if (n->args != NULL) 1097 mdoc_argv_free(n->args); 1098 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1099 free(n->norm); 1100 eqn_box_free(n->eqn); 1101 free(n->string); 1102 free(n); 1103 } 1104 1105 void 1106 roff_node_delete(struct roff_man *man, struct roff_node *n) 1107 { 1108 1109 while (n->child != NULL) 1110 roff_node_delete(man, n->child); 1111 roff_node_unlink(man, n); 1112 roff_node_free(n); 1113 } 1114 1115 int 1116 roff_node_transparent(struct roff_node *n) 1117 { 1118 if (n == NULL) 1119 return 0; 1120 if (n->type == ROFFT_COMMENT || n->flags & 
NODE_NOPRT) 1121 return 1; 1122 switch (n->tok) { 1123 case ROFF_ft: 1124 case ROFF_ll: 1125 case ROFF_mc: 1126 case ROFF_po: 1127 case ROFF_ta: 1128 case MDOC_Db: 1129 case MDOC_Es: 1130 case MDOC_Sm: 1131 case MDOC_Tg: 1132 case MAN_DT: 1133 case MAN_UC: 1134 case MAN_PD: 1135 case MAN_AT: 1136 return 1; 1137 default: 1138 return 0; 1139 } 1140 } 1141 1142 struct roff_node * 1143 roff_node_child(struct roff_node *n) 1144 { 1145 for (n = n->child; roff_node_transparent(n); n = n->next) 1146 continue; 1147 return n; 1148 } 1149 1150 struct roff_node * 1151 roff_node_prev(struct roff_node *n) 1152 { 1153 do { 1154 n = n->prev; 1155 } while (roff_node_transparent(n)); 1156 return n; 1157 } 1158 1159 struct roff_node * 1160 roff_node_next(struct roff_node *n) 1161 { 1162 do { 1163 n = n->next; 1164 } while (roff_node_transparent(n)); 1165 return n; 1166 } 1167 1168 void 1169 deroff(char **dest, const struct roff_node *n) 1170 { 1171 char *cp; 1172 size_t sz; 1173 1174 if (n->string == NULL) { 1175 for (n = n->child; n != NULL; n = n->next) 1176 deroff(dest, n); 1177 return; 1178 } 1179 1180 /* Skip leading whitespace. */ 1181 1182 for (cp = n->string; *cp != '\0'; cp++) { 1183 if (cp[0] == '\\' && cp[1] != '\0' && 1184 strchr(" %&0^|~", cp[1]) != NULL) 1185 cp++; 1186 else if ( ! isspace((unsigned char)*cp)) 1187 break; 1188 } 1189 1190 /* Skip trailing backslash. */ 1191 1192 sz = strlen(cp); 1193 if (sz > 0 && cp[sz - 1] == '\\') 1194 sz--; 1195 1196 /* Skip trailing whitespace. */ 1197 1198 for (; sz; sz--) 1199 if ( ! isspace((unsigned char)cp[sz-1])) 1200 break; 1201 1202 /* Skip empty strings. 
*/ 1203 1204 if (sz == 0) 1205 return; 1206 1207 if (*dest == NULL) { 1208 *dest = mandoc_strndup(cp, sz); 1209 return; 1210 } 1211 1212 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1213 free(*dest); 1214 *dest = cp; 1215 } 1216 1217 /* --- main functions of the roff parser ---------------------------------- */ 1218 1219 /* 1220 * In the current line, expand escape sequences that produce parsable 1221 * input text. Also check the syntax of the remaining escape sequences, 1222 * which typically produce output glyphs or change formatter state. 1223 */ 1224 static int 1225 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1226 { 1227 struct mctx *ctx; /* current macro call context */ 1228 char ubuf[24]; /* buffer to print the number */ 1229 struct roff_node *n; /* used for header comments */ 1230 const char *start; /* start of the string to process */ 1231 char *stesc; /* start of an escape sequence ('\\') */ 1232 const char *esct; /* type of esccape sequence */ 1233 char *ep; /* end of comment string */ 1234 const char *stnam; /* start of the name, after "[(*" */ 1235 const char *cp; /* end of the name, e.g. 
before ']' */ 1236 const char *res; /* the string to be substituted */ 1237 char *nbuf; /* new buffer to copy buf->buf to */ 1238 size_t maxl; /* expected length of the escape name */ 1239 size_t naml; /* actual length of the escape name */ 1240 size_t asz; /* length of the replacement */ 1241 size_t rsz; /* length of the rest of the string */ 1242 int inaml; /* length returned from mandoc_escape() */ 1243 int expand_count; /* to avoid infinite loops */ 1244 int npos; /* position in numeric expression */ 1245 int arg_complete; /* argument not interrupted by eol */ 1246 int quote_args; /* true for \\$@, false for \\$* */ 1247 int done; /* no more input available */ 1248 int deftype; /* type of definition to paste */ 1249 int rcsid; /* kind of RCS id seen */ 1250 enum mandocerr err; /* for escape sequence problems */ 1251 char sign; /* increment number register */ 1252 char term; /* character terminating the escape */ 1253 1254 /* Search forward for comments. */ 1255 1256 done = 0; 1257 start = buf->buf + pos; 1258 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { 1259 if (stesc[0] != newesc || stesc[1] == '\0') 1260 continue; 1261 stesc++; 1262 if (*stesc != '"' && *stesc != '#') 1263 continue; 1264 1265 /* Comment found, look for RCS id. */ 1266 1267 rcsid = 0; 1268 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { 1269 rcsid = 1 << MANDOC_OS_OPENBSD; 1270 cp += 8; 1271 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { 1272 rcsid = 1 << MANDOC_OS_NETBSD; 1273 cp += 7; 1274 } 1275 if (cp != NULL && 1276 isalnum((unsigned char)*cp) == 0 && 1277 strchr(cp, '$') != NULL) { 1278 if (r->man->meta.rcsids & rcsid) 1279 mandoc_msg(MANDOCERR_RCS_REP, ln, 1280 (int)(stesc - buf->buf) + 1, 1281 "%s", stesc + 1); 1282 r->man->meta.rcsids |= rcsid; 1283 } 1284 1285 /* Handle trailing whitespace. 
*/ 1286 1287 ep = strchr(stesc--, '\0') - 1; 1288 if (*ep == '\n') { 1289 done = 1; 1290 ep--; 1291 } 1292 if (*ep == ' ' || *ep == '\t') 1293 mandoc_msg(MANDOCERR_SPACE_EOL, 1294 ln, (int)(ep - buf->buf), NULL); 1295 1296 /* 1297 * Save comments preceding the title macro 1298 * in the syntax tree. 1299 */ 1300 1301 if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) { 1302 while (*ep == ' ' || *ep == '\t') 1303 ep--; 1304 ep[1] = '\0'; 1305 n = roff_node_alloc(r->man, 1306 ln, stesc + 1 - buf->buf, 1307 ROFFT_COMMENT, TOKEN_NONE); 1308 n->string = mandoc_strdup(stesc + 2); 1309 roff_node_append(r->man, n); 1310 n->flags |= NODE_VALID | NODE_ENDED; 1311 r->man->next = ROFF_NEXT_SIBLING; 1312 } 1313 1314 /* Line continuation with comment. */ 1315 1316 if (stesc[1] == '#') { 1317 *stesc = '\0'; 1318 return ROFF_IGN | ROFF_APPEND; 1319 } 1320 1321 /* Discard normal comments. */ 1322 1323 while (stesc > start && stesc[-1] == ' ' && 1324 (stesc == start + 1 || stesc[-2] != '\\')) 1325 stesc--; 1326 *stesc = '\0'; 1327 break; 1328 } 1329 if (stesc == start) 1330 return ROFF_CONT; 1331 stesc--; 1332 1333 /* Notice the end of the input. */ 1334 1335 if (*stesc == '\n') { 1336 *stesc-- = '\0'; 1337 done = 1; 1338 } 1339 1340 expand_count = 0; 1341 while (stesc >= start) { 1342 if (*stesc != newesc) { 1343 1344 /* 1345 * If we have a non-standard escape character, 1346 * escape literal backslashes because all 1347 * processing in subsequent functions uses 1348 * the standard escaping rules. 1349 */ 1350 1351 if (newesc != ASCII_ESC && *stesc == '\\') { 1352 *stesc = '\0'; 1353 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", 1354 buf->buf, stesc + 1) + 1; 1355 start = nbuf + pos; 1356 stesc = nbuf + (stesc - buf->buf); 1357 free(buf->buf); 1358 buf->buf = nbuf; 1359 } 1360 1361 /* Search backwards for the next escape. */ 1362 1363 stesc--; 1364 continue; 1365 } 1366 1367 /* If it is escaped, skip it. 
*/ 1368 1369 for (cp = stesc - 1; cp >= start; cp--) 1370 if (*cp != r->escape) 1371 break; 1372 1373 if ((stesc - cp) % 2 == 0) { 1374 while (stesc > cp) 1375 *stesc-- = '\\'; 1376 continue; 1377 } else if (stesc[1] != '\0') { 1378 *stesc = '\\'; 1379 } else { 1380 *stesc-- = '\0'; 1381 if (done) 1382 continue; 1383 else 1384 return ROFF_IGN | ROFF_APPEND; 1385 } 1386 1387 /* Decide whether to expand or to check only. */ 1388 1389 term = '\0'; 1390 cp = stesc + 1; 1391 if (*cp == 'E') 1392 cp++; 1393 esct = cp; 1394 switch (*esct) { 1395 case '*': 1396 case '$': 1397 res = NULL; 1398 break; 1399 case 'B': 1400 case 'w': 1401 term = cp[1]; 1402 /* FALLTHROUGH */ 1403 case 'n': 1404 sign = cp[1]; 1405 if (sign == '+' || sign == '-') 1406 cp++; 1407 res = ubuf; 1408 break; 1409 default: 1410 err = MANDOCERR_OK; 1411 switch(mandoc_escape(&cp, &stnam, &inaml)) { 1412 case ESCAPE_SPECIAL: 1413 if (mchars_spec2cp(stnam, inaml) >= 0) 1414 break; 1415 /* FALLTHROUGH */ 1416 case ESCAPE_ERROR: 1417 err = MANDOCERR_ESC_BAD; 1418 break; 1419 case ESCAPE_UNDEF: 1420 err = MANDOCERR_ESC_UNDEF; 1421 break; 1422 case ESCAPE_UNSUPP: 1423 err = MANDOCERR_ESC_UNSUPP; 1424 break; 1425 default: 1426 break; 1427 } 1428 if (err != MANDOCERR_OK) 1429 mandoc_msg(err, ln, (int)(stesc - buf->buf), 1430 "%.*s", (int)(cp - stesc), stesc); 1431 stesc--; 1432 continue; 1433 } 1434 1435 if (EXPAND_LIMIT < ++expand_count) { 1436 mandoc_msg(MANDOCERR_ROFFLOOP, 1437 ln, (int)(stesc - buf->buf), NULL); 1438 return ROFF_IGN; 1439 } 1440 1441 /* 1442 * The third character decides the length 1443 * of the name of the string or register. 1444 * Save a pointer to the name. 
1445 */ 1446 1447 if (term == '\0') { 1448 switch (*++cp) { 1449 case '\0': 1450 maxl = 0; 1451 break; 1452 case '(': 1453 cp++; 1454 maxl = 2; 1455 break; 1456 case '[': 1457 cp++; 1458 term = ']'; 1459 maxl = 0; 1460 break; 1461 default: 1462 maxl = 1; 1463 break; 1464 } 1465 } else { 1466 cp += 2; 1467 maxl = 0; 1468 } 1469 stnam = cp; 1470 1471 /* Advance to the end of the name. */ 1472 1473 naml = 0; 1474 arg_complete = 1; 1475 while (maxl == 0 || naml < maxl) { 1476 if (*cp == '\0') { 1477 mandoc_msg(MANDOCERR_ESC_BAD, ln, 1478 (int)(stesc - buf->buf), "%s", stesc); 1479 arg_complete = 0; 1480 break; 1481 } 1482 if (maxl == 0 && *cp == term) { 1483 cp++; 1484 break; 1485 } 1486 if (*cp++ != '\\' || *esct != 'w') { 1487 naml++; 1488 continue; 1489 } 1490 switch (mandoc_escape(&cp, NULL, NULL)) { 1491 case ESCAPE_SPECIAL: 1492 case ESCAPE_UNICODE: 1493 case ESCAPE_NUMBERED: 1494 case ESCAPE_UNDEF: 1495 case ESCAPE_OVERSTRIKE: 1496 naml++; 1497 break; 1498 default: 1499 break; 1500 } 1501 } 1502 1503 /* 1504 * Retrieve the replacement string; if it is 1505 * undefined, resume searching for escapes. 1506 */ 1507 1508 switch (*esct) { 1509 case '*': 1510 if (arg_complete) { 1511 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1512 res = roff_getstrn(r, stnam, naml, &deftype); 1513 1514 /* 1515 * If not overriden, let \*(.T 1516 * through to the formatters. 1517 */ 1518 1519 if (res == NULL && naml == 2 && 1520 stnam[0] == '.' && stnam[1] == 'T') { 1521 roff_setstrn(&r->strtab, 1522 ".T", 2, NULL, 0, 0); 1523 stesc--; 1524 continue; 1525 } 1526 } 1527 break; 1528 case '$': 1529 if (r->mstackpos < 0) { 1530 mandoc_msg(MANDOCERR_ARG_UNDEF, ln, 1531 (int)(stesc - buf->buf), "%.3s", stesc); 1532 break; 1533 } 1534 ctx = r->mstack + r->mstackpos; 1535 npos = esct[1] - '1'; 1536 if (npos >= 0 && npos <= 8) { 1537 res = npos < ctx->argc ? 
1538 ctx->argv[npos] : ""; 1539 break; 1540 } 1541 if (esct[1] == '*') 1542 quote_args = 0; 1543 else if (esct[1] == '@') 1544 quote_args = 1; 1545 else { 1546 mandoc_msg(MANDOCERR_ARG_NONUM, ln, 1547 (int)(stesc - buf->buf), "%.3s", stesc); 1548 break; 1549 } 1550 asz = 0; 1551 for (npos = 0; npos < ctx->argc; npos++) { 1552 if (npos) 1553 asz++; /* blank */ 1554 if (quote_args) 1555 asz += 2; /* quotes */ 1556 asz += strlen(ctx->argv[npos]); 1557 } 1558 if (asz != 3) { 1559 rsz = buf->sz - (stesc - buf->buf) - 3; 1560 if (asz < 3) 1561 memmove(stesc + asz, stesc + 3, rsz); 1562 buf->sz += asz - 3; 1563 nbuf = mandoc_realloc(buf->buf, buf->sz); 1564 start = nbuf + pos; 1565 stesc = nbuf + (stesc - buf->buf); 1566 buf->buf = nbuf; 1567 if (asz > 3) 1568 memmove(stesc + asz, stesc + 3, rsz); 1569 } 1570 for (npos = 0; npos < ctx->argc; npos++) { 1571 if (npos) 1572 *stesc++ = ' '; 1573 if (quote_args) 1574 *stesc++ = '"'; 1575 cp = ctx->argv[npos]; 1576 while (*cp != '\0') 1577 *stesc++ = *cp++; 1578 if (quote_args) 1579 *stesc++ = '"'; 1580 } 1581 continue; 1582 case 'B': 1583 npos = 0; 1584 ubuf[0] = arg_complete && 1585 roff_evalnum(r, ln, stnam, &npos, 1586 NULL, ROFFNUM_SCALE) && 1587 stnam + npos + 1 == cp ? '1' : '0'; 1588 ubuf[1] = '\0'; 1589 break; 1590 case 'n': 1591 if (arg_complete) 1592 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1593 roff_getregn(r, stnam, naml, sign)); 1594 else 1595 ubuf[0] = '\0'; 1596 break; 1597 case 'w': 1598 /* use even incomplete args */ 1599 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1600 24 * (int)naml); 1601 break; 1602 } 1603 1604 if (res == NULL) { 1605 if (*esct == '*') 1606 mandoc_msg(MANDOCERR_STR_UNDEF, 1607 ln, (int)(stesc - buf->buf), 1608 "%.*s", (int)naml, stnam); 1609 res = ""; 1610 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1611 mandoc_msg(MANDOCERR_ROFFLOOP, 1612 ln, (int)(stesc - buf->buf), NULL); 1613 return ROFF_IGN; 1614 } 1615 1616 /* Replace the escape sequence by the string. 
*/ 1617 1618 *stesc = '\0'; 1619 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1620 buf->buf, res, cp) + 1; 1621 1622 /* Prepare for the next replacement. */ 1623 1624 start = nbuf + pos; 1625 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1626 free(buf->buf); 1627 buf->buf = nbuf; 1628 } 1629 return ROFF_CONT; 1630 } 1631 1632 /* 1633 * Parse a quoted or unquoted roff-style request or macro argument. 1634 * Return a pointer to the parsed argument, which is either the original 1635 * pointer or advanced by one byte in case the argument is quoted. 1636 * NUL-terminate the argument in place. 1637 * Collapse pairs of quotes inside quoted arguments. 1638 * Advance the argument pointer to the next argument, 1639 * or to the NUL byte terminating the argument line. 1640 */ 1641 char * 1642 roff_getarg(struct roff *r, char **cpp, int ln, int *pos) 1643 { 1644 struct buf buf; 1645 char *cp, *start; 1646 int newesc, pairs, quoted, white; 1647 1648 /* Quoting can only start with a new word. */ 1649 start = *cpp; 1650 quoted = 0; 1651 if ('"' == *start) { 1652 quoted = 1; 1653 start++; 1654 } 1655 1656 newesc = pairs = white = 0; 1657 for (cp = start; '\0' != *cp; cp++) { 1658 1659 /* 1660 * Move the following text left 1661 * after quoted quotes and after "\\" and "\t". 1662 */ 1663 if (pairs) 1664 cp[-pairs] = cp[0]; 1665 1666 if ('\\' == cp[0]) { 1667 /* 1668 * In copy mode, translate double to single 1669 * backslashes and backslash-t to literal tabs. 1670 */ 1671 switch (cp[1]) { 1672 case 'a': 1673 case 't': 1674 cp[-pairs] = '\t'; 1675 pairs++; 1676 cp++; 1677 break; 1678 case '\\': 1679 newesc = 1; 1680 cp[-pairs] = ASCII_ESC; 1681 pairs++; 1682 cp++; 1683 break; 1684 case ' ': 1685 /* Skip escaped blanks. */ 1686 if (0 == quoted) 1687 cp++; 1688 break; 1689 default: 1690 break; 1691 } 1692 } else if (0 == quoted) { 1693 if (' ' == cp[0]) { 1694 /* Unescaped blanks end unquoted args. 
*/ 1695 white = 1; 1696 break; 1697 } 1698 } else if ('"' == cp[0]) { 1699 if ('"' == cp[1]) { 1700 /* Quoted quotes collapse. */ 1701 pairs++; 1702 cp++; 1703 } else { 1704 /* Unquoted quotes end quoted args. */ 1705 quoted = 2; 1706 break; 1707 } 1708 } 1709 } 1710 1711 /* Quoted argument without a closing quote. */ 1712 if (1 == quoted) 1713 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1714 1715 /* NUL-terminate this argument and move to the next one. */ 1716 if (pairs) 1717 cp[-pairs] = '\0'; 1718 if ('\0' != *cp) { 1719 *cp++ = '\0'; 1720 while (' ' == *cp) 1721 cp++; 1722 } 1723 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1724 *cpp = cp; 1725 1726 if ('\0' == *cp && (white || ' ' == cp[-1])) 1727 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1728 1729 start = mandoc_strdup(start); 1730 if (newesc == 0) 1731 return start; 1732 1733 buf.buf = start; 1734 buf.sz = strlen(start) + 1; 1735 buf.next = NULL; 1736 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { 1737 free(buf.buf); 1738 buf.buf = mandoc_strdup(""); 1739 } 1740 return buf.buf; 1741 } 1742 1743 1744 /* 1745 * Process text streams. 1746 */ 1747 static int 1748 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1749 { 1750 size_t sz; 1751 const char *start; 1752 char *p; 1753 int isz; 1754 enum mandoc_esc esc; 1755 1756 /* Spring the input line trap. */ 1757 1758 if (roffit_lines == 1) { 1759 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1760 free(buf->buf); 1761 buf->buf = p; 1762 buf->sz = isz + 1; 1763 *offs = 0; 1764 free(roffit_macro); 1765 roffit_lines = 0; 1766 return ROFF_REPARSE; 1767 } else if (roffit_lines > 1) 1768 --roffit_lines; 1769 1770 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1771 if (roffce_lines < 1) { 1772 r->man->last = roffce_node; 1773 r->man->next = ROFF_NEXT_SIBLING; 1774 roffce_lines = 0; 1775 roffce_node = NULL; 1776 } else 1777 roffce_lines--; 1778 } 1779 1780 /* Convert all breakable hyphens into ASCII_HYPH. 
*/ 1781 1782 start = p = buf->buf + pos; 1783 1784 while (*p != '\0') { 1785 sz = strcspn(p, "-\\"); 1786 p += sz; 1787 1788 if (*p == '\0') 1789 break; 1790 1791 if (*p == '\\') { 1792 /* Skip over escapes. */ 1793 p++; 1794 esc = mandoc_escape((const char **)&p, NULL, NULL); 1795 if (esc == ESCAPE_ERROR) 1796 break; 1797 while (*p == '-') 1798 p++; 1799 continue; 1800 } else if (p == start) { 1801 p++; 1802 continue; 1803 } 1804 1805 if (isalpha((unsigned char)p[-1]) && 1806 isalpha((unsigned char)p[1])) 1807 *p = ASCII_HYPH; 1808 p++; 1809 } 1810 return ROFF_CONT; 1811 } 1812 1813 int 1814 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) 1815 { 1816 enum roff_tok t; 1817 int e; 1818 int pos; /* parse point */ 1819 int spos; /* saved parse point for messages */ 1820 int ppos; /* original offset in buf->buf */ 1821 int ctl; /* macro line (boolean) */ 1822 1823 ppos = pos = *offs; 1824 1825 /* Handle in-line equation delimiters. */ 1826 1827 if (r->tbl == NULL && 1828 r->last_eqn != NULL && r->last_eqn->delim && 1829 (r->eqn == NULL || r->eqn_inline)) { 1830 e = roff_eqndelim(r, buf, pos); 1831 if (e == ROFF_REPARSE) 1832 return e; 1833 assert(e == ROFF_CONT); 1834 } 1835 1836 /* Expand some escape sequences. */ 1837 1838 e = roff_expand(r, buf, ln, pos, r->escape); 1839 if ((e & ROFF_MASK) == ROFF_IGN) 1840 return e; 1841 assert(e == ROFF_CONT); 1842 1843 ctl = roff_getcontrol(r, buf->buf, &pos); 1844 1845 /* 1846 * First, if a scope is open and we're not a macro, pass the 1847 * text through the macro's filter. 1848 * Equations process all content themselves. 1849 * Tables process almost all content themselves, but we want 1850 * to warn about macros before passing it there. 1851 */ 1852 1853 if (r->last != NULL && ! 
ctl) { 1854 t = r->last->tok; 1855 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1856 if ((e & ROFF_MASK) == ROFF_IGN) 1857 return e; 1858 e &= ~ROFF_MASK; 1859 } else 1860 e = ROFF_IGN; 1861 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1862 eqn_read(r->eqn, buf->buf + ppos); 1863 return e; 1864 } 1865 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1866 tbl_read(r->tbl, ln, buf->buf, ppos); 1867 roff_addtbl(r->man, ln, r->tbl); 1868 return e; 1869 } 1870 if ( ! ctl) { 1871 r->options &= ~MPARSE_COMMENT; 1872 return roff_parsetext(r, buf, pos, offs) | e; 1873 } 1874 1875 /* Skip empty request lines. */ 1876 1877 if (buf->buf[pos] == '"') { 1878 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL); 1879 return ROFF_IGN; 1880 } else if (buf->buf[pos] == '\0') 1881 return ROFF_IGN; 1882 1883 /* 1884 * If a scope is open, go to the child handler for that macro, 1885 * as it may want to preprocess before doing anything with it. 1886 * Don't do so if an equation is open. 1887 */ 1888 1889 if (r->last) { 1890 t = r->last->tok; 1891 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1892 } 1893 1894 /* No scope is open. This is a new request or macro. */ 1895 1896 r->options &= ~MPARSE_COMMENT; 1897 spos = pos; 1898 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1899 1900 /* Tables ignore most macros. */ 1901 1902 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS || 1903 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) { 1904 mandoc_msg(MANDOCERR_TBLMACRO, 1905 ln, pos, "%s", buf->buf + spos); 1906 if (t != TOKEN_NONE) 1907 return ROFF_IGN; 1908 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1909 pos++; 1910 while (buf->buf[pos] == ' ') 1911 pos++; 1912 tbl_read(r->tbl, ln, buf->buf, pos); 1913 roff_addtbl(r->man, ln, r->tbl); 1914 return ROFF_IGN; 1915 } 1916 1917 /* For now, let high level macros abort .ce mode. 
*/ 1918 1919 if (ctl && roffce_node != NULL && 1920 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 1921 t == ROFF_TH || t == ROFF_TS)) { 1922 r->man->last = roffce_node; 1923 r->man->next = ROFF_NEXT_SIBLING; 1924 roffce_lines = 0; 1925 roffce_node = NULL; 1926 } 1927 1928 /* 1929 * This is neither a roff request nor a user-defined macro. 1930 * Let the standard macro set parsers handle it. 1931 */ 1932 1933 if (t == TOKEN_NONE) 1934 return ROFF_CONT; 1935 1936 /* Execute a roff request or a user defined macro. */ 1937 1938 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); 1939 } 1940 1941 /* 1942 * Internal interface function to tell the roff parser that execution 1943 * of the current macro ended. This is required because macro 1944 * definitions usually do not end with a .return request. 1945 */ 1946 void 1947 roff_userret(struct roff *r) 1948 { 1949 struct mctx *ctx; 1950 int i; 1951 1952 assert(r->mstackpos >= 0); 1953 ctx = r->mstack + r->mstackpos; 1954 for (i = 0; i < ctx->argc; i++) 1955 free(ctx->argv[i]); 1956 ctx->argc = 0; 1957 r->mstackpos--; 1958 } 1959 1960 void 1961 roff_endparse(struct roff *r) 1962 { 1963 if (r->last != NULL) 1964 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, 1965 r->last->col, "%s", roff_name[r->last->tok]); 1966 1967 if (r->eqn != NULL) { 1968 mandoc_msg(MANDOCERR_BLK_NOEND, 1969 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1970 eqn_parse(r->eqn); 1971 r->eqn = NULL; 1972 } 1973 1974 if (r->tbl != NULL) { 1975 tbl_end(r->tbl, 1); 1976 r->tbl = NULL; 1977 } 1978 } 1979 1980 /* 1981 * Parse a roff node's type from the input buffer. This must be in the 1982 * form of ".foo xxx" in the usual way. 
1983 */ 1984 static enum roff_tok 1985 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1986 { 1987 char *cp; 1988 const char *mac; 1989 size_t maclen; 1990 int deftype; 1991 enum roff_tok t; 1992 1993 cp = buf + *pos; 1994 1995 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1996 return TOKEN_NONE; 1997 1998 mac = cp; 1999 maclen = roff_getname(r, &cp, ln, ppos); 2000 2001 deftype = ROFFDEF_USER | ROFFDEF_REN; 2002 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 2003 switch (deftype) { 2004 case ROFFDEF_USER: 2005 t = ROFF_USERDEF; 2006 break; 2007 case ROFFDEF_REN: 2008 t = ROFF_RENAMED; 2009 break; 2010 default: 2011 t = roffhash_find(r->reqtab, mac, maclen); 2012 break; 2013 } 2014 if (t != TOKEN_NONE) 2015 *pos = cp - buf; 2016 else if (deftype == ROFFDEF_UNDEF) { 2017 /* Using an undefined macro defines it to be empty. */ 2018 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 2019 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 2020 } 2021 return t; 2022 } 2023 2024 /* --- handling of request blocks ----------------------------------------- */ 2025 2026 static int 2027 roff_cblock(ROFF_ARGS) 2028 { 2029 2030 /* 2031 * A block-close `..' should only be invoked as a child of an 2032 * ignore macro, otherwise raise a warning and just ignore it. 2033 */ 2034 2035 if (r->last == NULL) { 2036 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2037 return ROFF_IGN; 2038 } 2039 2040 switch (r->last->tok) { 2041 case ROFF_am: 2042 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ 2043 case ROFF_ami: 2044 case ROFF_de: 2045 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 2046 case ROFF_dei: 2047 case ROFF_ig: 2048 break; 2049 default: 2050 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2051 return ROFF_IGN; 2052 } 2053 2054 if (buf->buf[pos] != '\0') 2055 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 2056 ".. 
%s", buf->buf + pos); 2057 2058 roffnode_pop(r); 2059 roffnode_cleanscope(r); 2060 return ROFF_IGN; 2061 2062 } 2063 2064 /* 2065 * Pop all nodes ending at the end of the current input line. 2066 * Return the number of loops ended. 2067 */ 2068 static int 2069 roffnode_cleanscope(struct roff *r) 2070 { 2071 int inloop; 2072 2073 inloop = 0; 2074 while (r->last != NULL) { 2075 if (--r->last->endspan != 0) 2076 break; 2077 inloop += roffnode_pop(r); 2078 } 2079 return inloop; 2080 } 2081 2082 /* 2083 * Handle the closing \} of a conditional block. 2084 * Apart from generating warnings, this only pops nodes. 2085 * Return the number of loops ended. 2086 */ 2087 static int 2088 roff_ccond(struct roff *r, int ln, int ppos) 2089 { 2090 if (NULL == r->last) { 2091 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2092 return 0; 2093 } 2094 2095 switch (r->last->tok) { 2096 case ROFF_el: 2097 case ROFF_ie: 2098 case ROFF_if: 2099 case ROFF_while: 2100 break; 2101 default: 2102 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2103 return 0; 2104 } 2105 2106 if (r->last->endspan > -1) { 2107 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2108 return 0; 2109 } 2110 2111 return roffnode_pop(r) + roffnode_cleanscope(r); 2112 } 2113 2114 static int 2115 roff_block(ROFF_ARGS) 2116 { 2117 const char *name, *value; 2118 char *call, *cp, *iname, *rname; 2119 size_t csz, namesz, rsz; 2120 int deftype; 2121 2122 /* Ignore groff compatibility mode for now. */ 2123 2124 if (tok == ROFF_de1) 2125 tok = ROFF_de; 2126 else if (tok == ROFF_dei1) 2127 tok = ROFF_dei; 2128 else if (tok == ROFF_am1) 2129 tok = ROFF_am; 2130 else if (tok == ROFF_ami1) 2131 tok = ROFF_ami; 2132 2133 /* Parse the macro name argument. */ 2134 2135 cp = buf->buf + pos; 2136 if (tok == ROFF_ig) { 2137 iname = NULL; 2138 namesz = 0; 2139 } else { 2140 iname = cp; 2141 namesz = roff_getname(r, &cp, ln, ppos); 2142 iname[namesz] = '\0'; 2143 } 2144 2145 /* Resolve the macro name argument if it is indirect. 
*/ 2146 2147 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2148 deftype = ROFFDEF_USER; 2149 name = roff_getstrn(r, iname, namesz, &deftype); 2150 if (name == NULL) { 2151 mandoc_msg(MANDOCERR_STR_UNDEF, 2152 ln, (int)(iname - buf->buf), 2153 "%.*s", (int)namesz, iname); 2154 namesz = 0; 2155 } else 2156 namesz = strlen(name); 2157 } else 2158 name = iname; 2159 2160 if (namesz == 0 && tok != ROFF_ig) { 2161 mandoc_msg(MANDOCERR_REQ_EMPTY, 2162 ln, ppos, "%s", roff_name[tok]); 2163 return ROFF_IGN; 2164 } 2165 2166 roffnode_push(r, tok, name, ln, ppos); 2167 2168 /* 2169 * At the beginning of a `de' macro, clear the existing string 2170 * with the same name, if there is one. New content will be 2171 * appended from roff_block_text() in multiline mode. 2172 */ 2173 2174 if (tok == ROFF_de || tok == ROFF_dei) { 2175 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2176 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2177 } else if (tok == ROFF_am || tok == ROFF_ami) { 2178 deftype = ROFFDEF_ANY; 2179 value = roff_getstrn(r, iname, namesz, &deftype); 2180 switch (deftype) { /* Before appending, ... */ 2181 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2182 roff_setstrn(&r->strtab, name, namesz, 2183 value, strlen(value), 0); 2184 break; 2185 case ROFFDEF_REN: /* call original standard macro. */ 2186 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2187 (int)strlen(value), value); 2188 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2189 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2190 free(call); 2191 break; 2192 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2193 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2194 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2195 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2196 (int)rsz, rname); 2197 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2198 free(call); 2199 free(rname); 2200 break; 2201 default: 2202 break; 2203 } 2204 } 2205 2206 if (*cp == '\0') 2207 return ROFF_IGN; 2208 2209 /* Get the custom end marker. */ 2210 2211 iname = cp; 2212 namesz = roff_getname(r, &cp, ln, ppos); 2213 2214 /* Resolve the end marker if it is indirect. */ 2215 2216 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2217 deftype = ROFFDEF_USER; 2218 name = roff_getstrn(r, iname, namesz, &deftype); 2219 if (name == NULL) { 2220 mandoc_msg(MANDOCERR_STR_UNDEF, 2221 ln, (int)(iname - buf->buf), 2222 "%.*s", (int)namesz, iname); 2223 namesz = 0; 2224 } else 2225 namesz = strlen(name); 2226 } else 2227 name = iname; 2228 2229 if (namesz) 2230 r->last->end = mandoc_strndup(name, namesz); 2231 2232 if (*cp != '\0') 2233 mandoc_msg(MANDOCERR_ARG_EXCESS, 2234 ln, pos, ".%s ... %s", roff_name[tok], cp); 2235 2236 return ROFF_IGN; 2237 } 2238 2239 static int 2240 roff_block_sub(ROFF_ARGS) 2241 { 2242 enum roff_tok t; 2243 int i, j; 2244 2245 /* 2246 * First check whether a custom macro exists at this level. If 2247 * it does, then check against it. This is some of groff's 2248 * stranger behaviours. If we encountered a custom end-scope 2249 * tag and that tag also happens to be a "real" macro, then we 2250 * need to try interpreting it again as a real macro. If it's 2251 * not, then return ignore. Else continue. 
2252 */ 2253 2254 if (r->last->end) { 2255 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2256 if (buf->buf[i] != r->last->end[j]) 2257 break; 2258 2259 if (r->last->end[j] == '\0' && 2260 (buf->buf[i] == '\0' || 2261 buf->buf[i] == ' ' || 2262 buf->buf[i] == '\t')) { 2263 roffnode_pop(r); 2264 roffnode_cleanscope(r); 2265 2266 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2267 i++; 2268 2269 pos = i; 2270 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2271 TOKEN_NONE) 2272 return ROFF_RERUN; 2273 return ROFF_IGN; 2274 } 2275 } 2276 2277 /* 2278 * If we have no custom end-query or lookup failed, then try 2279 * pulling it out of the hashtable. 2280 */ 2281 2282 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2283 2284 if (t != ROFF_cblock) { 2285 if (tok != ROFF_ig) 2286 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2287 return ROFF_IGN; 2288 } 2289 2290 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2291 } 2292 2293 static int 2294 roff_block_text(ROFF_ARGS) 2295 { 2296 2297 if (tok != ROFF_ig) 2298 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2299 2300 return ROFF_IGN; 2301 } 2302 2303 static int 2304 roff_cond_sub(ROFF_ARGS) 2305 { 2306 struct roffnode *bl; 2307 char *ep; 2308 int endloop, irc, rr; 2309 enum roff_tok t; 2310 2311 irc = ROFF_IGN; 2312 rr = r->last->rule; 2313 endloop = tok != ROFF_while ? ROFF_IGN : 2314 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2315 if (roffnode_cleanscope(r)) 2316 irc |= endloop; 2317 2318 /* 2319 * If `\}' occurs on a macro line without a preceding macro, 2320 * drop the line completely. 2321 */ 2322 2323 ep = buf->buf + pos; 2324 if (ep[0] == '\\' && ep[1] == '}') 2325 rr = 0; 2326 2327 /* 2328 * The closing delimiter `\}' rewinds the conditional scope 2329 * but is otherwise ignored when interpreting the line. 
2330 */ 2331 2332 while ((ep = strchr(ep, '\\')) != NULL) { 2333 switch (ep[1]) { 2334 case '}': 2335 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2336 if (roff_ccond(r, ln, ep - buf->buf)) 2337 irc |= endloop; 2338 break; 2339 case '\0': 2340 ++ep; 2341 break; 2342 default: 2343 ep += 2; 2344 break; 2345 } 2346 } 2347 2348 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2349 2350 /* For now, let high level macros abort .ce mode. */ 2351 2352 if (roffce_node != NULL && 2353 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 2354 t == ROFF_TH || t == ROFF_TS)) { 2355 r->man->last = roffce_node; 2356 r->man->next = ROFF_NEXT_SIBLING; 2357 roffce_lines = 0; 2358 roffce_node = NULL; 2359 } 2360 2361 /* 2362 * Fully handle known macros when they are structurally 2363 * required or when the conditional evaluated to true. 2364 */ 2365 2366 if (t == ROFF_break) { 2367 if (irc & ROFF_LOOPMASK) 2368 irc = ROFF_IGN | ROFF_LOOPEXIT; 2369 else if (rr) { 2370 for (bl = r->last; bl != NULL; bl = bl->parent) { 2371 bl->rule = 0; 2372 if (bl->tok == ROFF_while) 2373 break; 2374 } 2375 } 2376 } else if (t != TOKEN_NONE && 2377 (rr || roffs[t].flags & ROFFMAC_STRUCT)) 2378 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2379 else 2380 irc |= rr ? ROFF_CONT : ROFF_IGN; 2381 return irc; 2382 } 2383 2384 static int 2385 roff_cond_text(ROFF_ARGS) 2386 { 2387 char *ep; 2388 int endloop, irc, rr; 2389 2390 irc = ROFF_IGN; 2391 rr = r->last->rule; 2392 endloop = tok != ROFF_while ? ROFF_IGN : 2393 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2394 if (roffnode_cleanscope(r)) 2395 irc |= endloop; 2396 2397 /* 2398 * If `\}' occurs on a text line with neither preceding 2399 * nor following characters, drop the line completely. 2400 */ 2401 2402 ep = buf->buf + pos; 2403 if (strcmp(ep, "\\}") == 0) 2404 rr = 0; 2405 2406 /* 2407 * The closing delimiter `\}' rewinds the conditional scope 2408 * but is otherwise ignored when interpreting the line. 
2409 */ 2410 2411 while ((ep = strchr(ep, '\\')) != NULL) { 2412 switch (ep[1]) { 2413 case '}': 2414 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2415 if (roff_ccond(r, ln, ep - buf->buf)) 2416 irc |= endloop; 2417 break; 2418 case '\0': 2419 ++ep; 2420 break; 2421 default: 2422 ep += 2; 2423 break; 2424 } 2425 } 2426 if (rr) 2427 irc |= ROFF_CONT; 2428 return irc; 2429 } 2430 2431 /* --- handling of numeric and conditional expressions -------------------- */ 2432 2433 /* 2434 * Parse a single signed integer number. Stop at the first non-digit. 2435 * If there is at least one digit, return success and advance the 2436 * parse point, else return failure and let the parse point unchanged. 2437 * Ignore overflows, treat them just like the C language. 2438 */ 2439 static int 2440 roff_getnum(const char *v, int *pos, int *res, int flags) 2441 { 2442 int myres, scaled, n, p; 2443 2444 if (NULL == res) 2445 res = &myres; 2446 2447 p = *pos; 2448 n = v[p] == '-'; 2449 if (n || v[p] == '+') 2450 p++; 2451 2452 if (flags & ROFFNUM_WHITE) 2453 while (isspace((unsigned char)v[p])) 2454 p++; 2455 2456 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2457 *res = 10 * *res + v[p] - '0'; 2458 if (p == *pos + n) 2459 return 0; 2460 2461 if (n) 2462 *res = -*res; 2463 2464 /* Each number may be followed by one optional scaling unit. 
*/ 2465 2466 switch (v[p]) { 2467 case 'f': 2468 scaled = *res * 65536; 2469 break; 2470 case 'i': 2471 scaled = *res * 240; 2472 break; 2473 case 'c': 2474 scaled = *res * 240 / 2.54; 2475 break; 2476 case 'v': 2477 case 'P': 2478 scaled = *res * 40; 2479 break; 2480 case 'm': 2481 case 'n': 2482 scaled = *res * 24; 2483 break; 2484 case 'p': 2485 scaled = *res * 10 / 3; 2486 break; 2487 case 'u': 2488 scaled = *res; 2489 break; 2490 case 'M': 2491 scaled = *res * 6 / 25; 2492 break; 2493 default: 2494 scaled = *res; 2495 p--; 2496 break; 2497 } 2498 if (flags & ROFFNUM_SCALE) 2499 *res = scaled; 2500 2501 *pos = p + 1; 2502 return 1; 2503 } 2504 2505 /* 2506 * Evaluate a string comparison condition. 2507 * The first character is the delimiter. 2508 * Succeed if the string up to its second occurrence 2509 * matches the string up to its third occurence. 2510 * Advance the cursor after the third occurrence 2511 * or lacking that, to the end of the line. 2512 */ 2513 static int 2514 roff_evalstrcond(const char *v, int *pos) 2515 { 2516 const char *s1, *s2, *s3; 2517 int match; 2518 2519 match = 0; 2520 s1 = v + *pos; /* initial delimiter */ 2521 s2 = s1 + 1; /* for scanning the first string */ 2522 s3 = strchr(s2, *s1); /* for scanning the second string */ 2523 2524 if (NULL == s3) /* found no middle delimiter */ 2525 goto out; 2526 2527 while ('\0' != *++s3) { 2528 if (*s2 != *s3) { /* mismatch */ 2529 s3 = strchr(s3, *s1); 2530 break; 2531 } 2532 if (*s3 == *s1) { /* found the final delimiter */ 2533 match = 1; 2534 break; 2535 } 2536 s2++; 2537 } 2538 2539 out: 2540 if (NULL == s3) 2541 s3 = strchr(s2, '\0'); 2542 else if (*s3 != '\0') 2543 s3++; 2544 *pos = s3 - v; 2545 return match; 2546 } 2547 2548 /* 2549 * Evaluate an optionally negated single character, numerical, 2550 * or string condition. 
2551 */ 2552 static int 2553 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2554 { 2555 const char *start, *end; 2556 char *cp, *name; 2557 size_t sz; 2558 int deftype, len, number, savepos, istrue, wanttrue; 2559 2560 if ('!' == v[*pos]) { 2561 wanttrue = 0; 2562 (*pos)++; 2563 } else 2564 wanttrue = 1; 2565 2566 switch (v[*pos]) { 2567 case '\0': 2568 return 0; 2569 case 'n': 2570 case 'o': 2571 (*pos)++; 2572 return wanttrue; 2573 case 'e': 2574 case 't': 2575 case 'v': 2576 (*pos)++; 2577 return !wanttrue; 2578 case 'c': 2579 do { 2580 (*pos)++; 2581 } while (v[*pos] == ' '); 2582 2583 /* 2584 * Quirk for groff compatibility: 2585 * The horizontal tab is neither available nor unavailable. 2586 */ 2587 2588 if (v[*pos] == '\t') { 2589 (*pos)++; 2590 return 0; 2591 } 2592 2593 /* Printable ASCII characters are available. */ 2594 2595 if (v[*pos] != '\\') { 2596 (*pos)++; 2597 return wanttrue; 2598 } 2599 2600 end = v + ++*pos; 2601 switch (mandoc_escape(&end, &start, &len)) { 2602 case ESCAPE_SPECIAL: 2603 istrue = mchars_spec2cp(start, len) != -1; 2604 break; 2605 case ESCAPE_UNICODE: 2606 istrue = 1; 2607 break; 2608 case ESCAPE_NUMBERED: 2609 istrue = mchars_num2char(start, len) != -1; 2610 break; 2611 default: 2612 istrue = !wanttrue; 2613 break; 2614 } 2615 *pos = end - v; 2616 return istrue == wanttrue; 2617 case 'd': 2618 case 'r': 2619 cp = v + *pos + 1; 2620 while (*cp == ' ') 2621 cp++; 2622 name = cp; 2623 sz = roff_getname(r, &cp, ln, cp - v); 2624 if (sz == 0) 2625 istrue = 0; 2626 else if (v[*pos] == 'r') 2627 istrue = roff_hasregn(r, name, sz); 2628 else { 2629 deftype = ROFFDEF_ANY; 2630 roff_getstrn(r, name, sz, &deftype); 2631 istrue = !!deftype; 2632 } 2633 *pos = (name + sz) - v; 2634 return istrue == wanttrue; 2635 default: 2636 break; 2637 } 2638 2639 savepos = *pos; 2640 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2641 return (number > 0) == wanttrue; 2642 else if (*pos == savepos) 2643 return roff_evalstrcond(v, pos) 
== wanttrue; 2644 else 2645 return 0; 2646 } 2647 2648 static int 2649 roff_line_ignore(ROFF_ARGS) 2650 { 2651 2652 return ROFF_IGN; 2653 } 2654 2655 static int 2656 roff_insec(ROFF_ARGS) 2657 { 2658 2659 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2660 return ROFF_IGN; 2661 } 2662 2663 static int 2664 roff_unsupp(ROFF_ARGS) 2665 { 2666 2667 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2668 return ROFF_IGN; 2669 } 2670 2671 static int 2672 roff_cond(ROFF_ARGS) 2673 { 2674 int irc; 2675 2676 roffnode_push(r, tok, NULL, ln, ppos); 2677 2678 /* 2679 * An `.el' has no conditional body: it will consume the value 2680 * of the current rstack entry set in prior `ie' calls or 2681 * defaults to DENY. 2682 * 2683 * If we're not an `el', however, then evaluate the conditional. 2684 */ 2685 2686 r->last->rule = tok == ROFF_el ? 2687 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2688 roff_evalcond(r, ln, buf->buf, &pos); 2689 2690 /* 2691 * An if-else will put the NEGATION of the current evaluated 2692 * conditional into the stack of rules. 2693 */ 2694 2695 if (tok == ROFF_ie) { 2696 if (r->rstackpos + 1 == r->rstacksz) { 2697 r->rstacksz += 16; 2698 r->rstack = mandoc_reallocarray(r->rstack, 2699 r->rstacksz, sizeof(int)); 2700 } 2701 r->rstack[++r->rstackpos] = !r->last->rule; 2702 } 2703 2704 /* If the parent has false as its rule, then so do we. */ 2705 2706 if (r->last->parent && !r->last->parent->rule) 2707 r->last->rule = 0; 2708 2709 /* 2710 * Determine scope. 2711 * If there is nothing on the line after the conditional, 2712 * not even whitespace, use next-line scope. 2713 * Except that .while does not support next-line scope. 2714 */ 2715 2716 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2717 r->last->endspan = 2; 2718 goto out; 2719 } 2720 2721 while (buf->buf[pos] == ' ') 2722 pos++; 2723 2724 /* An opening brace requests multiline scope. 
*/ 2725 2726 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2727 r->last->endspan = -1; 2728 pos += 2; 2729 while (buf->buf[pos] == ' ') 2730 pos++; 2731 goto out; 2732 } 2733 2734 /* 2735 * Anything else following the conditional causes 2736 * single-line scope. Warn if the scope contains 2737 * nothing but trailing whitespace. 2738 */ 2739 2740 if (buf->buf[pos] == '\0') 2741 mandoc_msg(MANDOCERR_COND_EMPTY, 2742 ln, ppos, "%s", roff_name[tok]); 2743 2744 r->last->endspan = 1; 2745 2746 out: 2747 *offs = pos; 2748 irc = ROFF_RERUN; 2749 if (tok == ROFF_while) 2750 irc |= ROFF_WHILE; 2751 return irc; 2752 } 2753 2754 static int 2755 roff_ds(ROFF_ARGS) 2756 { 2757 char *string; 2758 const char *name; 2759 size_t namesz; 2760 2761 /* Ignore groff compatibility mode for now. */ 2762 2763 if (tok == ROFF_ds1) 2764 tok = ROFF_ds; 2765 else if (tok == ROFF_as1) 2766 tok = ROFF_as; 2767 2768 /* 2769 * The first word is the name of the string. 2770 * If it is empty or terminated by an escape sequence, 2771 * abort the `ds' request without defining anything. 2772 */ 2773 2774 name = string = buf->buf + pos; 2775 if (*name == '\0') 2776 return ROFF_IGN; 2777 2778 namesz = roff_getname(r, &string, ln, pos); 2779 switch (name[namesz]) { 2780 case '\\': 2781 return ROFF_IGN; 2782 case '\t': 2783 string = buf->buf + pos + namesz; 2784 break; 2785 default: 2786 break; 2787 } 2788 2789 /* Read past the initial double-quote, if any. */ 2790 if (*string == '"') 2791 string++; 2792 2793 /* The rest is the value. */ 2794 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2795 ROFF_as == tok); 2796 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2797 return ROFF_IGN; 2798 } 2799 2800 /* 2801 * Parse a single operator, one or two characters long. 2802 * If the operator is recognized, return success and advance the 2803 * parse point, else return failure and let the parse point unchanged. 
2804 */ 2805 static int 2806 roff_getop(const char *v, int *pos, char *res) 2807 { 2808 2809 *res = v[*pos]; 2810 2811 switch (*res) { 2812 case '+': 2813 case '-': 2814 case '*': 2815 case '/': 2816 case '%': 2817 case '&': 2818 case ':': 2819 break; 2820 case '<': 2821 switch (v[*pos + 1]) { 2822 case '=': 2823 *res = 'l'; 2824 (*pos)++; 2825 break; 2826 case '>': 2827 *res = '!'; 2828 (*pos)++; 2829 break; 2830 case '?': 2831 *res = 'i'; 2832 (*pos)++; 2833 break; 2834 default: 2835 break; 2836 } 2837 break; 2838 case '>': 2839 switch (v[*pos + 1]) { 2840 case '=': 2841 *res = 'g'; 2842 (*pos)++; 2843 break; 2844 case '?': 2845 *res = 'a'; 2846 (*pos)++; 2847 break; 2848 default: 2849 break; 2850 } 2851 break; 2852 case '=': 2853 if ('=' == v[*pos + 1]) 2854 (*pos)++; 2855 break; 2856 default: 2857 return 0; 2858 } 2859 (*pos)++; 2860 2861 return *res; 2862 } 2863 2864 /* 2865 * Evaluate either a parenthesized numeric expression 2866 * or a single signed integer number. 2867 */ 2868 static int 2869 roff_evalpar(struct roff *r, int ln, 2870 const char *v, int *pos, int *res, int flags) 2871 { 2872 2873 if ('(' != v[*pos]) 2874 return roff_getnum(v, pos, res, flags); 2875 2876 (*pos)++; 2877 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2878 return 0; 2879 2880 /* 2881 * Omission of the closing parenthesis 2882 * is an error in validation mode, 2883 * but ignored in evaluation mode. 2884 */ 2885 2886 if (')' == v[*pos]) 2887 (*pos)++; 2888 else if (NULL == res) 2889 return 0; 2890 2891 return 1; 2892 } 2893 2894 /* 2895 * Evaluate a complete numeric expression. 2896 * Proceed left to right, there is no concept of precedence. 
2897 */ 2898 static int 2899 roff_evalnum(struct roff *r, int ln, const char *v, 2900 int *pos, int *res, int flags) 2901 { 2902 int mypos, operand2; 2903 char operator; 2904 2905 if (NULL == pos) { 2906 mypos = 0; 2907 pos = &mypos; 2908 } 2909 2910 if (flags & ROFFNUM_WHITE) 2911 while (isspace((unsigned char)v[*pos])) 2912 (*pos)++; 2913 2914 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2915 return 0; 2916 2917 while (1) { 2918 if (flags & ROFFNUM_WHITE) 2919 while (isspace((unsigned char)v[*pos])) 2920 (*pos)++; 2921 2922 if ( ! roff_getop(v, pos, &operator)) 2923 break; 2924 2925 if (flags & ROFFNUM_WHITE) 2926 while (isspace((unsigned char)v[*pos])) 2927 (*pos)++; 2928 2929 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2930 return 0; 2931 2932 if (flags & ROFFNUM_WHITE) 2933 while (isspace((unsigned char)v[*pos])) 2934 (*pos)++; 2935 2936 if (NULL == res) 2937 continue; 2938 2939 switch (operator) { 2940 case '+': 2941 *res += operand2; 2942 break; 2943 case '-': 2944 *res -= operand2; 2945 break; 2946 case '*': 2947 *res *= operand2; 2948 break; 2949 case '/': 2950 if (operand2 == 0) { 2951 mandoc_msg(MANDOCERR_DIVZERO, 2952 ln, *pos, "%s", v); 2953 *res = 0; 2954 break; 2955 } 2956 *res /= operand2; 2957 break; 2958 case '%': 2959 if (operand2 == 0) { 2960 mandoc_msg(MANDOCERR_DIVZERO, 2961 ln, *pos, "%s", v); 2962 *res = 0; 2963 break; 2964 } 2965 *res %= operand2; 2966 break; 2967 case '<': 2968 *res = *res < operand2; 2969 break; 2970 case '>': 2971 *res = *res > operand2; 2972 break; 2973 case 'l': 2974 *res = *res <= operand2; 2975 break; 2976 case 'g': 2977 *res = *res >= operand2; 2978 break; 2979 case '=': 2980 *res = *res == operand2; 2981 break; 2982 case '!': 2983 *res = *res != operand2; 2984 break; 2985 case '&': 2986 *res = *res && operand2; 2987 break; 2988 case ':': 2989 *res = *res || operand2; 2990 break; 2991 case 'i': 2992 if (operand2 < *res) 2993 *res = operand2; 2994 break; 2995 case 'a': 2996 if (operand2 > *res) 2997 *res 
= operand2; 2998 break; 2999 default: 3000 abort(); 3001 } 3002 } 3003 return 1; 3004 } 3005 3006 /* --- register management ------------------------------------------------ */ 3007 3008 void 3009 roff_setreg(struct roff *r, const char *name, int val, char sign) 3010 { 3011 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 3012 } 3013 3014 static void 3015 roff_setregn(struct roff *r, const char *name, size_t len, 3016 int val, char sign, int step) 3017 { 3018 struct roffreg *reg; 3019 3020 /* Search for an existing register with the same name. */ 3021 reg = r->regtab; 3022 3023 while (reg != NULL && (reg->key.sz != len || 3024 strncmp(reg->key.p, name, len) != 0)) 3025 reg = reg->next; 3026 3027 if (NULL == reg) { 3028 /* Create a new register. */ 3029 reg = mandoc_malloc(sizeof(struct roffreg)); 3030 reg->key.p = mandoc_strndup(name, len); 3031 reg->key.sz = len; 3032 reg->val = 0; 3033 reg->step = 0; 3034 reg->next = r->regtab; 3035 r->regtab = reg; 3036 } 3037 3038 if ('+' == sign) 3039 reg->val += val; 3040 else if ('-' == sign) 3041 reg->val -= val; 3042 else 3043 reg->val = val; 3044 if (step != INT_MIN) 3045 reg->step = step; 3046 } 3047 3048 /* 3049 * Handle some predefined read-only number registers. 3050 * For now, return -1 if the requested register is not predefined; 3051 * in case a predefined read-only register having the value -1 3052 * were to turn up, another special value would have to be chosen. 3053 */ 3054 static int 3055 roff_getregro(const struct roff *r, const char *name) 3056 { 3057 3058 switch (*name) { 3059 case '$': /* Number of arguments of the last macro evaluated. */ 3060 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 3061 case 'A': /* ASCII approximation mode is always off. */ 3062 return 0; 3063 case 'g': /* Groff compatibility mode is always on. */ 3064 return 1; 3065 case 'H': /* Fixed horizontal resolution. */ 3066 return 24; 3067 case 'j': /* Always adjust left margin only. 
*/ 3068 return 0; 3069 case 'T': /* Some output device is always defined. */ 3070 return 1; 3071 case 'V': /* Fixed vertical resolution. */ 3072 return 40; 3073 default: 3074 return -1; 3075 } 3076 } 3077 3078 int 3079 roff_getreg(struct roff *r, const char *name) 3080 { 3081 return roff_getregn(r, name, strlen(name), '\0'); 3082 } 3083 3084 static int 3085 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3086 { 3087 struct roffreg *reg; 3088 int val; 3089 3090 if ('.' == name[0] && 2 == len) { 3091 val = roff_getregro(r, name + 1); 3092 if (-1 != val) 3093 return val; 3094 } 3095 3096 for (reg = r->regtab; reg; reg = reg->next) { 3097 if (len == reg->key.sz && 3098 0 == strncmp(name, reg->key.p, len)) { 3099 switch (sign) { 3100 case '+': 3101 reg->val += reg->step; 3102 break; 3103 case '-': 3104 reg->val -= reg->step; 3105 break; 3106 default: 3107 break; 3108 } 3109 return reg->val; 3110 } 3111 } 3112 3113 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3114 return 0; 3115 } 3116 3117 static int 3118 roff_hasregn(const struct roff *r, const char *name, size_t len) 3119 { 3120 struct roffreg *reg; 3121 int val; 3122 3123 if ('.' 
== name[0] && 2 == len) { 3124 val = roff_getregro(r, name + 1); 3125 if (-1 != val) 3126 return 1; 3127 } 3128 3129 for (reg = r->regtab; reg; reg = reg->next) 3130 if (len == reg->key.sz && 3131 0 == strncmp(name, reg->key.p, len)) 3132 return 1; 3133 3134 return 0; 3135 } 3136 3137 static void 3138 roff_freereg(struct roffreg *reg) 3139 { 3140 struct roffreg *old_reg; 3141 3142 while (NULL != reg) { 3143 free(reg->key.p); 3144 old_reg = reg; 3145 reg = reg->next; 3146 free(old_reg); 3147 } 3148 } 3149 3150 static int 3151 roff_nr(ROFF_ARGS) 3152 { 3153 char *key, *val, *step; 3154 size_t keysz; 3155 int iv, is, len; 3156 char sign; 3157 3158 key = val = buf->buf + pos; 3159 if (*key == '\0') 3160 return ROFF_IGN; 3161 3162 keysz = roff_getname(r, &val, ln, pos); 3163 if (key[keysz] == '\\' || key[keysz] == '\t') 3164 return ROFF_IGN; 3165 3166 sign = *val; 3167 if (sign == '+' || sign == '-') 3168 val++; 3169 3170 len = 0; 3171 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3172 return ROFF_IGN; 3173 3174 step = val + len; 3175 while (isspace((unsigned char)*step)) 3176 step++; 3177 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3178 is = INT_MIN; 3179 3180 roff_setregn(r, key, keysz, iv, sign, is); 3181 return ROFF_IGN; 3182 } 3183 3184 static int 3185 roff_rr(ROFF_ARGS) 3186 { 3187 struct roffreg *reg, **prev; 3188 char *name, *cp; 3189 size_t namesz; 3190 3191 name = cp = buf->buf + pos; 3192 if (*name == '\0') 3193 return ROFF_IGN; 3194 namesz = roff_getname(r, &cp, ln, pos); 3195 name[namesz] = '\0'; 3196 3197 prev = &r->regtab; 3198 while (1) { 3199 reg = *prev; 3200 if (reg == NULL || !strcmp(name, reg->key.p)) 3201 break; 3202 prev = ®->next; 3203 } 3204 if (reg != NULL) { 3205 *prev = reg->next; 3206 free(reg->key.p); 3207 free(reg); 3208 } 3209 return ROFF_IGN; 3210 } 3211 3212 /* --- handler functions for roff requests -------------------------------- */ 3213 3214 static int 3215 roff_rm(ROFF_ARGS) 3216 { 3217 const char *name; 3218 
char *cp; 3219 size_t namesz; 3220 3221 cp = buf->buf + pos; 3222 while (*cp != '\0') { 3223 name = cp; 3224 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3225 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3226 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3227 if (name[namesz] == '\\' || name[namesz] == '\t') 3228 break; 3229 } 3230 return ROFF_IGN; 3231 } 3232 3233 static int 3234 roff_it(ROFF_ARGS) 3235 { 3236 int iv; 3237 3238 /* Parse the number of lines. */ 3239 3240 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3241 mandoc_msg(MANDOCERR_IT_NONUM, 3242 ln, ppos, "%s", buf->buf + 1); 3243 return ROFF_IGN; 3244 } 3245 3246 while (isspace((unsigned char)buf->buf[pos])) 3247 pos++; 3248 3249 /* 3250 * Arm the input line trap. 3251 * Special-casing "an-trap" is an ugly workaround to cope 3252 * with DocBook stupidly fiddling with man(7) internals. 3253 */ 3254 3255 roffit_lines = iv; 3256 roffit_macro = mandoc_strdup(iv != 1 || 3257 strcmp(buf->buf + pos, "an-trap") ? 
3258 buf->buf + pos : "br"); 3259 return ROFF_IGN; 3260 } 3261 3262 static int 3263 roff_Dd(ROFF_ARGS) 3264 { 3265 int mask; 3266 enum roff_tok t, te; 3267 3268 switch (tok) { 3269 case ROFF_Dd: 3270 tok = MDOC_Dd; 3271 te = MDOC_MAX; 3272 if (r->format == 0) 3273 r->format = MPARSE_MDOC; 3274 mask = MPARSE_MDOC | MPARSE_QUICK; 3275 break; 3276 case ROFF_TH: 3277 tok = MAN_TH; 3278 te = MAN_MAX; 3279 if (r->format == 0) 3280 r->format = MPARSE_MAN; 3281 mask = MPARSE_QUICK; 3282 break; 3283 default: 3284 abort(); 3285 } 3286 if ((r->options & mask) == 0) 3287 for (t = tok; t < te; t++) 3288 roff_setstr(r, roff_name[t], NULL, 0); 3289 return ROFF_CONT; 3290 } 3291 3292 static int 3293 roff_TE(ROFF_ARGS) 3294 { 3295 r->man->flags &= ~ROFF_NONOFILL; 3296 if (r->tbl == NULL) { 3297 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3298 return ROFF_IGN; 3299 } 3300 if (tbl_end(r->tbl, 0) == 0) { 3301 r->tbl = NULL; 3302 free(buf->buf); 3303 buf->buf = mandoc_strdup(".sp"); 3304 buf->sz = 4; 3305 *offs = 0; 3306 return ROFF_REPARSE; 3307 } 3308 r->tbl = NULL; 3309 return ROFF_IGN; 3310 } 3311 3312 static int 3313 roff_T_(ROFF_ARGS) 3314 { 3315 3316 if (NULL == r->tbl) 3317 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3318 else 3319 tbl_restart(ln, ppos, r->tbl); 3320 3321 return ROFF_IGN; 3322 } 3323 3324 /* 3325 * Handle in-line equation delimiters. 3326 */ 3327 static int 3328 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3329 { 3330 char *cp1, *cp2; 3331 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3332 3333 /* 3334 * Outside equations, look for an opening delimiter. 3335 * If we are inside an equation, we already know it is 3336 * in-line, or this function wouldn't have been called; 3337 * so look for a closing delimiter. 3338 */ 3339 3340 cp1 = buf->buf + pos; 3341 cp2 = strchr(cp1, r->eqn == NULL ? 
3342 r->last_eqn->odelim : r->last_eqn->cdelim); 3343 if (cp2 == NULL) 3344 return ROFF_CONT; 3345 3346 *cp2++ = '\0'; 3347 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3348 3349 /* Handle preceding text, protecting whitespace. */ 3350 3351 if (*buf->buf != '\0') { 3352 if (r->eqn == NULL) 3353 bef_pr = "\\&"; 3354 bef_nl = "\n"; 3355 } 3356 3357 /* 3358 * Prepare replacing the delimiter with an equation macro 3359 * and drop leading white space from the equation. 3360 */ 3361 3362 if (r->eqn == NULL) { 3363 while (*cp2 == ' ') 3364 cp2++; 3365 mac = ".EQ"; 3366 } else 3367 mac = ".EN"; 3368 3369 /* Handle following text, protecting whitespace. */ 3370 3371 if (*cp2 != '\0') { 3372 aft_nl = "\n"; 3373 if (r->eqn != NULL) 3374 aft_pr = "\\&"; 3375 } 3376 3377 /* Do the actual replacement. */ 3378 3379 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3380 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3381 free(buf->buf); 3382 buf->buf = cp1; 3383 3384 /* Toggle the in-line state of the eqn subsystem. 
*/ 3385 3386 r->eqn_inline = r->eqn == NULL; 3387 return ROFF_REPARSE; 3388 } 3389 3390 static int 3391 roff_EQ(ROFF_ARGS) 3392 { 3393 struct roff_node *n; 3394 3395 if (r->man->meta.macroset == MACROSET_MAN) 3396 man_breakscope(r->man, ROFF_EQ); 3397 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3398 if (ln > r->man->last->line) 3399 n->flags |= NODE_LINE; 3400 n->eqn = eqn_box_new(); 3401 roff_node_append(r->man, n); 3402 r->man->next = ROFF_NEXT_SIBLING; 3403 3404 assert(r->eqn == NULL); 3405 if (r->last_eqn == NULL) 3406 r->last_eqn = eqn_alloc(); 3407 else 3408 eqn_reset(r->last_eqn); 3409 r->eqn = r->last_eqn; 3410 r->eqn->node = n; 3411 3412 if (buf->buf[pos] != '\0') 3413 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3414 ".EQ %s", buf->buf + pos); 3415 3416 return ROFF_IGN; 3417 } 3418 3419 static int 3420 roff_EN(ROFF_ARGS) 3421 { 3422 if (r->eqn != NULL) { 3423 eqn_parse(r->eqn); 3424 r->eqn = NULL; 3425 } else 3426 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3427 if (buf->buf[pos] != '\0') 3428 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3429 "EN %s", buf->buf + pos); 3430 return ROFF_IGN; 3431 } 3432 3433 static int 3434 roff_TS(ROFF_ARGS) 3435 { 3436 if (r->tbl != NULL) { 3437 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3438 tbl_end(r->tbl, 0); 3439 } 3440 r->man->flags |= ROFF_NONOFILL; 3441 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3442 if (r->last_tbl == NULL) 3443 r->first_tbl = r->tbl; 3444 r->last_tbl = r->tbl; 3445 return ROFF_IGN; 3446 } 3447 3448 static int 3449 roff_noarg(ROFF_ARGS) 3450 { 3451 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3452 man_breakscope(r->man, tok); 3453 if (tok == ROFF_brp) 3454 tok = ROFF_br; 3455 roff_elem_alloc(r->man, ln, ppos, tok); 3456 if (buf->buf[pos] != '\0') 3457 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3458 "%s %s", roff_name[tok], buf->buf + pos); 3459 if (tok == ROFF_nf) 3460 r->man->flags |= ROFF_NOFILL; 3461 else if (tok == ROFF_fi) 3462 r->man->flags &= ~ROFF_NOFILL; 3463 
r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3464 r->man->next = ROFF_NEXT_SIBLING; 3465 return ROFF_IGN; 3466 } 3467 3468 static int 3469 roff_onearg(ROFF_ARGS) 3470 { 3471 struct roff_node *n; 3472 char *cp; 3473 int npos; 3474 3475 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3476 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3477 tok == ROFF_ti)) 3478 man_breakscope(r->man, tok); 3479 3480 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3481 r->man->last = roffce_node; 3482 r->man->next = ROFF_NEXT_SIBLING; 3483 } 3484 3485 roff_elem_alloc(r->man, ln, ppos, tok); 3486 n = r->man->last; 3487 3488 cp = buf->buf + pos; 3489 if (*cp != '\0') { 3490 while (*cp != '\0' && *cp != ' ') 3491 cp++; 3492 while (*cp == ' ') 3493 *cp++ = '\0'; 3494 if (*cp != '\0') 3495 mandoc_msg(MANDOCERR_ARG_EXCESS, 3496 ln, (int)(cp - buf->buf), 3497 "%s ... %s", roff_name[tok], cp); 3498 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3499 } 3500 3501 if (tok == ROFF_ce || tok == ROFF_rj) { 3502 if (r->man->last->type == ROFFT_ELEM) { 3503 roff_word_alloc(r->man, ln, pos, "1"); 3504 r->man->last->flags |= NODE_NOSRC; 3505 } 3506 npos = 0; 3507 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3508 &roffce_lines, 0) == 0) { 3509 mandoc_msg(MANDOCERR_CE_NONUM, 3510 ln, pos, "ce %s", buf->buf + pos); 3511 roffce_lines = 1; 3512 } 3513 if (roffce_lines < 1) { 3514 r->man->last = r->man->last->parent; 3515 roffce_node = NULL; 3516 roffce_lines = 0; 3517 } else 3518 roffce_node = r->man->last->parent; 3519 } else { 3520 n->flags |= NODE_VALID | NODE_ENDED; 3521 r->man->last = n; 3522 } 3523 n->flags |= NODE_LINE; 3524 r->man->next = ROFF_NEXT_SIBLING; 3525 return ROFF_IGN; 3526 } 3527 3528 static int 3529 roff_manyarg(ROFF_ARGS) 3530 { 3531 struct roff_node *n; 3532 char *sp, *ep; 3533 3534 roff_elem_alloc(r->man, ln, ppos, tok); 3535 n = r->man->last; 3536 3537 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3538 while (*ep != '\0' && 
*ep != ' ') 3539 ep++; 3540 while (*ep == ' ') 3541 *ep++ = '\0'; 3542 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3543 } 3544 3545 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3546 r->man->last = n; 3547 r->man->next = ROFF_NEXT_SIBLING; 3548 return ROFF_IGN; 3549 } 3550 3551 static int 3552 roff_als(ROFF_ARGS) 3553 { 3554 char *oldn, *newn, *end, *value; 3555 size_t oldsz, newsz, valsz; 3556 3557 newn = oldn = buf->buf + pos; 3558 if (*newn == '\0') 3559 return ROFF_IGN; 3560 3561 newsz = roff_getname(r, &oldn, ln, pos); 3562 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3563 return ROFF_IGN; 3564 3565 end = oldn; 3566 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3567 if (oldsz == 0) 3568 return ROFF_IGN; 3569 3570 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3571 (int)oldsz, oldn); 3572 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3573 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3574 free(value); 3575 return ROFF_IGN; 3576 } 3577 3578 /* 3579 * The .break request only makes sense inside conditionals, 3580 * and that case is already handled in roff_cond_sub(). 3581 */ 3582 static int 3583 roff_break(ROFF_ARGS) 3584 { 3585 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3586 return ROFF_IGN; 3587 } 3588 3589 static int 3590 roff_cc(ROFF_ARGS) 3591 { 3592 const char *p; 3593 3594 p = buf->buf + pos; 3595 3596 if (*p == '\0' || (r->control = *p++) == '.') 3597 r->control = '\0'; 3598 3599 if (*p != '\0') 3600 mandoc_msg(MANDOCERR_ARG_EXCESS, 3601 ln, p - buf->buf, "cc ... %s", p); 3602 3603 return ROFF_IGN; 3604 } 3605 3606 static int 3607 roff_char(ROFF_ARGS) 3608 { 3609 const char *p, *kp, *vp; 3610 size_t ksz, vsz; 3611 int font; 3612 3613 /* Parse the character to be replaced. 
*/ 3614 3615 kp = buf->buf + pos; 3616 p = kp + 1; 3617 if (*kp == '\0' || (*kp == '\\' && 3618 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3619 (*p != ' ' && *p != '\0')) { 3620 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3621 return ROFF_IGN; 3622 } 3623 ksz = p - kp; 3624 while (*p == ' ') 3625 p++; 3626 3627 /* 3628 * If the replacement string contains a font escape sequence, 3629 * we have to restore the font at the end. 3630 */ 3631 3632 vp = p; 3633 vsz = strlen(p); 3634 font = 0; 3635 while (*p != '\0') { 3636 if (*p++ != '\\') 3637 continue; 3638 switch (mandoc_escape(&p, NULL, NULL)) { 3639 case ESCAPE_FONT: 3640 case ESCAPE_FONTROMAN: 3641 case ESCAPE_FONTITALIC: 3642 case ESCAPE_FONTBOLD: 3643 case ESCAPE_FONTBI: 3644 case ESCAPE_FONTCW: 3645 case ESCAPE_FONTPREV: 3646 font++; 3647 break; 3648 default: 3649 break; 3650 } 3651 } 3652 if (font > 1) 3653 mandoc_msg(MANDOCERR_CHAR_FONT, 3654 ln, (int)(vp - buf->buf), "%s", vp); 3655 3656 /* 3657 * Approximate the effect of .char using the .tr tables. 3658 * XXX In groff, .char and .tr interact differently. 3659 */ 3660 3661 if (ksz == 1) { 3662 if (r->xtab == NULL) 3663 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3664 assert((unsigned int)*kp < 128); 3665 free(r->xtab[(int)*kp].p); 3666 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3667 "%s%s", vp, font ? "\fP" : ""); 3668 } else { 3669 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3670 if (font) 3671 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3672 } 3673 return ROFF_IGN; 3674 } 3675 3676 static int 3677 roff_ec(ROFF_ARGS) 3678 { 3679 const char *p; 3680 3681 p = buf->buf + pos; 3682 if (*p == '\0') 3683 r->escape = '\\'; 3684 else { 3685 r->escape = *p; 3686 if (*++p != '\0') 3687 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3688 (int)(p - buf->buf), "ec ... 
%s", p); 3689 } 3690 return ROFF_IGN; 3691 } 3692 3693 static int 3694 roff_eo(ROFF_ARGS) 3695 { 3696 r->escape = '\0'; 3697 if (buf->buf[pos] != '\0') 3698 mandoc_msg(MANDOCERR_ARG_SKIP, 3699 ln, pos, "eo %s", buf->buf + pos); 3700 return ROFF_IGN; 3701 } 3702 3703 static int 3704 roff_nop(ROFF_ARGS) 3705 { 3706 while (buf->buf[pos] == ' ') 3707 pos++; 3708 *offs = pos; 3709 return ROFF_RERUN; 3710 } 3711 3712 static int 3713 roff_tr(ROFF_ARGS) 3714 { 3715 const char *p, *first, *second; 3716 size_t fsz, ssz; 3717 enum mandoc_esc esc; 3718 3719 p = buf->buf + pos; 3720 3721 if (*p == '\0') { 3722 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3723 return ROFF_IGN; 3724 } 3725 3726 while (*p != '\0') { 3727 fsz = ssz = 1; 3728 3729 first = p++; 3730 if (*first == '\\') { 3731 esc = mandoc_escape(&p, NULL, NULL); 3732 if (esc == ESCAPE_ERROR) { 3733 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3734 (int)(p - buf->buf), "%s", first); 3735 return ROFF_IGN; 3736 } 3737 fsz = (size_t)(p - first); 3738 } 3739 3740 second = p++; 3741 if (*second == '\\') { 3742 esc = mandoc_escape(&p, NULL, NULL); 3743 if (esc == ESCAPE_ERROR) { 3744 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3745 (int)(p - buf->buf), "%s", second); 3746 return ROFF_IGN; 3747 } 3748 ssz = (size_t)(p - second); 3749 } else if (*second == '\0') { 3750 mandoc_msg(MANDOCERR_TR_ODD, ln, 3751 (int)(first - buf->buf), "tr %s", first); 3752 second = " "; 3753 p--; 3754 } 3755 3756 if (fsz > 1) { 3757 roff_setstrn(&r->xmbtab, first, fsz, 3758 second, ssz, 0); 3759 continue; 3760 } 3761 3762 if (r->xtab == NULL) 3763 r->xtab = mandoc_calloc(128, 3764 sizeof(struct roffstr)); 3765 3766 free(r->xtab[(int)*first].p); 3767 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3768 r->xtab[(int)*first].sz = ssz; 3769 } 3770 3771 return ROFF_IGN; 3772 } 3773 3774 /* 3775 * Implementation of the .return request. 3776 * There is no need to call roff_userret() from here. 
3777 * The read module will call that after rewinding the reader stack 3778 * to the place from where the current macro was called. 3779 */ 3780 static int 3781 roff_return(ROFF_ARGS) 3782 { 3783 if (r->mstackpos >= 0) 3784 return ROFF_IGN | ROFF_USERRET; 3785 3786 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3787 return ROFF_IGN; 3788 } 3789 3790 static int 3791 roff_rn(ROFF_ARGS) 3792 { 3793 const char *value; 3794 char *oldn, *newn, *end; 3795 size_t oldsz, newsz; 3796 int deftype; 3797 3798 oldn = newn = buf->buf + pos; 3799 if (*oldn == '\0') 3800 return ROFF_IGN; 3801 3802 oldsz = roff_getname(r, &newn, ln, pos); 3803 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3804 return ROFF_IGN; 3805 3806 end = newn; 3807 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3808 if (newsz == 0) 3809 return ROFF_IGN; 3810 3811 deftype = ROFFDEF_ANY; 3812 value = roff_getstrn(r, oldn, oldsz, &deftype); 3813 switch (deftype) { 3814 case ROFFDEF_USER: 3815 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3816 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3817 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3818 break; 3819 case ROFFDEF_PRE: 3820 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3821 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3822 break; 3823 case ROFFDEF_REN: 3824 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3825 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3826 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3827 break; 3828 case ROFFDEF_STD: 3829 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3830 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3831 break; 3832 default: 3833 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3834 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3835 break; 3836 } 3837 return ROFF_IGN; 3838 } 3839 3840 static int 3841 roff_shift(ROFF_ARGS) 3842 { 3843 struct mctx *ctx; 3844 int levels, i; 3845 3846 levels = 1; 
3847 if (buf->buf[pos] != '\0' && 3848 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3849 mandoc_msg(MANDOCERR_CE_NONUM, 3850 ln, pos, "shift %s", buf->buf + pos); 3851 levels = 1; 3852 } 3853 if (r->mstackpos < 0) { 3854 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3855 return ROFF_IGN; 3856 } 3857 ctx = r->mstack + r->mstackpos; 3858 if (levels > ctx->argc) { 3859 mandoc_msg(MANDOCERR_SHIFT, 3860 ln, pos, "%d, but max is %d", levels, ctx->argc); 3861 levels = ctx->argc; 3862 } 3863 if (levels == 0) 3864 return ROFF_IGN; 3865 for (i = 0; i < levels; i++) 3866 free(ctx->argv[i]); 3867 ctx->argc -= levels; 3868 for (i = 0; i < ctx->argc; i++) 3869 ctx->argv[i] = ctx->argv[i + levels]; 3870 return ROFF_IGN; 3871 } 3872 3873 static int 3874 roff_so(ROFF_ARGS) 3875 { 3876 char *name, *cp; 3877 3878 name = buf->buf + pos; 3879 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3880 3881 /* 3882 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3883 * opening anything that's not in our cwd or anything beneath 3884 * it. Thus, explicitly disallow traversing up the file-system 3885 * or using absolute paths. 3886 */ 3887 3888 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3889 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3890 buf->sz = mandoc_asprintf(&cp, 3891 ".sp\nSee the file %s.\n.sp", name) + 1; 3892 free(buf->buf); 3893 buf->buf = cp; 3894 *offs = 0; 3895 return ROFF_REPARSE; 3896 } 3897 3898 *offs = pos; 3899 return ROFF_SO; 3900 } 3901 3902 /* --- user defined strings and macros ------------------------------------ */ 3903 3904 static int 3905 roff_userdef(ROFF_ARGS) 3906 { 3907 struct mctx *ctx; 3908 char *arg, *ap, *dst, *src; 3909 size_t sz; 3910 3911 /* If the macro is empty, ignore it altogether. */ 3912 3913 if (*r->current_string == '\0') 3914 return ROFF_IGN; 3915 3916 /* Initialize a new macro stack context. 
*/ 3917 3918 if (++r->mstackpos == r->mstacksz) { 3919 r->mstack = mandoc_recallocarray(r->mstack, 3920 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3921 r->mstacksz += 8; 3922 } 3923 ctx = r->mstack + r->mstackpos; 3924 ctx->argsz = 0; 3925 ctx->argc = 0; 3926 ctx->argv = NULL; 3927 3928 /* 3929 * Collect pointers to macro argument strings, 3930 * NUL-terminating them and escaping quotes. 3931 */ 3932 3933 src = buf->buf + pos; 3934 while (*src != '\0') { 3935 if (ctx->argc == ctx->argsz) { 3936 ctx->argsz += 8; 3937 ctx->argv = mandoc_reallocarray(ctx->argv, 3938 ctx->argsz, sizeof(*ctx->argv)); 3939 } 3940 arg = roff_getarg(r, &src, ln, &pos); 3941 sz = 1; /* For the terminating NUL. */ 3942 for (ap = arg; *ap != '\0'; ap++) 3943 sz += *ap == '"' ? 4 : 1; 3944 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3945 for (ap = arg; *ap != '\0'; ap++) { 3946 if (*ap == '"') { 3947 memcpy(dst, "\\(dq", 4); 3948 dst += 4; 3949 } else 3950 *dst++ = *ap; 3951 } 3952 *dst = '\0'; 3953 free(arg); 3954 } 3955 3956 /* Replace the macro invocation by the macro definition. */ 3957 3958 free(buf->buf); 3959 buf->buf = mandoc_strdup(r->current_string); 3960 buf->sz = strlen(buf->buf) + 1; 3961 *offs = 0; 3962 3963 return buf->buf[buf->sz - 2] == '\n' ? 3964 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3965 } 3966 3967 /* 3968 * Calling a high-level macro that was renamed with .rn. 3969 * r->current_string has already been set up by roff_parse(). 3970 */ 3971 static int 3972 roff_renamed(ROFF_ARGS) 3973 { 3974 char *nbuf; 3975 3976 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3977 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 3978 free(buf->buf); 3979 buf->buf = nbuf; 3980 *offs = 0; 3981 return ROFF_CONT; 3982 } 3983 3984 /* 3985 * Measure the length in bytes of the roff identifier at *cpp 3986 * and advance the pointer to the next word. 
3987 */ 3988 static size_t 3989 roff_getname(struct roff *r, char **cpp, int ln, int pos) 3990 { 3991 char *name, *cp; 3992 size_t namesz; 3993 3994 name = *cpp; 3995 if (*name == '\0') 3996 return 0; 3997 3998 /* Advance cp to the byte after the end of the name. */ 3999 4000 for (cp = name; 1; cp++) { 4001 namesz = cp - name; 4002 if (*cp == '\0') 4003 break; 4004 if (*cp == ' ' || *cp == '\t') { 4005 cp++; 4006 break; 4007 } 4008 if (*cp != '\\') 4009 continue; 4010 if (cp[1] == '{' || cp[1] == '}') 4011 break; 4012 if (*++cp == '\\') 4013 continue; 4014 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 4015 "%.*s", (int)(cp - name + 1), name); 4016 mandoc_escape((const char **)&cp, NULL, NULL); 4017 break; 4018 } 4019 4020 /* Read past spaces. */ 4021 4022 while (*cp == ' ') 4023 cp++; 4024 4025 *cpp = cp; 4026 return namesz; 4027 } 4028 4029 /* 4030 * Store *string into the user-defined string called *name. 4031 * To clear an existing entry, call with (*r, *name, NULL, 0). 4032 * append == 0: replace mode 4033 * append == 1: single-line append mode 4034 * append == 2: multiline append mode, append '\n' after each call 4035 */ 4036 static void 4037 roff_setstr(struct roff *r, const char *name, const char *string, 4038 int append) 4039 { 4040 size_t namesz; 4041 4042 namesz = strlen(name); 4043 roff_setstrn(&r->strtab, name, namesz, string, 4044 string ? strlen(string) : 0, append); 4045 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 4046 } 4047 4048 static void 4049 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 4050 const char *string, size_t stringsz, int append) 4051 { 4052 struct roffkv *n; 4053 char *c; 4054 int i; 4055 size_t oldch, newch; 4056 4057 /* Search for an existing string with the same name. */ 4058 n = *r; 4059 4060 while (n && (namesz != n->key.sz || 4061 strncmp(n->key.p, name, namesz))) 4062 n = n->next; 4063 4064 if (NULL == n) { 4065 /* Create a new string table entry. 
*/ 4066 n = mandoc_malloc(sizeof(struct roffkv)); 4067 n->key.p = mandoc_strndup(name, namesz); 4068 n->key.sz = namesz; 4069 n->val.p = NULL; 4070 n->val.sz = 0; 4071 n->next = *r; 4072 *r = n; 4073 } else if (0 == append) { 4074 free(n->val.p); 4075 n->val.p = NULL; 4076 n->val.sz = 0; 4077 } 4078 4079 if (NULL == string) 4080 return; 4081 4082 /* 4083 * One additional byte for the '\n' in multiline mode, 4084 * and one for the terminating '\0'. 4085 */ 4086 newch = stringsz + (1 < append ? 2u : 1u); 4087 4088 if (NULL == n->val.p) { 4089 n->val.p = mandoc_malloc(newch); 4090 *n->val.p = '\0'; 4091 oldch = 0; 4092 } else { 4093 oldch = n->val.sz; 4094 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4095 } 4096 4097 /* Skip existing content in the destination buffer. */ 4098 c = n->val.p + (int)oldch; 4099 4100 /* Append new content to the destination buffer. */ 4101 i = 0; 4102 while (i < (int)stringsz) { 4103 /* 4104 * Rudimentary roff copy mode: 4105 * Handle escaped backslashes. 4106 */ 4107 if ('\\' == string[i] && '\\' == string[i + 1]) 4108 i++; 4109 *c++ = string[i++]; 4110 } 4111 4112 /* Append terminating bytes. 
*/ 4113 if (1 < append) 4114 *c++ = '\n'; 4115 4116 *c = '\0'; 4117 n->val.sz = (int)(c - n->val.p); 4118 } 4119 4120 static const char * 4121 roff_getstrn(struct roff *r, const char *name, size_t len, 4122 int *deftype) 4123 { 4124 const struct roffkv *n; 4125 int found, i; 4126 enum roff_tok tok; 4127 4128 found = 0; 4129 for (n = r->strtab; n != NULL; n = n->next) { 4130 if (strncmp(name, n->key.p, len) != 0 || 4131 n->key.p[len] != '\0' || n->val.p == NULL) 4132 continue; 4133 if (*deftype & ROFFDEF_USER) { 4134 *deftype = ROFFDEF_USER; 4135 return n->val.p; 4136 } else { 4137 found = 1; 4138 break; 4139 } 4140 } 4141 for (n = r->rentab; n != NULL; n = n->next) { 4142 if (strncmp(name, n->key.p, len) != 0 || 4143 n->key.p[len] != '\0' || n->val.p == NULL) 4144 continue; 4145 if (*deftype & ROFFDEF_REN) { 4146 *deftype = ROFFDEF_REN; 4147 return n->val.p; 4148 } else { 4149 found = 1; 4150 break; 4151 } 4152 } 4153 for (i = 0; i < PREDEFS_MAX; i++) { 4154 if (strncmp(name, predefs[i].name, len) != 0 || 4155 predefs[i].name[len] != '\0') 4156 continue; 4157 if (*deftype & ROFFDEF_PRE) { 4158 *deftype = ROFFDEF_PRE; 4159 return predefs[i].str; 4160 } else { 4161 found = 1; 4162 break; 4163 } 4164 } 4165 if (r->man->meta.macroset != MACROSET_MAN) { 4166 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4167 if (strncmp(name, roff_name[tok], len) != 0 || 4168 roff_name[tok][len] != '\0') 4169 continue; 4170 if (*deftype & ROFFDEF_STD) { 4171 *deftype = ROFFDEF_STD; 4172 return NULL; 4173 } else { 4174 found = 1; 4175 break; 4176 } 4177 } 4178 } 4179 if (r->man->meta.macroset != MACROSET_MDOC) { 4180 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4181 if (strncmp(name, roff_name[tok], len) != 0 || 4182 roff_name[tok][len] != '\0') 4183 continue; 4184 if (*deftype & ROFFDEF_STD) { 4185 *deftype = ROFFDEF_STD; 4186 return NULL; 4187 } else { 4188 found = 1; 4189 break; 4190 } 4191 } 4192 } 4193 4194 if (found == 0 && *deftype != ROFFDEF_ANY) { 4195 if (*deftype & ROFFDEF_REN) { 
4196 /* 4197 * This might still be a request, 4198 * so do not treat it as undefined yet. 4199 */ 4200 *deftype = ROFFDEF_UNDEF; 4201 return NULL; 4202 } 4203 4204 /* Using an undefined string defines it to be empty. */ 4205 4206 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4207 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4208 } 4209 4210 *deftype = 0; 4211 return NULL; 4212 } 4213 4214 static void 4215 roff_freestr(struct roffkv *r) 4216 { 4217 struct roffkv *n, *nn; 4218 4219 for (n = r; n; n = nn) { 4220 free(n->key.p); 4221 free(n->val.p); 4222 nn = n->next; 4223 free(n); 4224 } 4225 } 4226 4227 /* --- accessors and utility functions ------------------------------------ */ 4228 4229 /* 4230 * Duplicate an input string, making the appropriate character 4231 * conversations (as stipulated by `tr') along the way. 4232 * Returns a heap-allocated string with all the replacements made. 4233 */ 4234 char * 4235 roff_strdup(const struct roff *r, const char *p) 4236 { 4237 const struct roffkv *cp; 4238 char *res; 4239 const char *pp; 4240 size_t ssz, sz; 4241 enum mandoc_esc esc; 4242 4243 if (NULL == r->xmbtab && NULL == r->xtab) 4244 return mandoc_strdup(p); 4245 else if ('\0' == *p) 4246 return mandoc_strdup(""); 4247 4248 /* 4249 * Step through each character looking for term matches 4250 * (remember that a `tr' can be invoked with an escape, which is 4251 * a glyph but the escape is multi-character). 4252 * We only do this if the character hash has been initialised 4253 * and the string is >0 length. 
4254 */ 4255 4256 res = NULL; 4257 ssz = 0; 4258 4259 while ('\0' != *p) { 4260 assert((unsigned int)*p < 128); 4261 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4262 sz = r->xtab[(int)*p].sz; 4263 res = mandoc_realloc(res, ssz + sz + 1); 4264 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4265 ssz += sz; 4266 p++; 4267 continue; 4268 } else if ('\\' != *p) { 4269 res = mandoc_realloc(res, ssz + 2); 4270 res[ssz++] = *p++; 4271 continue; 4272 } 4273 4274 /* Search for term matches. */ 4275 for (cp = r->xmbtab; cp; cp = cp->next) 4276 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4277 break; 4278 4279 if (NULL != cp) { 4280 /* 4281 * A match has been found. 4282 * Append the match to the array and move 4283 * forward by its keysize. 4284 */ 4285 res = mandoc_realloc(res, 4286 ssz + cp->val.sz + 1); 4287 memcpy(res + ssz, cp->val.p, cp->val.sz); 4288 ssz += cp->val.sz; 4289 p += (int)cp->key.sz; 4290 continue; 4291 } 4292 4293 /* 4294 * Handle escapes carefully: we need to copy 4295 * over just the escape itself, or else we might 4296 * do replacements within the escape itself. 4297 * Make sure to pass along the bogus string. 4298 */ 4299 pp = p++; 4300 esc = mandoc_escape(&p, NULL, NULL); 4301 if (ESCAPE_ERROR == esc) { 4302 sz = strlen(pp); 4303 res = mandoc_realloc(res, ssz + sz + 1); 4304 memcpy(res + ssz, pp, sz); 4305 break; 4306 } 4307 /* 4308 * We bail out on bad escapes. 4309 * No need to warn: we already did so when 4310 * roff_expand() was called. 4311 */ 4312 sz = (int)(p - pp); 4313 res = mandoc_realloc(res, ssz + sz + 1); 4314 memcpy(res + ssz, pp, sz); 4315 ssz += sz; 4316 } 4317 4318 res[(int)ssz] = '\0'; 4319 return res; 4320 } 4321 4322 int 4323 roff_getformat(const struct roff *r) 4324 { 4325 4326 return r->format; 4327 } 4328 4329 /* 4330 * Find out whether a line is a macro line or not. 4331 * If it is, adjust the current position and return one; if it isn't, 4332 * return zero and don't change the current position. 
4333 * If the control character has been set with `.cc', then let that grain 4334 * precedence. 4335 * This is slighly contrary to groff, where using the non-breaking 4336 * control character when `cc' has been invoked will cause the 4337 * non-breaking macro contents to be printed verbatim. 4338 */ 4339 int 4340 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4341 { 4342 int pos; 4343 4344 pos = *ppos; 4345 4346 if (r->control != '\0' && cp[pos] == r->control) 4347 pos++; 4348 else if (r->control != '\0') 4349 return 0; 4350 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4351 pos += 2; 4352 else if ('.' == cp[pos] || '\'' == cp[pos]) 4353 pos++; 4354 else 4355 return 0; 4356 4357 while (' ' == cp[pos] || '\t' == cp[pos]) 4358 pos++; 4359 4360 *ppos = pos; 4361 return 1; 4362 } 4363