1 /* $OpenBSD: roff.c,v 1.235 2019/02/06 20:54:28 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <limits.h> 23 #include <stddef.h> 24 #include <stdint.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 29 #include "mandoc_aux.h" 30 #include "mandoc_ohash.h" 31 #include "mandoc.h" 32 #include "roff.h" 33 #include "mandoc_parse.h" 34 #include "libmandoc.h" 35 #include "roff_int.h" 36 #include "tbl_parse.h" 37 #include "eqn_parse.h" 38 39 /* 40 * ASCII_ESC is used to signal from roff_getarg() to roff_expand() 41 * that an escape sequence resulted from copy-in processing and 42 * needs to be checked or interpolated. As it is used nowhere 43 * else, it is defined here rather than in a header file. 44 */ 45 #define ASCII_ESC 27 46 47 /* Maximum number of string expansions per line, to break infinite loops. */ 48 #define EXPAND_LIMIT 1000 49 50 /* Types of definitions of macros and strings. */ 51 #define ROFFDEF_USER (1 << 1) /* User-defined. */ 52 #define ROFFDEF_PRE (1 << 2) /* Predefined. 
*/ 53 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 54 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 55 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 56 ROFFDEF_REN | ROFFDEF_STD) 57 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 58 59 /* --- data types --------------------------------------------------------- */ 60 61 /* 62 * An incredibly-simple string buffer. 63 */ 64 struct roffstr { 65 char *p; /* nil-terminated buffer */ 66 size_t sz; /* saved strlen(p) */ 67 }; 68 69 /* 70 * A key-value roffstr pair as part of a singly-linked list. 71 */ 72 struct roffkv { 73 struct roffstr key; /* lookup key */ 74 struct roffstr val; /* value stored under the key */ 75 struct roffkv *next; /* next in list */ 76 }; 77 78 /* 79 * A single number register as part of a singly-linked list. 80 */ 81 struct roffreg { 82 struct roffstr key; /* key identifying the register */ 83 int val; /* value held by the register */ 84 int step; /* presumably the auto-increment step -- TODO confirm */ 85 struct roffreg *next; /* next in list */ 86 }; 87 88 /* 89 * Association of request and macro names with token IDs. 90 */ 91 struct roffreq { 92 enum roff_tok tok; /* token ID */ 93 char name[]; /* name, stored inline as a flexible array member */ 94 }; 95 96 /* 97 * A macro processing context. 98 * More than one is needed when macro calls are nested. 
99 */ 100 struct mctx { 101 char **argv; /* argument vector; released by roff_free() */ 102 int argc; /* presumably the number of arguments in use -- TODO confirm */ 103 int argsz; /* presumably the allocated size of argv -- TODO confirm */ 104 }; 105 106 /* Complete state of one roff parser instance. */ struct roff { 107 struct roff_man *man; /* mdoc or man parser */ 108 struct roffnode *last; /* leaf of stack */ 109 struct mctx *mstack; /* stack of macro contexts */ 110 int *rstack; /* stack of inverted `ie' values */ 111 struct ohash *reqtab; /* request lookup table */ 112 struct roffreg *regtab; /* number registers */ 113 struct roffkv *strtab; /* user-defined strings & macros */ 114 struct roffkv *rentab; /* renamed strings & macros */ 115 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 116 struct roffstr *xtab; /* single-byte trans table (`tr') */ 117 const char *current_string; /* value of last called user macro */ 118 struct tbl_node *first_tbl; /* first table parsed */ 119 struct tbl_node *last_tbl; /* last table parsed */ 120 struct tbl_node *tbl; /* current table being parsed */ 121 struct eqn_node *last_eqn; /* equation parser */ 122 struct eqn_node *eqn; /* active equation parser */ 123 int eqn_inline; /* current equation is inline */ 124 int options; /* parse options */ 125 int mstacksz; /* current size of mstack */ 126 int mstackpos; /* position in mstack */ 127 int rstacksz; /* current size limit of rstack */ 128 int rstackpos; /* position in rstack */ 129 int format; /* current file in mdoc or man format */ 130 char control; /* control character */ 131 char escape; /* escape character */ 132 }; 133 134 /* One entry of the stack of pending roff instructions. */ struct roffnode { 135 enum roff_tok tok; /* type of node */ 136 struct roffnode *parent; /* up one in stack */ 137 int line; /* parse line */ 138 int col; /* parse col */ 139 char *name; /* node name, e.g. 
macro name */ 140 char *end; /* end-rules: custom token */ 141 int endspan; /* end-rules: next-line or infty */ 142 int rule; /* current evaluation rule */ 143 }; 144 145 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 146 enum roff_tok tok, /* tok of macro */ \ 147 struct buf *buf, /* input buffer */ \ 148 int ln, /* parse line */ \ 149 int ppos, /* original pos in buffer */ \ 150 int pos, /* current pos in buffer */ \ 151 int *offs /* reset offset of buffer data */ 152 153 /* Signature shared by all request handlers; the parameters are ROFF_ARGS. */ typedef int (*roffproc)(ROFF_ARGS); 154 155 /* The set of handlers belonging to one request. */ struct roffmac { 156 roffproc proc; /* process new macro */ 157 roffproc text; /* process as child text of macro */ 158 roffproc sub; /* process as child of macro */ 159 int flags; /* bitmask; the only flag defined here is ROFFMAC_STRUCT */ 160 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 161 }; 162 163 /* One predefined string: input name and its replacement. */ struct predef { 164 const char *name; /* predefined input name */ 165 const char *str; /* replacement symbol */ 166 }; 167 168 #define PREDEF(__name, __str) \ 169 { (__name), (__str) }, 170 171 /* --- function prototypes ------------------------------------------------ */ 172 173 static int roffnode_cleanscope(struct roff *); 174 static int roffnode_pop(struct roff *); 175 static void roffnode_push(struct roff *, enum roff_tok, 176 const char *, int, int); 177 static void roff_addtbl(struct roff_man *, int, struct tbl_node *); 178 static int roff_als(ROFF_ARGS); 179 static int roff_block(ROFF_ARGS); 180 static int roff_block_text(ROFF_ARGS); 181 static int roff_block_sub(ROFF_ARGS); 182 static int roff_cblock(ROFF_ARGS); 183 static int roff_cc(ROFF_ARGS); 184 static int roff_ccond(struct roff *, int, int); 185 static int roff_char(ROFF_ARGS); 186 static int roff_cond(ROFF_ARGS); 187 static int roff_cond_text(ROFF_ARGS); 188 static int roff_cond_sub(ROFF_ARGS); 189 static int roff_ds(ROFF_ARGS); 190 static int roff_ec(ROFF_ARGS); 191 static int roff_eo(ROFF_ARGS); 192 static int roff_eqndelim(struct roff *, struct buf *, int); 193 static int roff_evalcond(struct roff *r, int, char *, int *); 194 
static int roff_evalnum(struct roff *, int, 195 const char *, int *, int *, int); 196 static int roff_evalpar(struct roff *, int, 197 const char *, int *, int *, int); 198 static int roff_evalstrcond(const char *, int *); 199 static int roff_expand(struct roff *, struct buf *, 200 int, int, char); 201 static void roff_free1(struct roff *); 202 static void roff_freereg(struct roffreg *); 203 static void roff_freestr(struct roffkv *); 204 static size_t roff_getname(struct roff *, char **, int, int); 205 static int roff_getnum(const char *, int *, int *, int); 206 static int roff_getop(const char *, int *, char *); 207 static int roff_getregn(struct roff *, 208 const char *, size_t, char); 209 static int roff_getregro(const struct roff *, 210 const char *name); 211 static const char *roff_getstrn(struct roff *, 212 const char *, size_t, int *); 213 static int roff_hasregn(const struct roff *, 214 const char *, size_t); 215 static int roff_insec(ROFF_ARGS); 216 static int roff_it(ROFF_ARGS); 217 static int roff_line_ignore(ROFF_ARGS); 218 static void roff_man_alloc1(struct roff_man *); 219 static void roff_man_free1(struct roff_man *); 220 static int roff_manyarg(ROFF_ARGS); 221 static int roff_noarg(ROFF_ARGS); 222 static int roff_nop(ROFF_ARGS); 223 static int roff_nr(ROFF_ARGS); 224 static int roff_onearg(ROFF_ARGS); 225 static enum roff_tok roff_parse(struct roff *, char *, int *, 226 int, int); 227 static int roff_parsetext(struct roff *, struct buf *, 228 int, int *); 229 static int roff_renamed(ROFF_ARGS); 230 static int roff_return(ROFF_ARGS); 231 static int roff_rm(ROFF_ARGS); 232 static int roff_rn(ROFF_ARGS); 233 static int roff_rr(ROFF_ARGS); 234 static void roff_setregn(struct roff *, const char *, 235 size_t, int, char, int); 236 static void roff_setstr(struct roff *, 237 const char *, const char *, int); 238 static void roff_setstrn(struct roffkv **, const char *, 239 size_t, const char *, size_t, int); 240 static int roff_shift(ROFF_ARGS); 241 static 
int roff_so(ROFF_ARGS); 242 static int roff_tr(ROFF_ARGS); 243 static int roff_Dd(ROFF_ARGS); 244 static int roff_TE(ROFF_ARGS); 245 static int roff_TS(ROFF_ARGS); 246 static int roff_EQ(ROFF_ARGS); 247 static int roff_EN(ROFF_ARGS); 248 static int roff_T_(ROFF_ARGS); 249 static int roff_unsupp(ROFF_ARGS); 250 static int roff_userdef(ROFF_ARGS); 251 252 /* --- constant data ------------------------------------------------------ */ 253 254 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 255 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 256 257 /* Names of all requests and macros. roffhash_alloc() indexes this table (through roff_name) with enum roff_tok values and skips the NULL entries, which act as unnamed placeholders. NOTE(review): identifiers containing a double underscore are reserved for the implementation; renaming needs a check of all users outside this file. */ const char *__roff_name[MAN_MAX + 1] = { 258 "br", "ce", "fi", "ft", 259 "ll", "mc", "nf", 260 "po", "rj", "sp", 261 "ta", "ti", NULL, 262 "ab", "ad", "af", "aln", 263 "als", "am", "am1", "ami", 264 "ami1", "as", "as1", "asciify", 265 "backtrace", "bd", "bleedat", "blm", 266 "box", "boxa", "bp", "BP", 267 "break", "breakchar", "brnl", "brp", 268 "brpnl", "c2", "cc", 269 "cf", "cflags", "ch", "char", 270 "chop", "class", "close", "CL", 271 "color", "composite", "continue", "cp", 272 "cropat", "cs", "cu", "da", 273 "dch", "Dd", "de", "de1", 274 "defcolor", "dei", "dei1", "device", 275 "devicem", "di", "do", "ds", 276 "ds1", "dwh", "dt", "ec", 277 "ecr", "ecs", "el", "em", 278 "EN", "eo", "EP", "EQ", 279 "errprint", "ev", "evc", "ex", 280 "fallback", "fam", "fc", "fchar", 281 "fcolor", "fdeferlig", "feature", "fkern", 282 "fl", "flig", "fp", "fps", 283 "fschar", "fspacewidth", "fspecial", "ftr", 284 "fzoom", "gcolor", "hc", "hcode", 285 "hidechar", "hla", "hlm", "hpf", 286 "hpfa", "hpfcode", "hw", "hy", 287 "hylang", "hylen", "hym", "hypp", 288 "hys", "ie", "if", "ig", 289 "index", "it", "itc", "IX", 290 "kern", "kernafter", "kernbefore", "kernpair", 291 "lc", "lc_ctype", "lds", "length", 292 "letadj", "lf", "lg", "lhang", 293 "linetabs", "lnr", "lnrf", "lpfx", 294 "ls", "lsm", "lt", 295 "mediasize", "minss", "mk", "mso", 296 "na", "ne", "nh", "nhychar", 297 "nm", "nn", "nop", "nr", 
298 "nrf", "nroff", "ns", "nx", 299 "open", "opena", "os", "output", 300 "padj", "papersize", "pc", "pev", 301 "pi", "PI", "pl", "pm", 302 "pn", "pnr", "ps", 303 "psbb", "pshape", "pso", "ptr", 304 "pvs", "rchar", "rd", "recursionlimit", 305 "return", "rfschar", "rhang", 306 "rm", "rn", "rnn", "rr", 307 "rs", "rt", "schar", "sentchar", 308 "shc", "shift", "sizes", "so", 309 "spacewidth", "special", "spreadwarn", "ss", 310 "sty", "substring", "sv", "sy", 311 "T&", "tc", "TE", 312 "TH", "tkf", "tl", 313 "tm", "tm1", "tmc", "tr", 314 "track", "transchar", "trf", "trimat", 315 "trin", "trnt", "troff", "TS", 316 "uf", "ul", "unformat", "unwatch", 317 "unwatchn", "vpt", "vs", "warn", 318 "warnscale", "watch", "watchlength", "watchn", 319 "wh", "while", "write", "writec", 320 "writem", "xflag", ".", NULL, 321 NULL, "text", 322 "Dd", "Dt", "Os", "Sh", 323 "Ss", "Pp", "D1", "Dl", 324 "Bd", "Ed", "Bl", "El", 325 "It", "Ad", "An", "Ap", 326 "Ar", "Cd", "Cm", "Dv", 327 "Er", "Ev", "Ex", "Fa", 328 "Fd", "Fl", "Fn", "Ft", 329 "Ic", "In", "Li", "Nd", 330 "Nm", "Op", "Ot", "Pa", 331 "Rv", "St", "Va", "Vt", 332 "Xr", "%A", "%B", "%D", 333 "%I", "%J", "%N", "%O", 334 "%P", "%R", "%T", "%V", 335 "Ac", "Ao", "Aq", "At", 336 "Bc", "Bf", "Bo", "Bq", 337 "Bsx", "Bx", "Db", "Dc", 338 "Do", "Dq", "Ec", "Ef", 339 "Em", "Eo", "Fx", "Ms", 340 "No", "Ns", "Nx", "Ox", 341 "Pc", "Pf", "Po", "Pq", 342 "Qc", "Ql", "Qo", "Qq", 343 "Re", "Rs", "Sc", "So", 344 "Sq", "Sm", "Sx", "Sy", 345 "Tn", "Ux", "Xc", "Xo", 346 "Fo", "Fc", "Oo", "Oc", 347 "Bk", "Ek", "Bt", "Hf", 348 "Fr", "Ud", "Lb", "Lp", 349 "Lk", "Mt", "Brq", "Bro", 350 "Brc", "%C", "Es", "En", 351 "Dx", "%Q", "%U", "Ta", 352 NULL, 353 "TH", "SH", "SS", "TP", 354 "TQ", 355 "LP", "PP", "P", "IP", 356 "HP", "SM", "SB", "BI", 357 "IB", "BR", "RB", "R", 358 "B", "I", "IR", "RI", 359 "RE", "RS", "DT", "UC", 360 "PD", "AT", "in", 361 "SY", "YS", "OP", 362 "EX", "EE", "UR", 363 "UE", "MT", "ME", NULL 364 }; 365 /* Read-only alias through which the name table is accessed. */ const char *const *roff_name = 
__roff_name; 366 367 /* Handler table for the roff requests. Each row gives the proc, text and sub handlers and the flags of struct roffmac; the trailing comment on a row names the request it serves. Rows appear in the same order as the leading entries of the name table. */ static struct roffmac roffs[TOKEN_NONE] = { 368 { roff_noarg, NULL, NULL, 0 }, /* br */ 369 { roff_onearg, NULL, NULL, 0 }, /* ce */ 370 { roff_noarg, NULL, NULL, 0 }, /* fi */ 371 { roff_onearg, NULL, NULL, 0 }, /* ft */ 372 { roff_onearg, NULL, NULL, 0 }, /* ll */ 373 { roff_onearg, NULL, NULL, 0 }, /* mc */ 374 { roff_noarg, NULL, NULL, 0 }, /* nf */ 375 { roff_onearg, NULL, NULL, 0 }, /* po */ 376 { roff_onearg, NULL, NULL, 0 }, /* rj */ 377 { roff_onearg, NULL, NULL, 0 }, /* sp */ 378 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 379 { roff_onearg, NULL, NULL, 0 }, /* ti */ 380 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 381 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 382 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 383 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 384 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 385 { roff_als, NULL, NULL, 0 }, /* als */ 386 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 387 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 388 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 389 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 390 { roff_ds, NULL, NULL, 0 }, /* as */ 391 { roff_ds, NULL, NULL, 0 }, /* as1 */ 392 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 393 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 394 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 395 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 396 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 397 { roff_unsupp, NULL, NULL, 0 }, /* box */ 398 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 399 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 400 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 401 { roff_unsupp, NULL, NULL, 0 }, /* break */ 402 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 403 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 404 { roff_noarg, NULL, NULL, 0 }, /* brp */ 405 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 406 { roff_unsupp, NULL, NULL, 0 }, /* c2 */ 407 { roff_cc, NULL, 
NULL, 0 }, /* cc */ 408 { roff_insec, NULL, NULL, 0 }, /* cf */ 409 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 410 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 411 { roff_char, NULL, NULL, 0 }, /* char */ 412 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 413 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 414 { roff_insec, NULL, NULL, 0 }, /* close */ 415 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 416 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 417 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 418 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 419 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 420 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 421 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 422 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 423 { roff_unsupp, NULL, NULL, 0 }, /* da */ 424 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 425 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 426 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 427 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 428 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 429 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 430 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 431 { roff_unsupp, NULL, NULL, 0 }, /* device */ 432 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 433 { roff_unsupp, NULL, NULL, 0 }, /* di */ 434 { roff_unsupp, NULL, NULL, 0 }, /* do */ 435 { roff_ds, NULL, NULL, 0 }, /* ds */ 436 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 437 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 438 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 439 { roff_ec, NULL, NULL, 0 }, /* ec */ 440 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 441 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 442 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 443 { roff_unsupp, NULL, NULL, 0 }, /* em */ 444 { roff_EN, NULL, NULL, 0 }, /* EN */ 445 { roff_eo, NULL, NULL, 0 }, /* eo */ 446 { roff_unsupp, NULL, NULL, 0 }, /* EP */ 447 { roff_EQ, NULL, NULL, 0 }, /* EQ 
*/ 448 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 449 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 450 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 451 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 452 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 453 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 454 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 455 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 456 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 457 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 458 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 459 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 460 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 461 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 462 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 463 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 464 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 465 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 466 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 467 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 468 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 469 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 470 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 471 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 472 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 473 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 474 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 475 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 476 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 477 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 479 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ 484 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 485 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 486 { 
roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 487 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 488 { roff_unsupp, NULL, NULL, 0 }, /* index */ 489 { roff_it, NULL, NULL, 0 }, /* it */ 490 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 491 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 492 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 493 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 494 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 495 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 496 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 497 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 498 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 499 { roff_unsupp, NULL, NULL, 0 }, /* length */ 500 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 501 { roff_insec, NULL, NULL, 0 }, /* lf */ 502 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 503 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 504 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 505 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 506 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 507 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 508 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 509 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 510 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 511 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 512 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 513 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 514 { roff_insec, NULL, NULL, 0 }, /* mso */ 515 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 516 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 517 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 518 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 519 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 520 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 521 { roff_nop, NULL, NULL, 0 }, /* nop */ 522 { roff_nr, NULL, NULL, 0 }, /* nr */ 523 { roff_unsupp, NULL, NULL, 0 }, /* nrf */ 524 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ 525 { roff_line_ignore, NULL, NULL, 0 }, /* 
ns */ 526 { roff_insec, NULL, NULL, 0 }, /* nx */ 527 { roff_insec, NULL, NULL, 0 }, /* open */ 528 { roff_insec, NULL, NULL, 0 }, /* opena */ 529 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 530 { roff_unsupp, NULL, NULL, 0 }, /* output */ 531 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 532 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 533 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 534 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 535 { roff_insec, NULL, NULL, 0 }, /* pi */ 536 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 537 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 538 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 539 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 540 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 541 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 542 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 543 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 544 { roff_insec, NULL, NULL, 0 }, /* pso */ 545 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 546 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 547 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 548 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 549 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 550 { roff_return, NULL, NULL, 0 }, /* return */ 551 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 552 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 553 { roff_rm, NULL, NULL, 0 }, /* rm */ 554 { roff_rn, NULL, NULL, 0 }, /* rn */ 555 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 556 { roff_rr, NULL, NULL, 0 }, /* rr */ 557 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 558 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 559 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 560 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 561 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 562 { roff_shift, NULL, NULL, 0 }, /* shift */ 563 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ 564 { roff_so, NULL, NULL, 0 }, /* so */ 565 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ 566 { roff_line_ignore, NULL, 
NULL, 0 }, /* special */ 567 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 568 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 569 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 570 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 571 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 572 { roff_insec, NULL, NULL, 0 }, /* sy */ 573 { roff_T_, NULL, NULL, 0 }, /* T& */ 574 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 575 { roff_TE, NULL, NULL, 0 }, /* TE */ 576 { roff_Dd, NULL, NULL, 0 }, /* TH */ 577 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 578 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 579 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 580 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 581 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 582 { roff_tr, NULL, NULL, 0 }, /* tr */ 583 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 584 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 585 { roff_insec, NULL, NULL, 0 }, /* trf */ 586 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 587 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 588 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 589 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 590 { roff_TS, NULL, NULL, 0 }, /* TS */ 591 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 592 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 593 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 594 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 595 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 596 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 597 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 598 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 599 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 600 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 601 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 602 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 603 { roff_unsupp, NULL, NULL, 0 }, /* wh */ 604 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 605 { roff_insec, NULL, NULL, 0 }, /* write */ 606 { 
roff_insec, NULL, NULL, 0 }, /* writec */ 607 { roff_insec, NULL, NULL, 0 }, /* writem */ 608 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 609 { roff_cblock, NULL, NULL, 0 }, /* . */ 610 { roff_renamed, NULL, NULL, 0 }, 611 { roff_userdef, NULL, NULL, 0 } 612 }; 613 614 /* Array of injected predefined strings. */ 615 #define PREDEFS_MAX 38 616 static const struct predef predefs[PREDEFS_MAX] = { 617 #include "predefs.in" 618 }; 619 620 static int roffce_lines; /* number of input lines to center */ 621 static struct roff_node *roffce_node; /* active request */ 622 static int roffit_lines; /* number of lines to delay */ 623 static char *roffit_macro; /* nil-terminated macro line */ 624 625 626 /* --- request table ------------------------------------------------------ */ 627 628 /* Build a hash table that maps the names of the tokens mintok up to, but not including, maxtok to their token IDs. Tokens without a name are left out. The caller releases the result with roffhash_free(). */ struct ohash * 629 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 630 { 631 struct ohash *htab; 632 struct roffreq *req; 633 enum roff_tok tok; 634 size_t sz; 635 unsigned int slot; 636 637 htab = mandoc_malloc(sizeof(*htab)); 638 /* the hash key is the inline name member */ mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 639 640 for (tok = mintok; tok < maxtok; tok++) { 641 if (roff_name[tok] == NULL) 642 continue; 643 sz = strlen(roff_name[tok]); 644 req = mandoc_malloc(sizeof(*req) + sz + 1); 645 req->tok = tok; 646 /* sz + 1 also copies the terminating NUL */ memcpy(req->name, roff_name[tok], sz + 1); 647 slot = ohash_qlookup(htab, req->name); 648 ohash_insert(htab, slot, req); 649 } 650 return htab; 651 } 652 653 /* Free all entries of the table and then the table itself; a NULL table is accepted and ignored. */ void 654 roffhash_free(struct ohash *htab) 655 { 656 struct roffreq *req; 657 unsigned int slot; 658 659 if (htab == NULL) 660 return; 661 for (req = ohash_first(htab, &slot); req != NULL; 662 req = ohash_next(htab, &slot)) 663 free(req); 664 ohash_delete(htab); 665 free(htab); 666 } 667 668 /* Look up a name in the table. With a non-zero sz, the key is the byte range from name to name + sz; with sz == 0, name is passed to ohash_qlookup() as a whole string. Returns TOKEN_NONE if no entry is found. */ enum roff_tok 669 roffhash_find(struct ohash *htab, const char *name, size_t sz) 670 { 671 struct roffreq *req; 672 const char *end; 673 674 if (sz) { 675 end = name + sz; 676 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 677 } else 678 req 
= ohash_find(htab, ohash_qlookup(htab, name)); 679 return req == NULL ? TOKEN_NONE : req->tok; 680 } 681 682 /* --- stack of request blocks -------------------------------------------- */ 683 684 /* 685 * Pop the current node off of the stack of roff instructions currently 686 * pending. 687 */ 688 static int 689 roffnode_pop(struct roff *r) 690 { 691 struct roffnode *p; 692 int inloop; 693 694 p = r->last; 695 /* the return value tells whether the popped node was a while request */ inloop = p->tok == ROFF_while; 696 r->last = p->parent; 697 free(p->name); 698 free(p->end); 699 free(p); 700 return inloop; 701 } 702 703 /* 704 * Push a roff node onto the instruction stack. This must later be 705 * removed with roffnode_pop(). 706 */ 707 static void 708 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 709 int line, int col) 710 { 711 struct roffnode *p; 712 713 p = mandoc_calloc(1, sizeof(struct roffnode)); 714 p->tok = tok; 715 if (name) 716 p->name = mandoc_strdup(name); 717 p->parent = r->last; 718 p->line = line; 719 p->col = col; 720 /* inherit the evaluation rule of the enclosing node, or 0 at top level */ p->rule = p->parent ? 
p->parent->rule : 0; 721 722 r->last = p; 723 } 724 725 /* --- roff parser state data management ---------------------------------- */ 726 727 /* Release the state accumulated while parsing: tables, equations, active macro contexts, pending nodes, the rstack, registers, string tables and the translation tables. The roff struct itself, the mstack array and the request table are kept; roff_free() releases those. */ static void 728 roff_free1(struct roff *r) 729 { 730 int i; 731 732 tbl_free(r->first_tbl); 733 r->first_tbl = r->last_tbl = r->tbl = NULL; 734 735 eqn_free(r->last_eqn); 736 r->last_eqn = r->eqn = NULL; 737 738 /* unwind every macro context that is still active */ while (r->mstackpos >= 0) 739 roff_userret(r); 740 741 while (r->last) 742 roffnode_pop(r); 743 744 free (r->rstack); 745 r->rstack = NULL; 746 r->rstacksz = 0; 747 r->rstackpos = -1; 748 749 roff_freereg(r->regtab); 750 r->regtab = NULL; 751 752 roff_freestr(r->strtab); 753 roff_freestr(r->rentab); 754 roff_freestr(r->xmbtab); 755 r->strtab = r->rentab = r->xmbtab = NULL; 756 757 /* xtab, when present, is treated as an array of 128 entries */ if (r->xtab) 758 for (i = 0; i < 128; i++) 759 free(r->xtab[i].p); 760 free(r->xtab); 761 r->xtab = NULL; 762 } 763 764 /* Drop the parse state and restore the defaults: format from the parse options, no control character override, backslash as escape character, and cleared file-scope ce/it state. NOTE(review): roffit_macro is set to NULL without free(); confirm that it cannot still own allocated memory at this point. */ void 765 roff_reset(struct roff *r) 766 { 767 roff_free1(r); 768 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 769 r->control = '\0'; 770 r->escape = '\\'; 771 roffce_lines = 0; 772 roffce_node = NULL; 773 roffit_lines = 0; 774 roffit_macro = NULL; 775 } 776 777 /* Destroy the parser completely: parse state, the argv arrays of all mstack slots, the mstack, the request table and the struct itself. */ void 778 roff_free(struct roff *r) 779 { 780 int i; 781 782 roff_free1(r); 783 for (i = 0; i < r->mstacksz; i++) 784 free(r->mstack[i].argv); 785 free(r->mstack); 786 roffhash_free(r->reqtab); 787 free(r); 788 } 789 790 /* Allocate a parser for the given parse options. The request table covers the tokens from 0 up to, but not including, ROFF_RENAMED; both stacks start empty (position -1) and the escape character is a backslash. */ struct roff * 791 roff_alloc(int options) 792 { 793 struct roff *r; 794 795 r = mandoc_calloc(1, sizeof(struct roff)); 796 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 797 r->options = options; 798 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 799 r->mstackpos = -1; 800 r->rstackpos = -1; 801 r->escape = '\\'; 802 return r; 803 } 804 805 /* --- syntax tree state data management ---------------------------------- */ 806 807 /* Delete the syntax tree, if any, and free the strings of the meta data. The pointers are not reset here. */ static void 808 roff_man_free1(struct roff_man *man) 809 { 810 if (man->meta.first != NULL) 811 roff_node_delete(man, man->meta.first); 812 free(man->meta.msec); 813 free(man->meta.vol); 814 free(man->meta.os); 815 
free(man->meta.arch); 816 free(man->meta.title); 817 free(man->meta.name); 818 free(man->meta.date); 819 free(man->meta.sodest); 820 } 821 822 /* Move the parse point back to the first node of the tree, clear the parser flags and the section tracking, make the next node a child, and set the register "nS" through roff_setreg() with value 0 and sign '='. */ void 823 roff_state_reset(struct roff_man *man) 824 { 825 man->last = man->meta.first; 826 man->last_es = NULL; 827 man->flags = 0; 828 man->lastsec = man->lastnamed = SEC_NONE; 829 man->next = ROFF_NEXT_CHILD; 830 roff_setreg(man->roff, "nS", 0, '='); 831 } 832 833 /* Zero the meta data, create a fresh ROFFT_ROOT node, and reset the parse state. */ static void 834 roff_man_alloc1(struct roff_man *man) 835 { 836 memset(&man->meta, 0, sizeof(man->meta)); 837 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 838 man->meta.first->type = ROFFT_ROOT; 839 man->meta.macroset = MACROSET_NONE; 840 roff_state_reset(man); 841 } 842 843 /* Discard the current tree and meta data and start over with an empty tree. */ void 844 roff_man_reset(struct roff_man *man) 845 { 846 roff_man_free1(man); 847 roff_man_alloc1(man); 848 } 849 850 /* Free the tree, the meta data, and the roff_man struct itself. */ void 851 roff_man_free(struct roff_man *man) 852 { 853 roff_man_free1(man); 854 free(man); 855 } 856 857 /* Allocate a roff_man with an empty tree and link it with the given roff parser in both directions. os_s is stored as passed, not copied. */ struct roff_man * 858 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 859 { 860 struct roff_man *man; 861 862 man = mandoc_calloc(1, sizeof(*man)); 863 man->roff = roff; 864 man->os_s = os_s; 865 man->quick = quick; 866 roff_man_alloc1(man); 867 roff->man = man; 868 return man; 869 } 870 871 /* --- syntax tree handling ----------------------------------------------- */ 872 873 /* Allocate a node that is not yet linked into the tree. Position, token, type and the current section are recorded, and the node flags NODE_SYNPRETTY, NODE_NOFILL and NODE_LINE are derived from the parser flags. MDOC_NEWLINE is cleared from the parser flags as a side effect. */ struct roff_node * 874 roff_node_alloc(struct roff_man *man, int line, int pos, 875 enum roff_type type, int tok) 876 { 877 struct roff_node *n; 878 879 n = mandoc_calloc(1, sizeof(*n)); 880 n->line = line; 881 n->pos = pos; 882 n->tok = tok; 883 n->type = type; 884 n->sec = man->lastsec; 885 886 if (man->flags & MDOC_SYNOPSIS) 887 n->flags |= NODE_SYNPRETTY; 888 else 889 n->flags &= ~NODE_SYNPRETTY; 890 /* NOFILL applies only while ROFF_NONOFILL is not set as well */ if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 891 n->flags |= NODE_NOFILL; 892 else 893 n->flags &= ~NODE_NOFILL; 894 if (man->flags & MDOC_NEWLINE) 895 n->flags |= NODE_LINE; 896 man->flags &= ~MDOC_NEWLINE; 897 898 return n; 899 } 900 901 /* Link n into the tree at the current parse point, either after man->last or as a child of man->last, depending on man->next. */ void 902 
roff_node_append(struct roff_man *man, struct roff_node *n) 903 { 904 905 switch (man->next) { 906 case ROFF_NEXT_SIBLING: /* insert directly after man->last */ 907 if (man->last->next != NULL) { 908 n->next = man->last->next; 909 man->last->next->prev = n; 910 } else 911 man->last->parent->last = n; 912 man->last->next = n; 913 n->prev = man->last; 914 n->parent = man->last->parent; 915 break; 916 case ROFF_NEXT_CHILD: /* insert as the first child of man->last */ 917 if (man->last->child != NULL) { 918 n->next = man->last->child; 919 man->last->child->prev = n; 920 } else 921 man->last->last = n; 922 man->last->child = n; 923 n->parent = man->last; 924 break; 925 default: 926 abort(); 927 } 928 /* n becomes the new parse point */ man->last = n; 929 930 switch (n->type) { 931 case ROFFT_HEAD: 932 n->parent->head = n; 933 break; 934 case ROFFT_BODY: 935 /* a body with an end marker other than ENDBODY_NOT is not registered with its parent */ if (n->end != ENDBODY_NOT) 936 return; 937 n->parent->body = n; 938 break; 939 case ROFFT_TAIL: 940 n->parent->tail = n; 941 break; 942 default: 943 return; 944 } 945 946 /* 947 * Copy over the normalised-data pointer of our parent. Not 948 * everybody has one, but copying a null pointer is fine. 
949 */ 950 951 n->norm = n->parent->norm; 952 assert(n->parent->type == ROFFT_BLOCK); 953 } 954 955 /* Append a text node holding the result of roff_strdup() for word; the node is marked valid and ended, and the next node becomes its sibling. */ void 956 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 957 { 958 struct roff_node *n; 959 960 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 961 n->string = roff_strdup(man->roff, word); 962 roff_node_append(man, n); 963 n->flags |= NODE_VALID | NODE_ENDED; 964 man->next = ROFF_NEXT_SIBLING; 965 } 966 967 /* Extend the string of the last node by one blank and the result of roff_strdup() for word; the old string is freed. */ void 968 roff_word_append(struct roff_man *man, const char *word) 969 { 970 struct roff_node *n; 971 char *addstr, *newstr; 972 973 n = man->last; 974 addstr = roff_strdup(man->roff, word); 975 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 976 free(addstr); 977 free(n->string); 978 n->string = newstr; 979 man->next = ROFF_NEXT_SIBLING; 980 } 981 982 /* Append an element node; the next node becomes its child. */ void 983 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 984 { 985 struct roff_node *n; 986 987 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 988 roff_node_append(man, n); 989 man->next = ROFF_NEXT_CHILD; 990 } 991 992 /* Append a block node and return it; the next node becomes its child. */ struct roff_node * 993 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 994 { 995 struct roff_node *n; 996 997 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 998 roff_node_append(man, n); 999 man->next = ROFF_NEXT_CHILD; 1000 return n; 1001 } 1002 1003 /* Append a head node and return it; the next node becomes its child. */ struct roff_node * 1004 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1005 { 1006 struct roff_node *n; 1007 1008 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1009 roff_node_append(man, n); 1010 man->next = ROFF_NEXT_CHILD; 1011 return n; 1012 } 1013 1014 /* Append a body node and return it; the next node becomes its child. */ struct roff_node * 1015 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1016 { 1017 struct roff_node *n; 1018 1019 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1020 roff_node_append(man, n); 1021 man->next = ROFF_NEXT_CHILD; 1022 return n; 1023 } 1024 1025 /* Append one ROFFT_TBL node for every span that tbl_span() returns for tbl. For the man macro set, man_breakscope() is called for ROFF_TS first. */ static void 1026 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1027 { 1028 
struct roff_node *n; 1029 struct tbl_span *span; 1030 1031 if (man->meta.macroset == MACROSET_MAN) 1032 man_breakscope(man, ROFF_TS); 1033 while ((span = tbl_span(tbl)) != NULL) { 1034 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1035 n->span = span; 1036 roff_node_append(man, n); 1037 n->flags |= NODE_VALID | NODE_ENDED; 1038 man->next = ROFF_NEXT_SIBLING; 1039 } 1040 } 1041 1042 void 1043 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1044 { 1045 1046 /* Adjust siblings. */ 1047 1048 if (n->prev) 1049 n->prev->next = n->next; 1050 if (n->next) 1051 n->next->prev = n->prev; 1052 1053 /* Adjust parent. */ 1054 1055 if (n->parent != NULL) { 1056 if (n->parent->child == n) 1057 n->parent->child = n->next; 1058 if (n->parent->last == n) 1059 n->parent->last = n->prev; 1060 } 1061 1062 /* Adjust parse point. */ 1063 1064 if (man == NULL) 1065 return; 1066 if (man->last == n) { 1067 if (n->prev == NULL) { 1068 man->last = n->parent; 1069 man->next = ROFF_NEXT_CHILD; 1070 } else { 1071 man->last = n->prev; 1072 man->next = ROFF_NEXT_SIBLING; 1073 } 1074 } 1075 if (man->meta.first == n) 1076 man->meta.first = NULL; 1077 } 1078 1079 void 1080 roff_node_relink(struct roff_man *man, struct roff_node *n) 1081 { 1082 roff_node_unlink(man, n); 1083 n->prev = n->next = NULL; 1084 roff_node_append(man, n); 1085 } 1086 1087 void 1088 roff_node_free(struct roff_node *n) 1089 { 1090 1091 if (n->args != NULL) 1092 mdoc_argv_free(n->args); 1093 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1094 free(n->norm); 1095 eqn_box_free(n->eqn); 1096 free(n->string); 1097 free(n); 1098 } 1099 1100 void 1101 roff_node_delete(struct roff_man *man, struct roff_node *n) 1102 { 1103 1104 while (n->child != NULL) 1105 roff_node_delete(man, n->child); 1106 roff_node_unlink(man, n); 1107 roff_node_free(n); 1108 } 1109 1110 void 1111 deroff(char **dest, const struct roff_node *n) 1112 { 1113 char *cp; 1114 size_t sz; 1115 1116 if (n->type != ROFFT_TEXT) { 1117 for (n = 
n->child; n != NULL; n = n->next) 1118 deroff(dest, n); 1119 return; 1120 } 1121 1122 /* Skip leading whitespace. */ 1123 1124 for (cp = n->string; *cp != '\0'; cp++) { 1125 if (cp[0] == '\\' && cp[1] != '\0' && 1126 strchr(" %&0^|~", cp[1]) != NULL) 1127 cp++; 1128 else if ( ! isspace((unsigned char)*cp)) 1129 break; 1130 } 1131 1132 /* Skip trailing backslash. */ 1133 1134 sz = strlen(cp); 1135 if (sz > 0 && cp[sz - 1] == '\\') 1136 sz--; 1137 1138 /* Skip trailing whitespace. */ 1139 1140 for (; sz; sz--) 1141 if ( ! isspace((unsigned char)cp[sz-1])) 1142 break; 1143 1144 /* Skip empty strings. */ 1145 1146 if (sz == 0) 1147 return; 1148 1149 if (*dest == NULL) { 1150 *dest = mandoc_strndup(cp, sz); 1151 return; 1152 } 1153 1154 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1155 free(*dest); 1156 *dest = cp; 1157 } 1158 1159 /* --- main functions of the roff parser ---------------------------------- */ 1160 1161 /* 1162 * In the current line, expand escape sequences that produce parsable 1163 * input text. Also check the syntax of the remaining escape sequences, 1164 * which typically produce output glyphs or change formatter state. 1165 */ 1166 static int 1167 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1168 { 1169 struct mctx *ctx; /* current macro call context */ 1170 char ubuf[24]; /* buffer to print the number */ 1171 struct roff_node *n; /* used for header comments */ 1172 const char *start; /* start of the string to process */ 1173 char *stesc; /* start of an escape sequence ('\\') */ 1174 const char *esct; /* type of esccape sequence */ 1175 char *ep; /* end of comment string */ 1176 const char *stnam; /* start of the name, after "[(*" */ 1177 const char *cp; /* end of the name, e.g. 
before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/*
	 * Search forward for comments.
	 * The loop body only runs to its end for the first comment
	 * found, which cuts the line short and leaves the loop.
	 */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/*
		 * Handle trailing whitespace.
		 * From here on, stesc points to the escape character
		 * again rather than to the comment type character.
		 */

		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->format == 0) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/*
		 * Discard normal comments, together with the blanks
		 * in front of them unless such a blank is escaped.
		 */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}

	/* Nothing is left to expand if the text is empty. */

	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/*
	 * Walk backwards from the end of the text to its start,
	 * handling one escape sequence per iteration.
	 */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/*
		 * If it is escaped, skip it.
		 * Count the run of escape characters ending here:
		 * an even number means that all of them are escaped.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* An escape character at the very end of the text. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: check the syntax only. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/*
		 * Advance to the end of the name.
		 * A maximum length of zero means that the name
		 * extends up to the terminating character.
		 */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			/* Inside \w, some escape sequences count as one. */
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}

			/* Measure the list of all arguments. */

			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}

			/*
			 * Resize the buffer in place; the three bytes of
			 * the escape sequence itself get overwritten.
			 * Move the rest of the line before shrinking
			 * and after growing the buffer.
			 */

			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}

			/* Copy the arguments into the gap. */

			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			/* Refuse to let the line grow without bounds. */
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Resume at the end of the pasted text, such that
		 * escape sequences contained in it are handled, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}

/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * Return a pointer to the parsed argument, which is either the original
 * pointer or advanced by one byte in case the argument is quoted.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer to the next argument,
 * or to the NUL byte terminating the argument line.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/* The counter pairs tells how far to shift text to the left. */

	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/* Marked for roff_expand(), see below. */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args.
*/
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* The caller gets a copy that it owns. */

	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/*
	 * Escape sequences resulting from the translation of double
	 * backslashes were marked with ASCII_ESC above; expand them now.
	 * If expansion asks to ignore the line, return an empty string.
	 */

	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}


/*
 * Process text streams.
 * Return ROFF_REPARSE after springing an input line trap
 * and ROFF_CONT otherwise.
 */
static int
roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
{
	size_t		 sz;
	const char	*start;
	char		*p;
	int		 isz;
	enum mandoc_esc	 esc;

	/*
	 * Spring the input line trap:
	 * Append a call of the trap macro to the line
	 * and have the result parsed again from the beginning.
	 */

	if (roffit_lines == 1) {
		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
		free(buf->buf);
		buf->buf = p;
		buf->sz = isz + 1;
		*offs = 0;
		free(roffit_macro);
		roffit_lines = 0;
		return ROFF_REPARSE;
	} else if (roffit_lines > 1)
		--roffit_lines;

	/*
	 * Each non-empty text line uses up one line of an open
	 * .ce block; once none are left, close the block by
	 * moving the parse point back to its node.
	 */

	if (roffce_node != NULL && buf->buf[pos] != '\0') {
		if (roffce_lines < 1) {
			r->man->last = roffce_node;
			r->man->next = ROFF_NEXT_SIBLING;
			roffce_lines = 0;
			roffce_node = NULL;
		} else
			roffce_lines--;
	}

	/* Convert all breakable hyphens into ASCII_HYPH.
*/

	start = p = buf->buf + pos;

	while (*p != '\0') {
		sz = strcspn(p, "-\\");
		p += sz;

		if (*p == '\0')
			break;

		if (*p == '\\') {
			/* Skip over escapes. */
			p++;
			esc = mandoc_escape((const char **)&p, NULL, NULL);
			if (esc == ESCAPE_ERROR)
				break;
			/* Hyphens right after an escape stay as they are. */
			while (*p == '-')
				p++;
			continue;
		} else if (p == start) {
			/* A hyphen starting the text has no letter before it. */
			p++;
			continue;
		}

		/* Only hyphens between two letters are breakable. */

		if (isalpha((unsigned char)p[-1]) &&
		    isalpha((unsigned char)p[1]))
			*p = ASCII_HYPH;
		p++;
	}
	return ROFF_CONT;
}

/*
 * Process one input line: expand escape sequences, feed the line to
 * an open equation, table, or roff block where applicable, and run
 * roff requests and user-defined macros.
 * Return ROFF_CONT, possibly combined with other flags, for lines
 * that still have to go to the macro set parsers.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep the additional flags only. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl)
		return roff_parsetext(r, buf, pos, offs) | e;

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/* Skip the unknown name and hand the rest to the table. */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}

/*
 * Internal interface function to tell the roff parser that execution
 * of the current macro ended.  This is required because macro
 * definitions usually do not end with a .return request.
 * Free the arguments of the innermost macro call context
 * and pop that context from the stack.
 */
void
roff_userret(struct roff *r)
{
	struct mctx	*ctx;
	int		 i;

	assert(r->mstackpos >= 0);
	ctx = r->mstack + r->mstackpos;
	for (i = 0; i < ctx->argc; i++)
		free(ctx->argv[i]);
	ctx->argc = 0;
	r->mstackpos--;
}

/*
 * Finish parsing: report a roff block or equation that is still
 * open, parse an unfinished equation, and end an unfinished table.
 */
void
roff_endparse(struct roff *r)
{
	if (r->last != NULL)
		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
		    r->last->col, "%s", roff_name[r->last->tok]);

	if (r->eqn != NULL) {
		mandoc_msg(MANDOCERR_BLK_NOEND,
		    r->eqn->node->line, r->eqn->node->pos, "EQ");
		eqn_parse(r->eqn);
		r->eqn = NULL;
	}

	if (r->tbl != NULL) {
		tbl_end(r->tbl, 1);
		r->tbl = NULL;
	}
}

/*
 * Parse a roff node's type from the input buffer.  This must be in the
 * form of ".foo xxx" in the usual way.
1922 */ 1923 static enum roff_tok 1924 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1925 { 1926 char *cp; 1927 const char *mac; 1928 size_t maclen; 1929 int deftype; 1930 enum roff_tok t; 1931 1932 cp = buf + *pos; 1933 1934 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1935 return TOKEN_NONE; 1936 1937 mac = cp; 1938 maclen = roff_getname(r, &cp, ln, ppos); 1939 1940 deftype = ROFFDEF_USER | ROFFDEF_REN; 1941 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 1942 switch (deftype) { 1943 case ROFFDEF_USER: 1944 t = ROFF_USERDEF; 1945 break; 1946 case ROFFDEF_REN: 1947 t = ROFF_RENAMED; 1948 break; 1949 default: 1950 t = roffhash_find(r->reqtab, mac, maclen); 1951 break; 1952 } 1953 if (t != TOKEN_NONE) 1954 *pos = cp - buf; 1955 else if (deftype == ROFFDEF_UNDEF) { 1956 /* Using an undefined macro defines it to be empty. */ 1957 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 1958 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 1959 } 1960 return t; 1961 } 1962 1963 /* --- handling of request blocks ----------------------------------------- */ 1964 1965 static int 1966 roff_cblock(ROFF_ARGS) 1967 { 1968 1969 /* 1970 * A block-close `..' should only be invoked as a child of an 1971 * ignore macro, otherwise raise a warning and just ignore it. 1972 */ 1973 1974 if (r->last == NULL) { 1975 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 1976 return ROFF_IGN; 1977 } 1978 1979 switch (r->last->tok) { 1980 case ROFF_am: 1981 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ 1982 case ROFF_ami: 1983 case ROFF_de: 1984 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 1985 case ROFF_dei: 1986 case ROFF_ig: 1987 break; 1988 default: 1989 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 1990 return ROFF_IGN; 1991 } 1992 1993 if (buf->buf[pos] != '\0') 1994 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 1995 ".. 
%s", buf->buf + pos); 1996 1997 roffnode_pop(r); 1998 roffnode_cleanscope(r); 1999 return ROFF_IGN; 2000 2001 } 2002 2003 static int 2004 roffnode_cleanscope(struct roff *r) 2005 { 2006 int inloop; 2007 2008 inloop = 0; 2009 while (r->last != NULL) { 2010 if (--r->last->endspan != 0) 2011 break; 2012 inloop += roffnode_pop(r); 2013 } 2014 return inloop; 2015 } 2016 2017 static int 2018 roff_ccond(struct roff *r, int ln, int ppos) 2019 { 2020 if (NULL == r->last) { 2021 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2022 return 0; 2023 } 2024 2025 switch (r->last->tok) { 2026 case ROFF_el: 2027 case ROFF_ie: 2028 case ROFF_if: 2029 case ROFF_while: 2030 break; 2031 default: 2032 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2033 return 0; 2034 } 2035 2036 if (r->last->endspan > -1) { 2037 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2038 return 0; 2039 } 2040 2041 return roffnode_pop(r) + roffnode_cleanscope(r); 2042 } 2043 2044 static int 2045 roff_block(ROFF_ARGS) 2046 { 2047 const char *name, *value; 2048 char *call, *cp, *iname, *rname; 2049 size_t csz, namesz, rsz; 2050 int deftype; 2051 2052 /* Ignore groff compatibility mode for now. */ 2053 2054 if (tok == ROFF_de1) 2055 tok = ROFF_de; 2056 else if (tok == ROFF_dei1) 2057 tok = ROFF_dei; 2058 else if (tok == ROFF_am1) 2059 tok = ROFF_am; 2060 else if (tok == ROFF_ami1) 2061 tok = ROFF_ami; 2062 2063 /* Parse the macro name argument. */ 2064 2065 cp = buf->buf + pos; 2066 if (tok == ROFF_ig) { 2067 iname = NULL; 2068 namesz = 0; 2069 } else { 2070 iname = cp; 2071 namesz = roff_getname(r, &cp, ln, ppos); 2072 iname[namesz] = '\0'; 2073 } 2074 2075 /* Resolve the macro name argument if it is indirect. 
*/ 2076 2077 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2078 deftype = ROFFDEF_USER; 2079 name = roff_getstrn(r, iname, namesz, &deftype); 2080 if (name == NULL) { 2081 mandoc_msg(MANDOCERR_STR_UNDEF, 2082 ln, (int)(iname - buf->buf), 2083 "%.*s", (int)namesz, iname); 2084 namesz = 0; 2085 } else 2086 namesz = strlen(name); 2087 } else 2088 name = iname; 2089 2090 if (namesz == 0 && tok != ROFF_ig) { 2091 mandoc_msg(MANDOCERR_REQ_EMPTY, 2092 ln, ppos, "%s", roff_name[tok]); 2093 return ROFF_IGN; 2094 } 2095 2096 roffnode_push(r, tok, name, ln, ppos); 2097 2098 /* 2099 * At the beginning of a `de' macro, clear the existing string 2100 * with the same name, if there is one. New content will be 2101 * appended from roff_block_text() in multiline mode. 2102 */ 2103 2104 if (tok == ROFF_de || tok == ROFF_dei) { 2105 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2106 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2107 } else if (tok == ROFF_am || tok == ROFF_ami) { 2108 deftype = ROFFDEF_ANY; 2109 value = roff_getstrn(r, iname, namesz, &deftype); 2110 switch (deftype) { /* Before appending, ... */ 2111 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2112 roff_setstrn(&r->strtab, name, namesz, 2113 value, strlen(value), 0); 2114 break; 2115 case ROFFDEF_REN: /* call original standard macro. */ 2116 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2117 (int)strlen(value), value); 2118 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2119 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2120 free(call); 2121 break; 2122 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2123 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2124 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2125 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2126 (int)rsz, rname); 2127 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2128 free(call); 2129 free(rname); 2130 break; 2131 default: 2132 break; 2133 } 2134 } 2135 2136 if (*cp == '\0') 2137 return ROFF_IGN; 2138 2139 /* Get the custom end marker. */ 2140 2141 iname = cp; 2142 namesz = roff_getname(r, &cp, ln, ppos); 2143 2144 /* Resolve the end marker if it is indirect. */ 2145 2146 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2147 deftype = ROFFDEF_USER; 2148 name = roff_getstrn(r, iname, namesz, &deftype); 2149 if (name == NULL) { 2150 mandoc_msg(MANDOCERR_STR_UNDEF, 2151 ln, (int)(iname - buf->buf), 2152 "%.*s", (int)namesz, iname); 2153 namesz = 0; 2154 } else 2155 namesz = strlen(name); 2156 } else 2157 name = iname; 2158 2159 if (namesz) 2160 r->last->end = mandoc_strndup(name, namesz); 2161 2162 if (*cp != '\0') 2163 mandoc_msg(MANDOCERR_ARG_EXCESS, 2164 ln, pos, ".%s ... %s", roff_name[tok], cp); 2165 2166 return ROFF_IGN; 2167 } 2168 2169 static int 2170 roff_block_sub(ROFF_ARGS) 2171 { 2172 enum roff_tok t; 2173 int i, j; 2174 2175 /* 2176 * First check whether a custom macro exists at this level. If 2177 * it does, then check against it. This is some of groff's 2178 * stranger behaviours. If we encountered a custom end-scope 2179 * tag and that tag also happens to be a "real" macro, then we 2180 * need to try interpreting it again as a real macro. If it's 2181 * not, then return ignore. Else continue. 
2182 */ 2183 2184 if (r->last->end) { 2185 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2186 if (buf->buf[i] != r->last->end[j]) 2187 break; 2188 2189 if (r->last->end[j] == '\0' && 2190 (buf->buf[i] == '\0' || 2191 buf->buf[i] == ' ' || 2192 buf->buf[i] == '\t')) { 2193 roffnode_pop(r); 2194 roffnode_cleanscope(r); 2195 2196 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2197 i++; 2198 2199 pos = i; 2200 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2201 TOKEN_NONE) 2202 return ROFF_RERUN; 2203 return ROFF_IGN; 2204 } 2205 } 2206 2207 /* 2208 * If we have no custom end-query or lookup failed, then try 2209 * pulling it out of the hashtable. 2210 */ 2211 2212 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2213 2214 if (t != ROFF_cblock) { 2215 if (tok != ROFF_ig) 2216 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2217 return ROFF_IGN; 2218 } 2219 2220 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2221 } 2222 2223 static int 2224 roff_block_text(ROFF_ARGS) 2225 { 2226 2227 if (tok != ROFF_ig) 2228 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2229 2230 return ROFF_IGN; 2231 } 2232 2233 static int 2234 roff_cond_sub(ROFF_ARGS) 2235 { 2236 char *ep; 2237 int endloop, irc, rr; 2238 enum roff_tok t; 2239 2240 irc = ROFF_IGN; 2241 rr = r->last->rule; 2242 endloop = tok != ROFF_while ? ROFF_IGN : 2243 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2244 if (roffnode_cleanscope(r)) 2245 irc |= endloop; 2246 2247 /* 2248 * If `\}' occurs on a macro line without a preceding macro, 2249 * drop the line completely. 2250 */ 2251 2252 ep = buf->buf + pos; 2253 if (ep[0] == '\\' && ep[1] == '}') 2254 rr = 0; 2255 2256 /* 2257 * The closing delimiter `\}' rewinds the conditional scope 2258 * but is otherwise ignored when interpreting the line. 
2259 */ 2260 2261 while ((ep = strchr(ep, '\\')) != NULL) { 2262 switch (ep[1]) { 2263 case '}': 2264 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2265 if (roff_ccond(r, ln, ep - buf->buf)) 2266 irc |= endloop; 2267 break; 2268 case '\0': 2269 ++ep; 2270 break; 2271 default: 2272 ep += 2; 2273 break; 2274 } 2275 } 2276 2277 /* 2278 * Fully handle known macros when they are structurally 2279 * required or when the conditional evaluated to true. 2280 */ 2281 2282 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2283 irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ? 2284 (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : 2285 rr ? ROFF_CONT : ROFF_IGN; 2286 return irc; 2287 } 2288 2289 static int 2290 roff_cond_text(ROFF_ARGS) 2291 { 2292 char *ep; 2293 int endloop, irc, rr; 2294 2295 irc = ROFF_IGN; 2296 rr = r->last->rule; 2297 endloop = tok != ROFF_while ? ROFF_IGN : 2298 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2299 if (roffnode_cleanscope(r)) 2300 irc |= endloop; 2301 2302 /* 2303 * If `\}' occurs on a text line with neither preceding 2304 * nor following characters, drop the line completely. 2305 */ 2306 2307 ep = buf->buf + pos; 2308 if (strcmp(ep, "\\}") == 0) 2309 rr = 0; 2310 2311 /* 2312 * The closing delimiter `\}' rewinds the conditional scope 2313 * but is otherwise ignored when interpreting the line. 2314 */ 2315 2316 while ((ep = strchr(ep, '\\')) != NULL) { 2317 switch (ep[1]) { 2318 case '}': 2319 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2320 if (roff_ccond(r, ln, ep - buf->buf)) 2321 irc |= endloop; 2322 break; 2323 case '\0': 2324 ++ep; 2325 break; 2326 default: 2327 ep += 2; 2328 break; 2329 } 2330 } 2331 if (rr) 2332 irc |= ROFF_CONT; 2333 return irc; 2334 } 2335 2336 /* --- handling of numeric and conditional expressions -------------------- */ 2337 2338 /* 2339 * Parse a single signed integer number. Stop at the first non-digit. 
2340 * If there is at least one digit, return success and advance the 2341 * parse point, else return failure and let the parse point unchanged. 2342 * Ignore overflows, treat them just like the C language. 2343 */ 2344 static int 2345 roff_getnum(const char *v, int *pos, int *res, int flags) 2346 { 2347 int myres, scaled, n, p; 2348 2349 if (NULL == res) 2350 res = &myres; 2351 2352 p = *pos; 2353 n = v[p] == '-'; 2354 if (n || v[p] == '+') 2355 p++; 2356 2357 if (flags & ROFFNUM_WHITE) 2358 while (isspace((unsigned char)v[p])) 2359 p++; 2360 2361 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2362 *res = 10 * *res + v[p] - '0'; 2363 if (p == *pos + n) 2364 return 0; 2365 2366 if (n) 2367 *res = -*res; 2368 2369 /* Each number may be followed by one optional scaling unit. */ 2370 2371 switch (v[p]) { 2372 case 'f': 2373 scaled = *res * 65536; 2374 break; 2375 case 'i': 2376 scaled = *res * 240; 2377 break; 2378 case 'c': 2379 scaled = *res * 240 / 2.54; 2380 break; 2381 case 'v': 2382 case 'P': 2383 scaled = *res * 40; 2384 break; 2385 case 'm': 2386 case 'n': 2387 scaled = *res * 24; 2388 break; 2389 case 'p': 2390 scaled = *res * 10 / 3; 2391 break; 2392 case 'u': 2393 scaled = *res; 2394 break; 2395 case 'M': 2396 scaled = *res * 6 / 25; 2397 break; 2398 default: 2399 scaled = *res; 2400 p--; 2401 break; 2402 } 2403 if (flags & ROFFNUM_SCALE) 2404 *res = scaled; 2405 2406 *pos = p + 1; 2407 return 1; 2408 } 2409 2410 /* 2411 * Evaluate a string comparison condition. 2412 * The first character is the delimiter. 2413 * Succeed if the string up to its second occurrence 2414 * matches the string up to its third occurence. 2415 * Advance the cursor after the third occurrence 2416 * or lacking that, to the end of the line. 
2417 */ 2418 static int 2419 roff_evalstrcond(const char *v, int *pos) 2420 { 2421 const char *s1, *s2, *s3; 2422 int match; 2423 2424 match = 0; 2425 s1 = v + *pos; /* initial delimiter */ 2426 s2 = s1 + 1; /* for scanning the first string */ 2427 s3 = strchr(s2, *s1); /* for scanning the second string */ 2428 2429 if (NULL == s3) /* found no middle delimiter */ 2430 goto out; 2431 2432 while ('\0' != *++s3) { 2433 if (*s2 != *s3) { /* mismatch */ 2434 s3 = strchr(s3, *s1); 2435 break; 2436 } 2437 if (*s3 == *s1) { /* found the final delimiter */ 2438 match = 1; 2439 break; 2440 } 2441 s2++; 2442 } 2443 2444 out: 2445 if (NULL == s3) 2446 s3 = strchr(s2, '\0'); 2447 else if (*s3 != '\0') 2448 s3++; 2449 *pos = s3 - v; 2450 return match; 2451 } 2452 2453 /* 2454 * Evaluate an optionally negated single character, numerical, 2455 * or string condition. 2456 */ 2457 static int 2458 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2459 { 2460 const char *start, *end; 2461 char *cp, *name; 2462 size_t sz; 2463 int deftype, len, number, savepos, istrue, wanttrue; 2464 2465 if ('!' == v[*pos]) { 2466 wanttrue = 0; 2467 (*pos)++; 2468 } else 2469 wanttrue = 1; 2470 2471 switch (v[*pos]) { 2472 case '\0': 2473 return 0; 2474 case 'n': 2475 case 'o': 2476 (*pos)++; 2477 return wanttrue; 2478 case 'e': 2479 case 't': 2480 case 'v': 2481 (*pos)++; 2482 return !wanttrue; 2483 case 'c': 2484 do { 2485 (*pos)++; 2486 } while (v[*pos] == ' '); 2487 2488 /* 2489 * Quirk for groff compatibility: 2490 * The horizontal tab is neither available nor unavailable. 2491 */ 2492 2493 if (v[*pos] == '\t') { 2494 (*pos)++; 2495 return 0; 2496 } 2497 2498 /* Printable ASCII characters are available. 
*/ 2499 2500 if (v[*pos] != '\\') { 2501 (*pos)++; 2502 return wanttrue; 2503 } 2504 2505 end = v + ++*pos; 2506 switch (mandoc_escape(&end, &start, &len)) { 2507 case ESCAPE_SPECIAL: 2508 istrue = mchars_spec2cp(start, len) != -1; 2509 break; 2510 case ESCAPE_UNICODE: 2511 istrue = 1; 2512 break; 2513 case ESCAPE_NUMBERED: 2514 istrue = mchars_num2char(start, len) != -1; 2515 break; 2516 default: 2517 istrue = !wanttrue; 2518 break; 2519 } 2520 *pos = end - v; 2521 return istrue == wanttrue; 2522 case 'd': 2523 case 'r': 2524 cp = v + *pos + 1; 2525 while (*cp == ' ') 2526 cp++; 2527 name = cp; 2528 sz = roff_getname(r, &cp, ln, cp - v); 2529 if (sz == 0) 2530 istrue = 0; 2531 else if (v[*pos] == 'r') 2532 istrue = roff_hasregn(r, name, sz); 2533 else { 2534 deftype = ROFFDEF_ANY; 2535 roff_getstrn(r, name, sz, &deftype); 2536 istrue = !!deftype; 2537 } 2538 *pos = (name + sz) - v; 2539 return istrue == wanttrue; 2540 default: 2541 break; 2542 } 2543 2544 savepos = *pos; 2545 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2546 return (number > 0) == wanttrue; 2547 else if (*pos == savepos) 2548 return roff_evalstrcond(v, pos) == wanttrue; 2549 else 2550 return 0; 2551 } 2552 2553 static int 2554 roff_line_ignore(ROFF_ARGS) 2555 { 2556 2557 return ROFF_IGN; 2558 } 2559 2560 static int 2561 roff_insec(ROFF_ARGS) 2562 { 2563 2564 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2565 return ROFF_IGN; 2566 } 2567 2568 static int 2569 roff_unsupp(ROFF_ARGS) 2570 { 2571 2572 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2573 return ROFF_IGN; 2574 } 2575 2576 static int 2577 roff_cond(ROFF_ARGS) 2578 { 2579 int irc; 2580 2581 roffnode_push(r, tok, NULL, ln, ppos); 2582 2583 /* 2584 * An `.el' has no conditional body: it will consume the value 2585 * of the current rstack entry set in prior `ie' calls or 2586 * defaults to DENY. 2587 * 2588 * If we're not an `el', however, then evaluate the conditional. 
2589 */ 2590 2591 r->last->rule = tok == ROFF_el ? 2592 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2593 roff_evalcond(r, ln, buf->buf, &pos); 2594 2595 /* 2596 * An if-else will put the NEGATION of the current evaluated 2597 * conditional into the stack of rules. 2598 */ 2599 2600 if (tok == ROFF_ie) { 2601 if (r->rstackpos + 1 == r->rstacksz) { 2602 r->rstacksz += 16; 2603 r->rstack = mandoc_reallocarray(r->rstack, 2604 r->rstacksz, sizeof(int)); 2605 } 2606 r->rstack[++r->rstackpos] = !r->last->rule; 2607 } 2608 2609 /* If the parent has false as its rule, then so do we. */ 2610 2611 if (r->last->parent && !r->last->parent->rule) 2612 r->last->rule = 0; 2613 2614 /* 2615 * Determine scope. 2616 * If there is nothing on the line after the conditional, 2617 * not even whitespace, use next-line scope. 2618 * Except that .while does not support next-line scope. 2619 */ 2620 2621 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2622 r->last->endspan = 2; 2623 goto out; 2624 } 2625 2626 while (buf->buf[pos] == ' ') 2627 pos++; 2628 2629 /* An opening brace requests multiline scope. */ 2630 2631 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2632 r->last->endspan = -1; 2633 pos += 2; 2634 while (buf->buf[pos] == ' ') 2635 pos++; 2636 goto out; 2637 } 2638 2639 /* 2640 * Anything else following the conditional causes 2641 * single-line scope. Warn if the scope contains 2642 * nothing but trailing whitespace. 2643 */ 2644 2645 if (buf->buf[pos] == '\0') 2646 mandoc_msg(MANDOCERR_COND_EMPTY, 2647 ln, ppos, "%s", roff_name[tok]); 2648 2649 r->last->endspan = 1; 2650 2651 out: 2652 *offs = pos; 2653 irc = ROFF_RERUN; 2654 if (tok == ROFF_while) 2655 irc |= ROFF_WHILE; 2656 return irc; 2657 } 2658 2659 static int 2660 roff_ds(ROFF_ARGS) 2661 { 2662 char *string; 2663 const char *name; 2664 size_t namesz; 2665 2666 /* Ignore groff compatibility mode for now. 
*/ 2667 2668 if (tok == ROFF_ds1) 2669 tok = ROFF_ds; 2670 else if (tok == ROFF_as1) 2671 tok = ROFF_as; 2672 2673 /* 2674 * The first word is the name of the string. 2675 * If it is empty or terminated by an escape sequence, 2676 * abort the `ds' request without defining anything. 2677 */ 2678 2679 name = string = buf->buf + pos; 2680 if (*name == '\0') 2681 return ROFF_IGN; 2682 2683 namesz = roff_getname(r, &string, ln, pos); 2684 switch (name[namesz]) { 2685 case '\\': 2686 return ROFF_IGN; 2687 case '\t': 2688 string = buf->buf + pos + namesz; 2689 break; 2690 default: 2691 break; 2692 } 2693 2694 /* Read past the initial double-quote, if any. */ 2695 if (*string == '"') 2696 string++; 2697 2698 /* The rest is the value. */ 2699 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2700 ROFF_as == tok); 2701 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2702 return ROFF_IGN; 2703 } 2704 2705 /* 2706 * Parse a single operator, one or two characters long. 2707 * If the operator is recognized, return success and advance the 2708 * parse point, else return failure and let the parse point unchanged. 
2709 */ 2710 static int 2711 roff_getop(const char *v, int *pos, char *res) 2712 { 2713 2714 *res = v[*pos]; 2715 2716 switch (*res) { 2717 case '+': 2718 case '-': 2719 case '*': 2720 case '/': 2721 case '%': 2722 case '&': 2723 case ':': 2724 break; 2725 case '<': 2726 switch (v[*pos + 1]) { 2727 case '=': 2728 *res = 'l'; 2729 (*pos)++; 2730 break; 2731 case '>': 2732 *res = '!'; 2733 (*pos)++; 2734 break; 2735 case '?': 2736 *res = 'i'; 2737 (*pos)++; 2738 break; 2739 default: 2740 break; 2741 } 2742 break; 2743 case '>': 2744 switch (v[*pos + 1]) { 2745 case '=': 2746 *res = 'g'; 2747 (*pos)++; 2748 break; 2749 case '?': 2750 *res = 'a'; 2751 (*pos)++; 2752 break; 2753 default: 2754 break; 2755 } 2756 break; 2757 case '=': 2758 if ('=' == v[*pos + 1]) 2759 (*pos)++; 2760 break; 2761 default: 2762 return 0; 2763 } 2764 (*pos)++; 2765 2766 return *res; 2767 } 2768 2769 /* 2770 * Evaluate either a parenthesized numeric expression 2771 * or a single signed integer number. 2772 */ 2773 static int 2774 roff_evalpar(struct roff *r, int ln, 2775 const char *v, int *pos, int *res, int flags) 2776 { 2777 2778 if ('(' != v[*pos]) 2779 return roff_getnum(v, pos, res, flags); 2780 2781 (*pos)++; 2782 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2783 return 0; 2784 2785 /* 2786 * Omission of the closing parenthesis 2787 * is an error in validation mode, 2788 * but ignored in evaluation mode. 2789 */ 2790 2791 if (')' == v[*pos]) 2792 (*pos)++; 2793 else if (NULL == res) 2794 return 0; 2795 2796 return 1; 2797 } 2798 2799 /* 2800 * Evaluate a complete numeric expression. 2801 * Proceed left to right, there is no concept of precedence. 
2802 */ 2803 static int 2804 roff_evalnum(struct roff *r, int ln, const char *v, 2805 int *pos, int *res, int flags) 2806 { 2807 int mypos, operand2; 2808 char operator; 2809 2810 if (NULL == pos) { 2811 mypos = 0; 2812 pos = &mypos; 2813 } 2814 2815 if (flags & ROFFNUM_WHITE) 2816 while (isspace((unsigned char)v[*pos])) 2817 (*pos)++; 2818 2819 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2820 return 0; 2821 2822 while (1) { 2823 if (flags & ROFFNUM_WHITE) 2824 while (isspace((unsigned char)v[*pos])) 2825 (*pos)++; 2826 2827 if ( ! roff_getop(v, pos, &operator)) 2828 break; 2829 2830 if (flags & ROFFNUM_WHITE) 2831 while (isspace((unsigned char)v[*pos])) 2832 (*pos)++; 2833 2834 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2835 return 0; 2836 2837 if (flags & ROFFNUM_WHITE) 2838 while (isspace((unsigned char)v[*pos])) 2839 (*pos)++; 2840 2841 if (NULL == res) 2842 continue; 2843 2844 switch (operator) { 2845 case '+': 2846 *res += operand2; 2847 break; 2848 case '-': 2849 *res -= operand2; 2850 break; 2851 case '*': 2852 *res *= operand2; 2853 break; 2854 case '/': 2855 if (operand2 == 0) { 2856 mandoc_msg(MANDOCERR_DIVZERO, 2857 ln, *pos, "%s", v); 2858 *res = 0; 2859 break; 2860 } 2861 *res /= operand2; 2862 break; 2863 case '%': 2864 if (operand2 == 0) { 2865 mandoc_msg(MANDOCERR_DIVZERO, 2866 ln, *pos, "%s", v); 2867 *res = 0; 2868 break; 2869 } 2870 *res %= operand2; 2871 break; 2872 case '<': 2873 *res = *res < operand2; 2874 break; 2875 case '>': 2876 *res = *res > operand2; 2877 break; 2878 case 'l': 2879 *res = *res <= operand2; 2880 break; 2881 case 'g': 2882 *res = *res >= operand2; 2883 break; 2884 case '=': 2885 *res = *res == operand2; 2886 break; 2887 case '!': 2888 *res = *res != operand2; 2889 break; 2890 case '&': 2891 *res = *res && operand2; 2892 break; 2893 case ':': 2894 *res = *res || operand2; 2895 break; 2896 case 'i': 2897 if (operand2 < *res) 2898 *res = operand2; 2899 break; 2900 case 'a': 2901 if (operand2 > *res) 2902 *res 
= operand2; 2903 break; 2904 default: 2905 abort(); 2906 } 2907 } 2908 return 1; 2909 } 2910 2911 /* --- register management ------------------------------------------------ */ 2912 2913 void 2914 roff_setreg(struct roff *r, const char *name, int val, char sign) 2915 { 2916 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 2917 } 2918 2919 static void 2920 roff_setregn(struct roff *r, const char *name, size_t len, 2921 int val, char sign, int step) 2922 { 2923 struct roffreg *reg; 2924 2925 /* Search for an existing register with the same name. */ 2926 reg = r->regtab; 2927 2928 while (reg != NULL && (reg->key.sz != len || 2929 strncmp(reg->key.p, name, len) != 0)) 2930 reg = reg->next; 2931 2932 if (NULL == reg) { 2933 /* Create a new register. */ 2934 reg = mandoc_malloc(sizeof(struct roffreg)); 2935 reg->key.p = mandoc_strndup(name, len); 2936 reg->key.sz = len; 2937 reg->val = 0; 2938 reg->step = 0; 2939 reg->next = r->regtab; 2940 r->regtab = reg; 2941 } 2942 2943 if ('+' == sign) 2944 reg->val += val; 2945 else if ('-' == sign) 2946 reg->val -= val; 2947 else 2948 reg->val = val; 2949 if (step != INT_MIN) 2950 reg->step = step; 2951 } 2952 2953 /* 2954 * Handle some predefined read-only number registers. 2955 * For now, return -1 if the requested register is not predefined; 2956 * in case a predefined read-only register having the value -1 2957 * were to turn up, another special value would have to be chosen. 2958 */ 2959 static int 2960 roff_getregro(const struct roff *r, const char *name) 2961 { 2962 2963 switch (*name) { 2964 case '$': /* Number of arguments of the last macro evaluated. */ 2965 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 2966 case 'A': /* ASCII approximation mode is always off. */ 2967 return 0; 2968 case 'g': /* Groff compatibility mode is always on. */ 2969 return 1; 2970 case 'H': /* Fixed horizontal resolution. */ 2971 return 24; 2972 case 'j': /* Always adjust left margin only. 
*/ 2973 return 0; 2974 case 'T': /* Some output device is always defined. */ 2975 return 1; 2976 case 'V': /* Fixed vertical resolution. */ 2977 return 40; 2978 default: 2979 return -1; 2980 } 2981 } 2982 2983 int 2984 roff_getreg(struct roff *r, const char *name) 2985 { 2986 return roff_getregn(r, name, strlen(name), '\0'); 2987 } 2988 2989 static int 2990 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 2991 { 2992 struct roffreg *reg; 2993 int val; 2994 2995 if ('.' == name[0] && 2 == len) { 2996 val = roff_getregro(r, name + 1); 2997 if (-1 != val) 2998 return val; 2999 } 3000 3001 for (reg = r->regtab; reg; reg = reg->next) { 3002 if (len == reg->key.sz && 3003 0 == strncmp(name, reg->key.p, len)) { 3004 switch (sign) { 3005 case '+': 3006 reg->val += reg->step; 3007 break; 3008 case '-': 3009 reg->val -= reg->step; 3010 break; 3011 default: 3012 break; 3013 } 3014 return reg->val; 3015 } 3016 } 3017 3018 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3019 return 0; 3020 } 3021 3022 static int 3023 roff_hasregn(const struct roff *r, const char *name, size_t len) 3024 { 3025 struct roffreg *reg; 3026 int val; 3027 3028 if ('.' 
== name[0] && 2 == len) { 3029 val = roff_getregro(r, name + 1); 3030 if (-1 != val) 3031 return 1; 3032 } 3033 3034 for (reg = r->regtab; reg; reg = reg->next) 3035 if (len == reg->key.sz && 3036 0 == strncmp(name, reg->key.p, len)) 3037 return 1; 3038 3039 return 0; 3040 } 3041 3042 static void 3043 roff_freereg(struct roffreg *reg) 3044 { 3045 struct roffreg *old_reg; 3046 3047 while (NULL != reg) { 3048 free(reg->key.p); 3049 old_reg = reg; 3050 reg = reg->next; 3051 free(old_reg); 3052 } 3053 } 3054 3055 static int 3056 roff_nr(ROFF_ARGS) 3057 { 3058 char *key, *val, *step; 3059 size_t keysz; 3060 int iv, is, len; 3061 char sign; 3062 3063 key = val = buf->buf + pos; 3064 if (*key == '\0') 3065 return ROFF_IGN; 3066 3067 keysz = roff_getname(r, &val, ln, pos); 3068 if (key[keysz] == '\\' || key[keysz] == '\t') 3069 return ROFF_IGN; 3070 3071 sign = *val; 3072 if (sign == '+' || sign == '-') 3073 val++; 3074 3075 len = 0; 3076 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3077 return ROFF_IGN; 3078 3079 step = val + len; 3080 while (isspace((unsigned char)*step)) 3081 step++; 3082 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3083 is = INT_MIN; 3084 3085 roff_setregn(r, key, keysz, iv, sign, is); 3086 return ROFF_IGN; 3087 } 3088 3089 static int 3090 roff_rr(ROFF_ARGS) 3091 { 3092 struct roffreg *reg, **prev; 3093 char *name, *cp; 3094 size_t namesz; 3095 3096 name = cp = buf->buf + pos; 3097 if (*name == '\0') 3098 return ROFF_IGN; 3099 namesz = roff_getname(r, &cp, ln, pos); 3100 name[namesz] = '\0'; 3101 3102 prev = &r->regtab; 3103 while (1) { 3104 reg = *prev; 3105 if (reg == NULL || !strcmp(name, reg->key.p)) 3106 break; 3107 prev = ®->next; 3108 } 3109 if (reg != NULL) { 3110 *prev = reg->next; 3111 free(reg->key.p); 3112 free(reg); 3113 } 3114 return ROFF_IGN; 3115 } 3116 3117 /* --- handler functions for roff requests -------------------------------- */ 3118 3119 static int 3120 roff_rm(ROFF_ARGS) 3121 { 3122 const char *name; 3123 
char *cp; 3124 size_t namesz; 3125 3126 cp = buf->buf + pos; 3127 while (*cp != '\0') { 3128 name = cp; 3129 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3130 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3131 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3132 if (name[namesz] == '\\' || name[namesz] == '\t') 3133 break; 3134 } 3135 return ROFF_IGN; 3136 } 3137 3138 static int 3139 roff_it(ROFF_ARGS) 3140 { 3141 int iv; 3142 3143 /* Parse the number of lines. */ 3144 3145 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3146 mandoc_msg(MANDOCERR_IT_NONUM, 3147 ln, ppos, "%s", buf->buf + 1); 3148 return ROFF_IGN; 3149 } 3150 3151 while (isspace((unsigned char)buf->buf[pos])) 3152 pos++; 3153 3154 /* 3155 * Arm the input line trap. 3156 * Special-casing "an-trap" is an ugly workaround to cope 3157 * with DocBook stupidly fiddling with man(7) internals. 3158 */ 3159 3160 roffit_lines = iv; 3161 roffit_macro = mandoc_strdup(iv != 1 || 3162 strcmp(buf->buf + pos, "an-trap") ? 
3163 buf->buf + pos : "br"); 3164 return ROFF_IGN; 3165 } 3166 3167 static int 3168 roff_Dd(ROFF_ARGS) 3169 { 3170 int mask; 3171 enum roff_tok t, te; 3172 3173 switch (tok) { 3174 case ROFF_Dd: 3175 tok = MDOC_Dd; 3176 te = MDOC_MAX; 3177 if (r->format == 0) 3178 r->format = MPARSE_MDOC; 3179 mask = MPARSE_MDOC | MPARSE_QUICK; 3180 break; 3181 case ROFF_TH: 3182 tok = MAN_TH; 3183 te = MAN_MAX; 3184 if (r->format == 0) 3185 r->format = MPARSE_MAN; 3186 mask = MPARSE_QUICK; 3187 break; 3188 default: 3189 abort(); 3190 } 3191 if ((r->options & mask) == 0) 3192 for (t = tok; t < te; t++) 3193 roff_setstr(r, roff_name[t], NULL, 0); 3194 return ROFF_CONT; 3195 } 3196 3197 static int 3198 roff_TE(ROFF_ARGS) 3199 { 3200 r->man->flags &= ~ROFF_NONOFILL; 3201 if (r->tbl == NULL) { 3202 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3203 return ROFF_IGN; 3204 } 3205 if (tbl_end(r->tbl, 0) == 0) { 3206 r->tbl = NULL; 3207 free(buf->buf); 3208 buf->buf = mandoc_strdup(".sp"); 3209 buf->sz = 4; 3210 *offs = 0; 3211 return ROFF_REPARSE; 3212 } 3213 r->tbl = NULL; 3214 return ROFF_IGN; 3215 } 3216 3217 static int 3218 roff_T_(ROFF_ARGS) 3219 { 3220 3221 if (NULL == r->tbl) 3222 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3223 else 3224 tbl_restart(ln, ppos, r->tbl); 3225 3226 return ROFF_IGN; 3227 } 3228 3229 /* 3230 * Handle in-line equation delimiters. 3231 */ 3232 static int 3233 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3234 { 3235 char *cp1, *cp2; 3236 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3237 3238 /* 3239 * Outside equations, look for an opening delimiter. 3240 * If we are inside an equation, we already know it is 3241 * in-line, or this function wouldn't have been called; 3242 * so look for a closing delimiter. 3243 */ 3244 3245 cp1 = buf->buf + pos; 3246 cp2 = strchr(cp1, r->eqn == NULL ? 
3247 r->last_eqn->odelim : r->last_eqn->cdelim); 3248 if (cp2 == NULL) 3249 return ROFF_CONT; 3250 3251 *cp2++ = '\0'; 3252 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3253 3254 /* Handle preceding text, protecting whitespace. */ 3255 3256 if (*buf->buf != '\0') { 3257 if (r->eqn == NULL) 3258 bef_pr = "\\&"; 3259 bef_nl = "\n"; 3260 } 3261 3262 /* 3263 * Prepare replacing the delimiter with an equation macro 3264 * and drop leading white space from the equation. 3265 */ 3266 3267 if (r->eqn == NULL) { 3268 while (*cp2 == ' ') 3269 cp2++; 3270 mac = ".EQ"; 3271 } else 3272 mac = ".EN"; 3273 3274 /* Handle following text, protecting whitespace. */ 3275 3276 if (*cp2 != '\0') { 3277 aft_nl = "\n"; 3278 if (r->eqn != NULL) 3279 aft_pr = "\\&"; 3280 } 3281 3282 /* Do the actual replacement. */ 3283 3284 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3285 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3286 free(buf->buf); 3287 buf->buf = cp1; 3288 3289 /* Toggle the in-line state of the eqn subsystem. 
*/ 3290 3291 r->eqn_inline = r->eqn == NULL; 3292 return ROFF_REPARSE; 3293 } 3294 3295 static int 3296 roff_EQ(ROFF_ARGS) 3297 { 3298 struct roff_node *n; 3299 3300 if (r->man->meta.macroset == MACROSET_MAN) 3301 man_breakscope(r->man, ROFF_EQ); 3302 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3303 if (ln > r->man->last->line) 3304 n->flags |= NODE_LINE; 3305 n->eqn = eqn_box_new(); 3306 roff_node_append(r->man, n); 3307 r->man->next = ROFF_NEXT_SIBLING; 3308 3309 assert(r->eqn == NULL); 3310 if (r->last_eqn == NULL) 3311 r->last_eqn = eqn_alloc(); 3312 else 3313 eqn_reset(r->last_eqn); 3314 r->eqn = r->last_eqn; 3315 r->eqn->node = n; 3316 3317 if (buf->buf[pos] != '\0') 3318 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3319 ".EQ %s", buf->buf + pos); 3320 3321 return ROFF_IGN; 3322 } 3323 3324 static int 3325 roff_EN(ROFF_ARGS) 3326 { 3327 if (r->eqn != NULL) { 3328 eqn_parse(r->eqn); 3329 r->eqn = NULL; 3330 } else 3331 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3332 if (buf->buf[pos] != '\0') 3333 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3334 "EN %s", buf->buf + pos); 3335 return ROFF_IGN; 3336 } 3337 3338 static int 3339 roff_TS(ROFF_ARGS) 3340 { 3341 if (r->tbl != NULL) { 3342 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3343 tbl_end(r->tbl, 0); 3344 } 3345 r->man->flags |= ROFF_NONOFILL; 3346 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3347 if (r->last_tbl == NULL) 3348 r->first_tbl = r->tbl; 3349 r->last_tbl = r->tbl; 3350 return ROFF_IGN; 3351 } 3352 3353 static int 3354 roff_noarg(ROFF_ARGS) 3355 { 3356 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3357 man_breakscope(r->man, tok); 3358 if (tok == ROFF_brp) 3359 tok = ROFF_br; 3360 roff_elem_alloc(r->man, ln, ppos, tok); 3361 if (buf->buf[pos] != '\0') 3362 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3363 "%s %s", roff_name[tok], buf->buf + pos); 3364 if (tok == ROFF_nf) 3365 r->man->flags |= ROFF_NOFILL; 3366 else if (tok == ROFF_fi) 3367 r->man->flags &= ~ROFF_NOFILL; 3368 
r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3369 r->man->next = ROFF_NEXT_SIBLING; 3370 return ROFF_IGN; 3371 } 3372 3373 static int 3374 roff_onearg(ROFF_ARGS) 3375 { 3376 struct roff_node *n; 3377 char *cp; 3378 int npos; 3379 3380 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3381 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3382 tok == ROFF_ti)) 3383 man_breakscope(r->man, tok); 3384 3385 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3386 r->man->last = roffce_node; 3387 r->man->next = ROFF_NEXT_SIBLING; 3388 } 3389 3390 roff_elem_alloc(r->man, ln, ppos, tok); 3391 n = r->man->last; 3392 3393 cp = buf->buf + pos; 3394 if (*cp != '\0') { 3395 while (*cp != '\0' && *cp != ' ') 3396 cp++; 3397 while (*cp == ' ') 3398 *cp++ = '\0'; 3399 if (*cp != '\0') 3400 mandoc_msg(MANDOCERR_ARG_EXCESS, 3401 ln, (int)(cp - buf->buf), 3402 "%s ... %s", roff_name[tok], cp); 3403 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3404 } 3405 3406 if (tok == ROFF_ce || tok == ROFF_rj) { 3407 if (r->man->last->type == ROFFT_ELEM) { 3408 roff_word_alloc(r->man, ln, pos, "1"); 3409 r->man->last->flags |= NODE_NOSRC; 3410 } 3411 npos = 0; 3412 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3413 &roffce_lines, 0) == 0) { 3414 mandoc_msg(MANDOCERR_CE_NONUM, 3415 ln, pos, "ce %s", buf->buf + pos); 3416 roffce_lines = 1; 3417 } 3418 if (roffce_lines < 1) { 3419 r->man->last = r->man->last->parent; 3420 roffce_node = NULL; 3421 roffce_lines = 0; 3422 } else 3423 roffce_node = r->man->last->parent; 3424 } else { 3425 n->flags |= NODE_VALID | NODE_ENDED; 3426 r->man->last = n; 3427 } 3428 n->flags |= NODE_LINE; 3429 r->man->next = ROFF_NEXT_SIBLING; 3430 return ROFF_IGN; 3431 } 3432 3433 static int 3434 roff_manyarg(ROFF_ARGS) 3435 { 3436 struct roff_node *n; 3437 char *sp, *ep; 3438 3439 roff_elem_alloc(r->man, ln, ppos, tok); 3440 n = r->man->last; 3441 3442 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3443 while (*ep != '\0' && 
*ep != ' ') 3444 ep++; 3445 while (*ep == ' ') 3446 *ep++ = '\0'; 3447 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3448 } 3449 3450 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3451 r->man->last = n; 3452 r->man->next = ROFF_NEXT_SIBLING; 3453 return ROFF_IGN; 3454 } 3455 3456 static int 3457 roff_als(ROFF_ARGS) 3458 { 3459 char *oldn, *newn, *end, *value; 3460 size_t oldsz, newsz, valsz; 3461 3462 newn = oldn = buf->buf + pos; 3463 if (*newn == '\0') 3464 return ROFF_IGN; 3465 3466 newsz = roff_getname(r, &oldn, ln, pos); 3467 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3468 return ROFF_IGN; 3469 3470 end = oldn; 3471 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3472 if (oldsz == 0) 3473 return ROFF_IGN; 3474 3475 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3476 (int)oldsz, oldn); 3477 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3478 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3479 free(value); 3480 return ROFF_IGN; 3481 } 3482 3483 static int 3484 roff_cc(ROFF_ARGS) 3485 { 3486 const char *p; 3487 3488 p = buf->buf + pos; 3489 3490 if (*p == '\0' || (r->control = *p++) == '.') 3491 r->control = '\0'; 3492 3493 if (*p != '\0') 3494 mandoc_msg(MANDOCERR_ARG_EXCESS, 3495 ln, p - buf->buf, "cc ... %s", p); 3496 3497 return ROFF_IGN; 3498 } 3499 3500 static int 3501 roff_char(ROFF_ARGS) 3502 { 3503 const char *p, *kp, *vp; 3504 size_t ksz, vsz; 3505 int font; 3506 3507 /* Parse the character to be replaced. */ 3508 3509 kp = buf->buf + pos; 3510 p = kp + 1; 3511 if (*kp == '\0' || (*kp == '\\' && 3512 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3513 (*p != ' ' && *p != '\0')) { 3514 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3515 return ROFF_IGN; 3516 } 3517 ksz = p - kp; 3518 while (*p == ' ') 3519 p++; 3520 3521 /* 3522 * If the replacement string contains a font escape sequence, 3523 * we have to restore the font at the end. 
3524 */ 3525 3526 vp = p; 3527 vsz = strlen(p); 3528 font = 0; 3529 while (*p != '\0') { 3530 if (*p++ != '\\') 3531 continue; 3532 switch (mandoc_escape(&p, NULL, NULL)) { 3533 case ESCAPE_FONT: 3534 case ESCAPE_FONTROMAN: 3535 case ESCAPE_FONTITALIC: 3536 case ESCAPE_FONTBOLD: 3537 case ESCAPE_FONTBI: 3538 case ESCAPE_FONTCW: 3539 case ESCAPE_FONTPREV: 3540 font++; 3541 break; 3542 default: 3543 break; 3544 } 3545 } 3546 if (font > 1) 3547 mandoc_msg(MANDOCERR_CHAR_FONT, 3548 ln, (int)(vp - buf->buf), "%s", vp); 3549 3550 /* 3551 * Approximate the effect of .char using the .tr tables. 3552 * XXX In groff, .char and .tr interact differently. 3553 */ 3554 3555 if (ksz == 1) { 3556 if (r->xtab == NULL) 3557 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3558 assert((unsigned int)*kp < 128); 3559 free(r->xtab[(int)*kp].p); 3560 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3561 "%s%s", vp, font ? "\fP" : ""); 3562 } else { 3563 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3564 if (font) 3565 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3566 } 3567 return ROFF_IGN; 3568 } 3569 3570 static int 3571 roff_ec(ROFF_ARGS) 3572 { 3573 const char *p; 3574 3575 p = buf->buf + pos; 3576 if (*p == '\0') 3577 r->escape = '\\'; 3578 else { 3579 r->escape = *p; 3580 if (*++p != '\0') 3581 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3582 (int)(p - buf->buf), "ec ... 
%s", p); 3583 } 3584 return ROFF_IGN; 3585 } 3586 3587 static int 3588 roff_eo(ROFF_ARGS) 3589 { 3590 r->escape = '\0'; 3591 if (buf->buf[pos] != '\0') 3592 mandoc_msg(MANDOCERR_ARG_SKIP, 3593 ln, pos, "eo %s", buf->buf + pos); 3594 return ROFF_IGN; 3595 } 3596 3597 static int 3598 roff_nop(ROFF_ARGS) 3599 { 3600 while (buf->buf[pos] == ' ') 3601 pos++; 3602 *offs = pos; 3603 return ROFF_RERUN; 3604 } 3605 3606 static int 3607 roff_tr(ROFF_ARGS) 3608 { 3609 const char *p, *first, *second; 3610 size_t fsz, ssz; 3611 enum mandoc_esc esc; 3612 3613 p = buf->buf + pos; 3614 3615 if (*p == '\0') { 3616 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3617 return ROFF_IGN; 3618 } 3619 3620 while (*p != '\0') { 3621 fsz = ssz = 1; 3622 3623 first = p++; 3624 if (*first == '\\') { 3625 esc = mandoc_escape(&p, NULL, NULL); 3626 if (esc == ESCAPE_ERROR) { 3627 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3628 (int)(p - buf->buf), "%s", first); 3629 return ROFF_IGN; 3630 } 3631 fsz = (size_t)(p - first); 3632 } 3633 3634 second = p++; 3635 if (*second == '\\') { 3636 esc = mandoc_escape(&p, NULL, NULL); 3637 if (esc == ESCAPE_ERROR) { 3638 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3639 (int)(p - buf->buf), "%s", second); 3640 return ROFF_IGN; 3641 } 3642 ssz = (size_t)(p - second); 3643 } else if (*second == '\0') { 3644 mandoc_msg(MANDOCERR_TR_ODD, ln, 3645 (int)(first - buf->buf), "tr %s", first); 3646 second = " "; 3647 p--; 3648 } 3649 3650 if (fsz > 1) { 3651 roff_setstrn(&r->xmbtab, first, fsz, 3652 second, ssz, 0); 3653 continue; 3654 } 3655 3656 if (r->xtab == NULL) 3657 r->xtab = mandoc_calloc(128, 3658 sizeof(struct roffstr)); 3659 3660 free(r->xtab[(int)*first].p); 3661 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3662 r->xtab[(int)*first].sz = ssz; 3663 } 3664 3665 return ROFF_IGN; 3666 } 3667 3668 /* 3669 * Implementation of the .return request. 3670 * There is no need to call roff_userret() from here. 
3671 * The read module will call that after rewinding the reader stack 3672 * to the place from where the current macro was called. 3673 */ 3674 static int 3675 roff_return(ROFF_ARGS) 3676 { 3677 if (r->mstackpos >= 0) 3678 return ROFF_IGN | ROFF_USERRET; 3679 3680 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3681 return ROFF_IGN; 3682 } 3683 3684 static int 3685 roff_rn(ROFF_ARGS) 3686 { 3687 const char *value; 3688 char *oldn, *newn, *end; 3689 size_t oldsz, newsz; 3690 int deftype; 3691 3692 oldn = newn = buf->buf + pos; 3693 if (*oldn == '\0') 3694 return ROFF_IGN; 3695 3696 oldsz = roff_getname(r, &newn, ln, pos); 3697 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3698 return ROFF_IGN; 3699 3700 end = newn; 3701 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3702 if (newsz == 0) 3703 return ROFF_IGN; 3704 3705 deftype = ROFFDEF_ANY; 3706 value = roff_getstrn(r, oldn, oldsz, &deftype); 3707 switch (deftype) { 3708 case ROFFDEF_USER: 3709 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3710 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3711 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3712 break; 3713 case ROFFDEF_PRE: 3714 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3715 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3716 break; 3717 case ROFFDEF_REN: 3718 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3719 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3720 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3721 break; 3722 case ROFFDEF_STD: 3723 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3724 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3725 break; 3726 default: 3727 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3728 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3729 break; 3730 } 3731 return ROFF_IGN; 3732 } 3733 3734 static int 3735 roff_shift(ROFF_ARGS) 3736 { 3737 struct mctx *ctx; 3738 int levels, i; 3739 3740 levels = 1; 
3741 if (buf->buf[pos] != '\0' && 3742 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3743 mandoc_msg(MANDOCERR_CE_NONUM, 3744 ln, pos, "shift %s", buf->buf + pos); 3745 levels = 1; 3746 } 3747 if (r->mstackpos < 0) { 3748 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3749 return ROFF_IGN; 3750 } 3751 ctx = r->mstack + r->mstackpos; 3752 if (levels > ctx->argc) { 3753 mandoc_msg(MANDOCERR_SHIFT, 3754 ln, pos, "%d, but max is %d", levels, ctx->argc); 3755 levels = ctx->argc; 3756 } 3757 if (levels == 0) 3758 return ROFF_IGN; 3759 for (i = 0; i < levels; i++) 3760 free(ctx->argv[i]); 3761 ctx->argc -= levels; 3762 for (i = 0; i < ctx->argc; i++) 3763 ctx->argv[i] = ctx->argv[i + levels]; 3764 return ROFF_IGN; 3765 } 3766 3767 static int 3768 roff_so(ROFF_ARGS) 3769 { 3770 char *name, *cp; 3771 3772 name = buf->buf + pos; 3773 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3774 3775 /* 3776 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3777 * opening anything that's not in our cwd or anything beneath 3778 * it. Thus, explicitly disallow traversing up the file-system 3779 * or using absolute paths. 3780 */ 3781 3782 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3783 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3784 buf->sz = mandoc_asprintf(&cp, 3785 ".sp\nSee the file %s.\n.sp", name) + 1; 3786 free(buf->buf); 3787 buf->buf = cp; 3788 *offs = 0; 3789 return ROFF_REPARSE; 3790 } 3791 3792 *offs = pos; 3793 return ROFF_SO; 3794 } 3795 3796 /* --- user defined strings and macros ------------------------------------ */ 3797 3798 static int 3799 roff_userdef(ROFF_ARGS) 3800 { 3801 struct mctx *ctx; 3802 char *arg, *ap, *dst, *src; 3803 size_t sz; 3804 3805 /* Initialize a new macro stack context. 
*/ 3806 3807 if (++r->mstackpos == r->mstacksz) { 3808 r->mstack = mandoc_recallocarray(r->mstack, 3809 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3810 r->mstacksz += 8; 3811 } 3812 ctx = r->mstack + r->mstackpos; 3813 ctx->argsz = 0; 3814 ctx->argc = 0; 3815 ctx->argv = NULL; 3816 3817 /* 3818 * Collect pointers to macro argument strings, 3819 * NUL-terminating them and escaping quotes. 3820 */ 3821 3822 src = buf->buf + pos; 3823 while (*src != '\0') { 3824 if (ctx->argc == ctx->argsz) { 3825 ctx->argsz += 8; 3826 ctx->argv = mandoc_reallocarray(ctx->argv, 3827 ctx->argsz, sizeof(*ctx->argv)); 3828 } 3829 arg = roff_getarg(r, &src, ln, &pos); 3830 sz = 1; /* For the terminating NUL. */ 3831 for (ap = arg; *ap != '\0'; ap++) 3832 sz += *ap == '"' ? 4 : 1; 3833 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3834 for (ap = arg; *ap != '\0'; ap++) { 3835 if (*ap == '"') { 3836 memcpy(dst, "\\(dq", 4); 3837 dst += 4; 3838 } else 3839 *dst++ = *ap; 3840 } 3841 *dst = '\0'; 3842 free(arg); 3843 } 3844 3845 /* Replace the macro invocation by the macro definition. */ 3846 3847 free(buf->buf); 3848 buf->buf = mandoc_strdup(r->current_string); 3849 buf->sz = strlen(buf->buf) + 1; 3850 *offs = 0; 3851 3852 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ? 3853 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3854 } 3855 3856 /* 3857 * Calling a high-level macro that was renamed with .rn. 3858 * r->current_string has already been set up by roff_parse(). 3859 */ 3860 static int 3861 roff_renamed(ROFF_ARGS) 3862 { 3863 char *nbuf; 3864 3865 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3866 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 3867 free(buf->buf); 3868 buf->buf = nbuf; 3869 *offs = 0; 3870 return ROFF_CONT; 3871 } 3872 3873 /* 3874 * Measure the length in bytes of the roff identifier at *cpp 3875 * and advance the pointer to the next word. 
3876 */ 3877 static size_t 3878 roff_getname(struct roff *r, char **cpp, int ln, int pos) 3879 { 3880 char *name, *cp; 3881 size_t namesz; 3882 3883 name = *cpp; 3884 if (*name == '\0') 3885 return 0; 3886 3887 /* Advance cp to the byte after the end of the name. */ 3888 3889 for (cp = name; 1; cp++) { 3890 namesz = cp - name; 3891 if (*cp == '\0') 3892 break; 3893 if (*cp == ' ' || *cp == '\t') { 3894 cp++; 3895 break; 3896 } 3897 if (*cp != '\\') 3898 continue; 3899 if (cp[1] == '{' || cp[1] == '}') 3900 break; 3901 if (*++cp == '\\') 3902 continue; 3903 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 3904 "%.*s", (int)(cp - name + 1), name); 3905 mandoc_escape((const char **)&cp, NULL, NULL); 3906 break; 3907 } 3908 3909 /* Read past spaces. */ 3910 3911 while (*cp == ' ') 3912 cp++; 3913 3914 *cpp = cp; 3915 return namesz; 3916 } 3917 3918 /* 3919 * Store *string into the user-defined string called *name. 3920 * To clear an existing entry, call with (*r, *name, NULL, 0). 3921 * append == 0: replace mode 3922 * append == 1: single-line append mode 3923 * append == 2: multiline append mode, append '\n' after each call 3924 */ 3925 static void 3926 roff_setstr(struct roff *r, const char *name, const char *string, 3927 int append) 3928 { 3929 size_t namesz; 3930 3931 namesz = strlen(name); 3932 roff_setstrn(&r->strtab, name, namesz, string, 3933 string ? strlen(string) : 0, append); 3934 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3935 } 3936 3937 static void 3938 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 3939 const char *string, size_t stringsz, int append) 3940 { 3941 struct roffkv *n; 3942 char *c; 3943 int i; 3944 size_t oldch, newch; 3945 3946 /* Search for an existing string with the same name. */ 3947 n = *r; 3948 3949 while (n && (namesz != n->key.sz || 3950 strncmp(n->key.p, name, namesz))) 3951 n = n->next; 3952 3953 if (NULL == n) { 3954 /* Create a new string table entry. 
*/ 3955 n = mandoc_malloc(sizeof(struct roffkv)); 3956 n->key.p = mandoc_strndup(name, namesz); 3957 n->key.sz = namesz; 3958 n->val.p = NULL; 3959 n->val.sz = 0; 3960 n->next = *r; 3961 *r = n; 3962 } else if (0 == append) { 3963 free(n->val.p); 3964 n->val.p = NULL; 3965 n->val.sz = 0; 3966 } 3967 3968 if (NULL == string) 3969 return; 3970 3971 /* 3972 * One additional byte for the '\n' in multiline mode, 3973 * and one for the terminating '\0'. 3974 */ 3975 newch = stringsz + (1 < append ? 2u : 1u); 3976 3977 if (NULL == n->val.p) { 3978 n->val.p = mandoc_malloc(newch); 3979 *n->val.p = '\0'; 3980 oldch = 0; 3981 } else { 3982 oldch = n->val.sz; 3983 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 3984 } 3985 3986 /* Skip existing content in the destination buffer. */ 3987 c = n->val.p + (int)oldch; 3988 3989 /* Append new content to the destination buffer. */ 3990 i = 0; 3991 while (i < (int)stringsz) { 3992 /* 3993 * Rudimentary roff copy mode: 3994 * Handle escaped backslashes. 3995 */ 3996 if ('\\' == string[i] && '\\' == string[i + 1]) 3997 i++; 3998 *c++ = string[i++]; 3999 } 4000 4001 /* Append terminating bytes. 
*/ 4002 if (1 < append) 4003 *c++ = '\n'; 4004 4005 *c = '\0'; 4006 n->val.sz = (int)(c - n->val.p); 4007 } 4008 4009 static const char * 4010 roff_getstrn(struct roff *r, const char *name, size_t len, 4011 int *deftype) 4012 { 4013 const struct roffkv *n; 4014 int found, i; 4015 enum roff_tok tok; 4016 4017 found = 0; 4018 for (n = r->strtab; n != NULL; n = n->next) { 4019 if (strncmp(name, n->key.p, len) != 0 || 4020 n->key.p[len] != '\0' || n->val.p == NULL) 4021 continue; 4022 if (*deftype & ROFFDEF_USER) { 4023 *deftype = ROFFDEF_USER; 4024 return n->val.p; 4025 } else { 4026 found = 1; 4027 break; 4028 } 4029 } 4030 for (n = r->rentab; n != NULL; n = n->next) { 4031 if (strncmp(name, n->key.p, len) != 0 || 4032 n->key.p[len] != '\0' || n->val.p == NULL) 4033 continue; 4034 if (*deftype & ROFFDEF_REN) { 4035 *deftype = ROFFDEF_REN; 4036 return n->val.p; 4037 } else { 4038 found = 1; 4039 break; 4040 } 4041 } 4042 for (i = 0; i < PREDEFS_MAX; i++) { 4043 if (strncmp(name, predefs[i].name, len) != 0 || 4044 predefs[i].name[len] != '\0') 4045 continue; 4046 if (*deftype & ROFFDEF_PRE) { 4047 *deftype = ROFFDEF_PRE; 4048 return predefs[i].str; 4049 } else { 4050 found = 1; 4051 break; 4052 } 4053 } 4054 if (r->man->meta.macroset != MACROSET_MAN) { 4055 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4056 if (strncmp(name, roff_name[tok], len) != 0 || 4057 roff_name[tok][len] != '\0') 4058 continue; 4059 if (*deftype & ROFFDEF_STD) { 4060 *deftype = ROFFDEF_STD; 4061 return NULL; 4062 } else { 4063 found = 1; 4064 break; 4065 } 4066 } 4067 } 4068 if (r->man->meta.macroset != MACROSET_MDOC) { 4069 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4070 if (strncmp(name, roff_name[tok], len) != 0 || 4071 roff_name[tok][len] != '\0') 4072 continue; 4073 if (*deftype & ROFFDEF_STD) { 4074 *deftype = ROFFDEF_STD; 4075 return NULL; 4076 } else { 4077 found = 1; 4078 break; 4079 } 4080 } 4081 } 4082 4083 if (found == 0 && *deftype != ROFFDEF_ANY) { 4084 if (*deftype & ROFFDEF_REN) { 
4085 /* 4086 * This might still be a request, 4087 * so do not treat it as undefined yet. 4088 */ 4089 *deftype = ROFFDEF_UNDEF; 4090 return NULL; 4091 } 4092 4093 /* Using an undefined string defines it to be empty. */ 4094 4095 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4096 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4097 } 4098 4099 *deftype = 0; 4100 return NULL; 4101 } 4102 4103 static void 4104 roff_freestr(struct roffkv *r) 4105 { 4106 struct roffkv *n, *nn; 4107 4108 for (n = r; n; n = nn) { 4109 free(n->key.p); 4110 free(n->val.p); 4111 nn = n->next; 4112 free(n); 4113 } 4114 } 4115 4116 /* --- accessors and utility functions ------------------------------------ */ 4117 4118 /* 4119 * Duplicate an input string, making the appropriate character 4120 * conversations (as stipulated by `tr') along the way. 4121 * Returns a heap-allocated string with all the replacements made. 4122 */ 4123 char * 4124 roff_strdup(const struct roff *r, const char *p) 4125 { 4126 const struct roffkv *cp; 4127 char *res; 4128 const char *pp; 4129 size_t ssz, sz; 4130 enum mandoc_esc esc; 4131 4132 if (NULL == r->xmbtab && NULL == r->xtab) 4133 return mandoc_strdup(p); 4134 else if ('\0' == *p) 4135 return mandoc_strdup(""); 4136 4137 /* 4138 * Step through each character looking for term matches 4139 * (remember that a `tr' can be invoked with an escape, which is 4140 * a glyph but the escape is multi-character). 4141 * We only do this if the character hash has been initialised 4142 * and the string is >0 length. 
4143 */ 4144 4145 res = NULL; 4146 ssz = 0; 4147 4148 while ('\0' != *p) { 4149 assert((unsigned int)*p < 128); 4150 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4151 sz = r->xtab[(int)*p].sz; 4152 res = mandoc_realloc(res, ssz + sz + 1); 4153 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4154 ssz += sz; 4155 p++; 4156 continue; 4157 } else if ('\\' != *p) { 4158 res = mandoc_realloc(res, ssz + 2); 4159 res[ssz++] = *p++; 4160 continue; 4161 } 4162 4163 /* Search for term matches. */ 4164 for (cp = r->xmbtab; cp; cp = cp->next) 4165 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4166 break; 4167 4168 if (NULL != cp) { 4169 /* 4170 * A match has been found. 4171 * Append the match to the array and move 4172 * forward by its keysize. 4173 */ 4174 res = mandoc_realloc(res, 4175 ssz + cp->val.sz + 1); 4176 memcpy(res + ssz, cp->val.p, cp->val.sz); 4177 ssz += cp->val.sz; 4178 p += (int)cp->key.sz; 4179 continue; 4180 } 4181 4182 /* 4183 * Handle escapes carefully: we need to copy 4184 * over just the escape itself, or else we might 4185 * do replacements within the escape itself. 4186 * Make sure to pass along the bogus string. 4187 */ 4188 pp = p++; 4189 esc = mandoc_escape(&p, NULL, NULL); 4190 if (ESCAPE_ERROR == esc) { 4191 sz = strlen(pp); 4192 res = mandoc_realloc(res, ssz + sz + 1); 4193 memcpy(res + ssz, pp, sz); 4194 break; 4195 } 4196 /* 4197 * We bail out on bad escapes. 4198 * No need to warn: we already did so when 4199 * roff_expand() was called. 4200 */ 4201 sz = (int)(p - pp); 4202 res = mandoc_realloc(res, ssz + sz + 1); 4203 memcpy(res + ssz, pp, sz); 4204 ssz += sz; 4205 } 4206 4207 res[(int)ssz] = '\0'; 4208 return res; 4209 } 4210 4211 int 4212 roff_getformat(const struct roff *r) 4213 { 4214 4215 return r->format; 4216 } 4217 4218 /* 4219 * Find out whether a line is a macro line or not. 4220 * If it is, adjust the current position and return one; if it isn't, 4221 * return zero and don't change the current position. 
4222 * If the control character has been set with `.cc', then let that grain 4223 * precedence. 4224 * This is slighly contrary to groff, where using the non-breaking 4225 * control character when `cc' has been invoked will cause the 4226 * non-breaking macro contents to be printed verbatim. 4227 */ 4228 int 4229 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4230 { 4231 int pos; 4232 4233 pos = *ppos; 4234 4235 if (r->control != '\0' && cp[pos] == r->control) 4236 pos++; 4237 else if (r->control != '\0') 4238 return 0; 4239 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4240 pos += 2; 4241 else if ('.' == cp[pos] || '\'' == cp[pos]) 4242 pos++; 4243 else 4244 return 0; 4245 4246 while (' ' == cp[pos] || '\t' == cp[pos]) 4247 pos++; 4248 4249 *ppos = pos; 4250 return 1; 4251 } 4252