/* $OpenBSD: roff.c,v 1.233 2019/01/05 09:10:24 schwarze Exp $ */
/*
 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "mandoc.h"
#include "roff.h"
#include "mandoc_parse.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "tbl_parse.h"
#include "eqn_parse.h"

/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated. As it is used nowhere
 * else, it is defined here rather than in a header file.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * These are single-bit flags so that several can be combined in a mask;
 * ROFFDEF_ANY is the union of the four "defined" kinds.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */

/* --- data types --------------------------------------------------------- */

/*
 * An incredibly-simple string buffer.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};

/*
 * A key-value roffstr pair as part of a singly-linked list.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};

/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current value */
	int		 step; /* presumably the auto-increment step; TODO confirm */
	struct roffreg	*next; /* next in list */
};

/*
 * Association of request and macro names with token IDs.
 * The name is a flexible array member: roffhash_alloc() allocates each
 * entry with room for the nil-terminated name right behind the token.
 */
struct	roffreq {
	enum roff_tok	 tok;
	char		 name[];
};

/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 */
struct	mctx {
	char		**argv; /* argument vector; the array is freed in roff_free() */
	int		 argc; /* presumably the number of arguments in use; TODO confirm */
	int		 argsz; /* presumably the allocated size of argv; TODO confirm */
};

/*
 * The complete state of one roff parser instance.
 * Allocated by roff_alloc(), cleared for reuse by roff_reset(),
 * and destroyed by roff_free().
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};

/*
 * One entry on the stack of pending roff block requests;
 * pushed by roffnode_push() and released by roffnode_pop().
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};

/* Common parameter list shared by all request handler functions. */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler. */
typedef	int (*roffproc)(ROFF_ARGS);

/* The set of handlers registered for one request in roffs[]. */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};

/* One predefined string; see predefs[]. */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Expands to one initializer of struct predef, including the comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },

/* --- function prototypes ------------------------------------------------ */

static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);

/* --- constant data ------------------------------------------------------ */

#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/*
 * Names of requests and macros, indexed by token number:
 * roffhash_alloc() reads roff_name[tok] for every token in its range
 * and skips NULL entries.
 * NOTE(review): the NULL entries appear to line up with sentinel tokens
 * (the first one sits where roffs[] is labelled ROFF_MAX) - confirm
 * against enum roff_tok before adding or reordering names.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the name table; this is what the code below uses. */
const	char *const *roff_name =
__roff_name;

/*
 * Handlers for the roff requests.
 * Each entry is labelled with the request it serves; the labels follow
 * the same order as the request names at the start of __roff_name[].
 * Requests using roff_unsupp, roff_line_ignore or roff_insec share a
 * generic handler; their exact behaviour is defined further down in
 * this file.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_unsupp, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{
roff_insec, NULL, NULL, 0 }, /* writec */ 607 { roff_insec, NULL, NULL, 0 }, /* writem */ 608 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 609 { roff_cblock, NULL, NULL, 0 }, /* . */ 610 { roff_renamed, NULL, NULL, 0 }, 611 { roff_userdef, NULL, NULL, 0 } 612 }; 613 614 /* Array of injected predefined strings. */ 615 #define PREDEFS_MAX 38 616 static const struct predef predefs[PREDEFS_MAX] = { 617 #include "predefs.in" 618 }; 619 620 static int roffce_lines; /* number of input lines to center */ 621 static struct roff_node *roffce_node; /* active request */ 622 static int roffit_lines; /* number of lines to delay */ 623 static char *roffit_macro; /* nil-terminated macro line */ 624 625 626 /* --- request table ------------------------------------------------------ */ 627 628 struct ohash * 629 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 630 { 631 struct ohash *htab; 632 struct roffreq *req; 633 enum roff_tok tok; 634 size_t sz; 635 unsigned int slot; 636 637 htab = mandoc_malloc(sizeof(*htab)); 638 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 639 640 for (tok = mintok; tok < maxtok; tok++) { 641 if (roff_name[tok] == NULL) 642 continue; 643 sz = strlen(roff_name[tok]); 644 req = mandoc_malloc(sizeof(*req) + sz + 1); 645 req->tok = tok; 646 memcpy(req->name, roff_name[tok], sz + 1); 647 slot = ohash_qlookup(htab, req->name); 648 ohash_insert(htab, slot, req); 649 } 650 return htab; 651 } 652 653 void 654 roffhash_free(struct ohash *htab) 655 { 656 struct roffreq *req; 657 unsigned int slot; 658 659 if (htab == NULL) 660 return; 661 for (req = ohash_first(htab, &slot); req != NULL; 662 req = ohash_next(htab, &slot)) 663 free(req); 664 ohash_delete(htab); 665 free(htab); 666 } 667 668 enum roff_tok 669 roffhash_find(struct ohash *htab, const char *name, size_t sz) 670 { 671 struct roffreq *req; 672 const char *end; 673 674 if (sz) { 675 end = name + sz; 676 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 677 } else 678 req 
= ohash_find(htab, ohash_qlookup(htab, name)); 679 return req == NULL ? TOKEN_NONE : req->tok; 680 } 681 682 /* --- stack of request blocks -------------------------------------------- */ 683 684 /* 685 * Pop the current node off of the stack of roff instructions currently 686 * pending. 687 */ 688 static int 689 roffnode_pop(struct roff *r) 690 { 691 struct roffnode *p; 692 int inloop; 693 694 p = r->last; 695 inloop = p->tok == ROFF_while; 696 r->last = p->parent; 697 free(p->name); 698 free(p->end); 699 free(p); 700 return inloop; 701 } 702 703 /* 704 * Push a roff node onto the instruction stack. This must later be 705 * removed with roffnode_pop(). 706 */ 707 static void 708 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 709 int line, int col) 710 { 711 struct roffnode *p; 712 713 p = mandoc_calloc(1, sizeof(struct roffnode)); 714 p->tok = tok; 715 if (name) 716 p->name = mandoc_strdup(name); 717 p->parent = r->last; 718 p->line = line; 719 p->col = col; 720 p->rule = p->parent ? 
p->parent->rule : 0; 721 722 r->last = p; 723 } 724 725 /* --- roff parser state data management ---------------------------------- */ 726 727 static void 728 roff_free1(struct roff *r) 729 { 730 int i; 731 732 tbl_free(r->first_tbl); 733 r->first_tbl = r->last_tbl = r->tbl = NULL; 734 735 eqn_free(r->last_eqn); 736 r->last_eqn = r->eqn = NULL; 737 738 while (r->mstackpos >= 0) 739 roff_userret(r); 740 741 while (r->last) 742 roffnode_pop(r); 743 744 free (r->rstack); 745 r->rstack = NULL; 746 r->rstacksz = 0; 747 r->rstackpos = -1; 748 749 roff_freereg(r->regtab); 750 r->regtab = NULL; 751 752 roff_freestr(r->strtab); 753 roff_freestr(r->rentab); 754 roff_freestr(r->xmbtab); 755 r->strtab = r->rentab = r->xmbtab = NULL; 756 757 if (r->xtab) 758 for (i = 0; i < 128; i++) 759 free(r->xtab[i].p); 760 free(r->xtab); 761 r->xtab = NULL; 762 } 763 764 void 765 roff_reset(struct roff *r) 766 { 767 roff_free1(r); 768 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 769 r->control = '\0'; 770 r->escape = '\\'; 771 roffce_lines = 0; 772 roffce_node = NULL; 773 roffit_lines = 0; 774 roffit_macro = NULL; 775 } 776 777 void 778 roff_free(struct roff *r) 779 { 780 int i; 781 782 roff_free1(r); 783 for (i = 0; i < r->mstacksz; i++) 784 free(r->mstack[i].argv); 785 free(r->mstack); 786 roffhash_free(r->reqtab); 787 free(r); 788 } 789 790 struct roff * 791 roff_alloc(int options) 792 { 793 struct roff *r; 794 795 r = mandoc_calloc(1, sizeof(struct roff)); 796 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 797 r->options = options; 798 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 799 r->mstackpos = -1; 800 r->rstackpos = -1; 801 r->escape = '\\'; 802 return r; 803 } 804 805 /* --- syntax tree state data management ---------------------------------- */ 806 807 static void 808 roff_man_free1(struct roff_man *man) 809 { 810 if (man->meta.first != NULL) 811 roff_node_delete(man, man->meta.first); 812 free(man->meta.msec); 813 free(man->meta.vol); 814 free(man->meta.os); 815 
free(man->meta.arch); 816 free(man->meta.title); 817 free(man->meta.name); 818 free(man->meta.date); 819 free(man->meta.sodest); 820 } 821 822 void 823 roff_state_reset(struct roff_man *man) 824 { 825 man->last = man->meta.first; 826 man->last_es = NULL; 827 man->flags = 0; 828 man->lastsec = man->lastnamed = SEC_NONE; 829 man->next = ROFF_NEXT_CHILD; 830 roff_setreg(man->roff, "nS", 0, '='); 831 } 832 833 static void 834 roff_man_alloc1(struct roff_man *man) 835 { 836 memset(&man->meta, 0, sizeof(man->meta)); 837 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 838 man->meta.first->type = ROFFT_ROOT; 839 man->meta.macroset = MACROSET_NONE; 840 roff_state_reset(man); 841 } 842 843 void 844 roff_man_reset(struct roff_man *man) 845 { 846 roff_man_free1(man); 847 roff_man_alloc1(man); 848 } 849 850 void 851 roff_man_free(struct roff_man *man) 852 { 853 roff_man_free1(man); 854 free(man); 855 } 856 857 struct roff_man * 858 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 859 { 860 struct roff_man *man; 861 862 man = mandoc_calloc(1, sizeof(*man)); 863 man->roff = roff; 864 man->os_s = os_s; 865 man->quick = quick; 866 roff_man_alloc1(man); 867 roff->man = man; 868 return man; 869 } 870 871 /* --- syntax tree handling ----------------------------------------------- */ 872 873 struct roff_node * 874 roff_node_alloc(struct roff_man *man, int line, int pos, 875 enum roff_type type, int tok) 876 { 877 struct roff_node *n; 878 879 n = mandoc_calloc(1, sizeof(*n)); 880 n->line = line; 881 n->pos = pos; 882 n->tok = tok; 883 n->type = type; 884 n->sec = man->lastsec; 885 886 if (man->flags & MDOC_SYNOPSIS) 887 n->flags |= NODE_SYNPRETTY; 888 else 889 n->flags &= ~NODE_SYNPRETTY; 890 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 891 n->flags |= NODE_NOFILL; 892 else 893 n->flags &= ~NODE_NOFILL; 894 if (man->flags & MDOC_NEWLINE) 895 n->flags |= NODE_LINE; 896 man->flags &= ~MDOC_NEWLINE; 897 898 return n; 899 } 900 901 void 902 
roff_node_append(struct roff_man *man, struct roff_node *n) 903 { 904 905 switch (man->next) { 906 case ROFF_NEXT_SIBLING: 907 if (man->last->next != NULL) { 908 n->next = man->last->next; 909 man->last->next->prev = n; 910 } else 911 man->last->parent->last = n; 912 man->last->next = n; 913 n->prev = man->last; 914 n->parent = man->last->parent; 915 break; 916 case ROFF_NEXT_CHILD: 917 if (man->last->child != NULL) { 918 n->next = man->last->child; 919 man->last->child->prev = n; 920 } else 921 man->last->last = n; 922 man->last->child = n; 923 n->parent = man->last; 924 break; 925 default: 926 abort(); 927 } 928 man->last = n; 929 930 switch (n->type) { 931 case ROFFT_HEAD: 932 n->parent->head = n; 933 break; 934 case ROFFT_BODY: 935 if (n->end != ENDBODY_NOT) 936 return; 937 n->parent->body = n; 938 break; 939 case ROFFT_TAIL: 940 n->parent->tail = n; 941 break; 942 default: 943 return; 944 } 945 946 /* 947 * Copy over the normalised-data pointer of our parent. Not 948 * everybody has one, but copying a null pointer is fine. 
949 */ 950 951 n->norm = n->parent->norm; 952 assert(n->parent->type == ROFFT_BLOCK); 953 } 954 955 void 956 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 957 { 958 struct roff_node *n; 959 960 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 961 n->string = roff_strdup(man->roff, word); 962 roff_node_append(man, n); 963 n->flags |= NODE_VALID | NODE_ENDED; 964 man->next = ROFF_NEXT_SIBLING; 965 } 966 967 void 968 roff_word_append(struct roff_man *man, const char *word) 969 { 970 struct roff_node *n; 971 char *addstr, *newstr; 972 973 n = man->last; 974 addstr = roff_strdup(man->roff, word); 975 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 976 free(addstr); 977 free(n->string); 978 n->string = newstr; 979 man->next = ROFF_NEXT_SIBLING; 980 } 981 982 void 983 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 984 { 985 struct roff_node *n; 986 987 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 988 roff_node_append(man, n); 989 man->next = ROFF_NEXT_CHILD; 990 } 991 992 struct roff_node * 993 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 994 { 995 struct roff_node *n; 996 997 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 998 roff_node_append(man, n); 999 man->next = ROFF_NEXT_CHILD; 1000 return n; 1001 } 1002 1003 struct roff_node * 1004 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1005 { 1006 struct roff_node *n; 1007 1008 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1009 roff_node_append(man, n); 1010 man->next = ROFF_NEXT_CHILD; 1011 return n; 1012 } 1013 1014 struct roff_node * 1015 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1016 { 1017 struct roff_node *n; 1018 1019 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1020 roff_node_append(man, n); 1021 man->next = ROFF_NEXT_CHILD; 1022 return n; 1023 } 1024 1025 static void 1026 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1027 { 1028 
struct roff_node *n; 1029 struct tbl_span *span; 1030 1031 if (man->meta.macroset == MACROSET_MAN) 1032 man_breakscope(man, ROFF_TS); 1033 while ((span = tbl_span(tbl)) != NULL) { 1034 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1035 n->span = span; 1036 roff_node_append(man, n); 1037 n->flags |= NODE_VALID | NODE_ENDED; 1038 man->next = ROFF_NEXT_SIBLING; 1039 } 1040 } 1041 1042 void 1043 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1044 { 1045 1046 /* Adjust siblings. */ 1047 1048 if (n->prev) 1049 n->prev->next = n->next; 1050 if (n->next) 1051 n->next->prev = n->prev; 1052 1053 /* Adjust parent. */ 1054 1055 if (n->parent != NULL) { 1056 if (n->parent->child == n) 1057 n->parent->child = n->next; 1058 if (n->parent->last == n) 1059 n->parent->last = n->prev; 1060 } 1061 1062 /* Adjust parse point. */ 1063 1064 if (man == NULL) 1065 return; 1066 if (man->last == n) { 1067 if (n->prev == NULL) { 1068 man->last = n->parent; 1069 man->next = ROFF_NEXT_CHILD; 1070 } else { 1071 man->last = n->prev; 1072 man->next = ROFF_NEXT_SIBLING; 1073 } 1074 } 1075 if (man->meta.first == n) 1076 man->meta.first = NULL; 1077 } 1078 1079 void 1080 roff_node_relink(struct roff_man *man, struct roff_node *n) 1081 { 1082 roff_node_unlink(man, n); 1083 n->prev = n->next = NULL; 1084 roff_node_append(man, n); 1085 } 1086 1087 void 1088 roff_node_free(struct roff_node *n) 1089 { 1090 1091 if (n->args != NULL) 1092 mdoc_argv_free(n->args); 1093 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1094 free(n->norm); 1095 eqn_box_free(n->eqn); 1096 free(n->string); 1097 free(n); 1098 } 1099 1100 void 1101 roff_node_delete(struct roff_man *man, struct roff_node *n) 1102 { 1103 1104 while (n->child != NULL) 1105 roff_node_delete(man, n->child); 1106 roff_node_unlink(man, n); 1107 roff_node_free(n); 1108 } 1109 1110 void 1111 deroff(char **dest, const struct roff_node *n) 1112 { 1113 char *cp; 1114 size_t sz; 1115 1116 if (n->type != ROFFT_TEXT) { 1117 for (n = 
n->child; n != NULL; n = n->next) 1118 deroff(dest, n); 1119 return; 1120 } 1121 1122 /* Skip leading whitespace. */ 1123 1124 for (cp = n->string; *cp != '\0'; cp++) { 1125 if (cp[0] == '\\' && cp[1] != '\0' && 1126 strchr(" %&0^|~", cp[1]) != NULL) 1127 cp++; 1128 else if ( ! isspace((unsigned char)*cp)) 1129 break; 1130 } 1131 1132 /* Skip trailing backslash. */ 1133 1134 sz = strlen(cp); 1135 if (sz > 0 && cp[sz - 1] == '\\') 1136 sz--; 1137 1138 /* Skip trailing whitespace. */ 1139 1140 for (; sz; sz--) 1141 if ( ! isspace((unsigned char)cp[sz-1])) 1142 break; 1143 1144 /* Skip empty strings. */ 1145 1146 if (sz == 0) 1147 return; 1148 1149 if (*dest == NULL) { 1150 *dest = mandoc_strndup(cp, sz); 1151 return; 1152 } 1153 1154 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1155 free(*dest); 1156 *dest = cp; 1157 } 1158 1159 /* --- main functions of the roff parser ---------------------------------- */ 1160 1161 /* 1162 * In the current line, expand escape sequences that produce parsable 1163 * input text. Also check the syntax of the remaining escape sequences, 1164 * which typically produce output glyphs or change formatter state. 1165 */ 1166 static int 1167 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1168 { 1169 struct mctx *ctx; /* current macro call context */ 1170 char ubuf[24]; /* buffer to print the number */ 1171 struct roff_node *n; /* used for header comments */ 1172 const char *start; /* start of the string to process */ 1173 char *stesc; /* start of an escape sequence ('\\') */ 1174 const char *esct; /* type of esccape sequence */ 1175 char *ep; /* end of comment string */ 1176 const char *stnam; /* start of the name, after "[(*" */ 1177 const char *cp; /* end of the name, e.g. 
before ']' */ 1178 const char *res; /* the string to be substituted */ 1179 char *nbuf; /* new buffer to copy buf->buf to */ 1180 size_t maxl; /* expected length of the escape name */ 1181 size_t naml; /* actual length of the escape name */ 1182 size_t asz; /* length of the replacement */ 1183 size_t rsz; /* length of the rest of the string */ 1184 int inaml; /* length returned from mandoc_escape() */ 1185 int expand_count; /* to avoid infinite loops */ 1186 int npos; /* position in numeric expression */ 1187 int arg_complete; /* argument not interrupted by eol */ 1188 int quote_args; /* true for \\$@, false for \\$* */ 1189 int done; /* no more input available */ 1190 int deftype; /* type of definition to paste */ 1191 int rcsid; /* kind of RCS id seen */ 1192 enum mandocerr err; /* for escape sequence problems */ 1193 char sign; /* increment number register */ 1194 char term; /* character terminating the escape */ 1195 1196 /* Search forward for comments. */ 1197 1198 done = 0; 1199 start = buf->buf + pos; 1200 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { 1201 if (stesc[0] != newesc || stesc[1] == '\0') 1202 continue; 1203 stesc++; 1204 if (*stesc != '"' && *stesc != '#') 1205 continue; 1206 1207 /* Comment found, look for RCS id. */ 1208 1209 rcsid = 0; 1210 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { 1211 rcsid = 1 << MANDOC_OS_OPENBSD; 1212 cp += 8; 1213 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { 1214 rcsid = 1 << MANDOC_OS_NETBSD; 1215 cp += 7; 1216 } 1217 if (cp != NULL && 1218 isalnum((unsigned char)*cp) == 0 && 1219 strchr(cp, '$') != NULL) { 1220 if (r->man->meta.rcsids & rcsid) 1221 mandoc_msg(MANDOCERR_RCS_REP, ln, 1222 (int)(stesc - buf->buf) + 1, 1223 "%s", stesc + 1); 1224 r->man->meta.rcsids |= rcsid; 1225 } 1226 1227 /* Handle trailing whitespace. 
*/ 1228 1229 ep = strchr(stesc--, '\0') - 1; 1230 if (*ep == '\n') { 1231 done = 1; 1232 ep--; 1233 } 1234 if (*ep == ' ' || *ep == '\t') 1235 mandoc_msg(MANDOCERR_SPACE_EOL, 1236 ln, (int)(ep - buf->buf), NULL); 1237 1238 /* 1239 * Save comments preceding the title macro 1240 * in the syntax tree. 1241 */ 1242 1243 if (newesc != ASCII_ESC && r->format == 0) { 1244 while (*ep == ' ' || *ep == '\t') 1245 ep--; 1246 ep[1] = '\0'; 1247 n = roff_node_alloc(r->man, 1248 ln, stesc + 1 - buf->buf, 1249 ROFFT_COMMENT, TOKEN_NONE); 1250 n->string = mandoc_strdup(stesc + 2); 1251 roff_node_append(r->man, n); 1252 n->flags |= NODE_VALID | NODE_ENDED; 1253 r->man->next = ROFF_NEXT_SIBLING; 1254 } 1255 1256 /* Line continuation with comment. */ 1257 1258 if (stesc[1] == '#') { 1259 *stesc = '\0'; 1260 return ROFF_IGN | ROFF_APPEND; 1261 } 1262 1263 /* Discard normal comments. */ 1264 1265 while (stesc > start && stesc[-1] == ' ' && 1266 (stesc == start + 1 || stesc[-2] != '\\')) 1267 stesc--; 1268 *stesc = '\0'; 1269 break; 1270 } 1271 if (stesc == start) 1272 return ROFF_CONT; 1273 stesc--; 1274 1275 /* Notice the end of the input. */ 1276 1277 if (*stesc == '\n') { 1278 *stesc-- = '\0'; 1279 done = 1; 1280 } 1281 1282 expand_count = 0; 1283 while (stesc >= start) { 1284 if (*stesc != newesc) { 1285 1286 /* 1287 * If we have a non-standard escape character, 1288 * escape literal backslashes because all 1289 * processing in subsequent functions uses 1290 * the standard escaping rules. 1291 */ 1292 1293 if (newesc != ASCII_ESC && *stesc == '\\') { 1294 *stesc = '\0'; 1295 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", 1296 buf->buf, stesc + 1) + 1; 1297 start = nbuf + pos; 1298 stesc = nbuf + (stesc - buf->buf); 1299 free(buf->buf); 1300 buf->buf = nbuf; 1301 } 1302 1303 /* Search backwards for the next escape. */ 1304 1305 stesc--; 1306 continue; 1307 } 1308 1309 /* If it is escaped, skip it. 
*/ 1310 1311 for (cp = stesc - 1; cp >= start; cp--) 1312 if (*cp != r->escape) 1313 break; 1314 1315 if ((stesc - cp) % 2 == 0) { 1316 while (stesc > cp) 1317 *stesc-- = '\\'; 1318 continue; 1319 } else if (stesc[1] != '\0') { 1320 *stesc = '\\'; 1321 } else { 1322 *stesc-- = '\0'; 1323 if (done) 1324 continue; 1325 else 1326 return ROFF_IGN | ROFF_APPEND; 1327 } 1328 1329 /* Decide whether to expand or to check only. */ 1330 1331 term = '\0'; 1332 cp = stesc + 1; 1333 if (*cp == 'E') 1334 cp++; 1335 esct = cp; 1336 switch (*esct) { 1337 case '*': 1338 case '$': 1339 res = NULL; 1340 break; 1341 case 'B': 1342 case 'w': 1343 term = cp[1]; 1344 /* FALLTHROUGH */ 1345 case 'n': 1346 sign = cp[1]; 1347 if (sign == '+' || sign == '-') 1348 cp++; 1349 res = ubuf; 1350 break; 1351 default: 1352 err = MANDOCERR_OK; 1353 switch(mandoc_escape(&cp, &stnam, &inaml)) { 1354 case ESCAPE_SPECIAL: 1355 if (mchars_spec2cp(stnam, inaml) >= 0) 1356 break; 1357 /* FALLTHROUGH */ 1358 case ESCAPE_ERROR: 1359 err = MANDOCERR_ESC_BAD; 1360 break; 1361 case ESCAPE_UNDEF: 1362 err = MANDOCERR_ESC_UNDEF; 1363 break; 1364 case ESCAPE_UNSUPP: 1365 err = MANDOCERR_ESC_UNSUPP; 1366 break; 1367 default: 1368 break; 1369 } 1370 if (err != MANDOCERR_OK) 1371 mandoc_msg(err, ln, (int)(stesc - buf->buf), 1372 "%.*s", (int)(cp - stesc), stesc); 1373 stesc--; 1374 continue; 1375 } 1376 1377 if (EXPAND_LIMIT < ++expand_count) { 1378 mandoc_msg(MANDOCERR_ROFFLOOP, 1379 ln, (int)(stesc - buf->buf), NULL); 1380 return ROFF_IGN; 1381 } 1382 1383 /* 1384 * The third character decides the length 1385 * of the name of the string or register. 1386 * Save a pointer to the name. 
1387 */ 1388 1389 if (term == '\0') { 1390 switch (*++cp) { 1391 case '\0': 1392 maxl = 0; 1393 break; 1394 case '(': 1395 cp++; 1396 maxl = 2; 1397 break; 1398 case '[': 1399 cp++; 1400 term = ']'; 1401 maxl = 0; 1402 break; 1403 default: 1404 maxl = 1; 1405 break; 1406 } 1407 } else { 1408 cp += 2; 1409 maxl = 0; 1410 } 1411 stnam = cp; 1412 1413 /* Advance to the end of the name. */ 1414 1415 naml = 0; 1416 arg_complete = 1; 1417 while (maxl == 0 || naml < maxl) { 1418 if (*cp == '\0') { 1419 mandoc_msg(MANDOCERR_ESC_BAD, ln, 1420 (int)(stesc - buf->buf), "%s", stesc); 1421 arg_complete = 0; 1422 break; 1423 } 1424 if (maxl == 0 && *cp == term) { 1425 cp++; 1426 break; 1427 } 1428 if (*cp++ != '\\' || *esct != 'w') { 1429 naml++; 1430 continue; 1431 } 1432 switch (mandoc_escape(&cp, NULL, NULL)) { 1433 case ESCAPE_SPECIAL: 1434 case ESCAPE_UNICODE: 1435 case ESCAPE_NUMBERED: 1436 case ESCAPE_UNDEF: 1437 case ESCAPE_OVERSTRIKE: 1438 naml++; 1439 break; 1440 default: 1441 break; 1442 } 1443 } 1444 1445 /* 1446 * Retrieve the replacement string; if it is 1447 * undefined, resume searching for escapes. 1448 */ 1449 1450 switch (*esct) { 1451 case '*': 1452 if (arg_complete) { 1453 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1454 res = roff_getstrn(r, stnam, naml, &deftype); 1455 1456 /* 1457 * If not overriden, let \*(.T 1458 * through to the formatters. 1459 */ 1460 1461 if (res == NULL && naml == 2 && 1462 stnam[0] == '.' && stnam[1] == 'T') { 1463 roff_setstrn(&r->strtab, 1464 ".T", 2, NULL, 0, 0); 1465 stesc--; 1466 continue; 1467 } 1468 } 1469 break; 1470 case '$': 1471 if (r->mstackpos < 0) { 1472 mandoc_msg(MANDOCERR_ARG_UNDEF, ln, 1473 (int)(stesc - buf->buf), "%.3s", stesc); 1474 break; 1475 } 1476 ctx = r->mstack + r->mstackpos; 1477 npos = esct[1] - '1'; 1478 if (npos >= 0 && npos <= 8) { 1479 res = npos < ctx->argc ? 
1480 ctx->argv[npos] : ""; 1481 break; 1482 } 1483 if (esct[1] == '*') 1484 quote_args = 0; 1485 else if (esct[1] == '@') 1486 quote_args = 1; 1487 else { 1488 mandoc_msg(MANDOCERR_ARG_NONUM, ln, 1489 (int)(stesc - buf->buf), "%.3s", stesc); 1490 break; 1491 } 1492 asz = 0; 1493 for (npos = 0; npos < ctx->argc; npos++) { 1494 if (npos) 1495 asz++; /* blank */ 1496 if (quote_args) 1497 asz += 2; /* quotes */ 1498 asz += strlen(ctx->argv[npos]); 1499 } 1500 if (asz != 3) { 1501 rsz = buf->sz - (stesc - buf->buf) - 3; 1502 if (asz < 3) 1503 memmove(stesc + asz, stesc + 3, rsz); 1504 buf->sz += asz - 3; 1505 nbuf = mandoc_realloc(buf->buf, buf->sz); 1506 start = nbuf + pos; 1507 stesc = nbuf + (stesc - buf->buf); 1508 buf->buf = nbuf; 1509 if (asz > 3) 1510 memmove(stesc + asz, stesc + 3, rsz); 1511 } 1512 for (npos = 0; npos < ctx->argc; npos++) { 1513 if (npos) 1514 *stesc++ = ' '; 1515 if (quote_args) 1516 *stesc++ = '"'; 1517 cp = ctx->argv[npos]; 1518 while (*cp != '\0') 1519 *stesc++ = *cp++; 1520 if (quote_args) 1521 *stesc++ = '"'; 1522 } 1523 continue; 1524 case 'B': 1525 npos = 0; 1526 ubuf[0] = arg_complete && 1527 roff_evalnum(r, ln, stnam, &npos, 1528 NULL, ROFFNUM_SCALE) && 1529 stnam + npos + 1 == cp ? '1' : '0'; 1530 ubuf[1] = '\0'; 1531 break; 1532 case 'n': 1533 if (arg_complete) 1534 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1535 roff_getregn(r, stnam, naml, sign)); 1536 else 1537 ubuf[0] = '\0'; 1538 break; 1539 case 'w': 1540 /* use even incomplete args */ 1541 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1542 24 * (int)naml); 1543 break; 1544 } 1545 1546 if (res == NULL) { 1547 if (*esct == '*') 1548 mandoc_msg(MANDOCERR_STR_UNDEF, 1549 ln, (int)(stesc - buf->buf), 1550 "%.*s", (int)naml, stnam); 1551 res = ""; 1552 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1553 mandoc_msg(MANDOCERR_ROFFLOOP, 1554 ln, (int)(stesc - buf->buf), NULL); 1555 return ROFF_IGN; 1556 } 1557 1558 /* Replace the escape sequence by the string. 
*/ 1559 1560 *stesc = '\0'; 1561 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1562 buf->buf, res, cp) + 1; 1563 1564 /* Prepare for the next replacement. */ 1565 1566 start = nbuf + pos; 1567 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1568 free(buf->buf); 1569 buf->buf = nbuf; 1570 } 1571 return ROFF_CONT; 1572 } 1573 1574 /* 1575 * Parse a quoted or unquoted roff-style request or macro argument. 1576 * Return a pointer to the parsed argument, which is either the original 1577 * pointer or advanced by one byte in case the argument is quoted. 1578 * NUL-terminate the argument in place. 1579 * Collapse pairs of quotes inside quoted arguments. 1580 * Advance the argument pointer to the next argument, 1581 * or to the NUL byte terminating the argument line. 1582 */ 1583 char * 1584 roff_getarg(struct roff *r, char **cpp, int ln, int *pos) 1585 { 1586 struct buf buf; 1587 char *cp, *start; 1588 int newesc, pairs, quoted, white; 1589 1590 /* Quoting can only start with a new word. */ 1591 start = *cpp; 1592 quoted = 0; 1593 if ('"' == *start) { 1594 quoted = 1; 1595 start++; 1596 } 1597 1598 newesc = pairs = white = 0; 1599 for (cp = start; '\0' != *cp; cp++) { 1600 1601 /* 1602 * Move the following text left 1603 * after quoted quotes and after "\\" and "\t". 1604 */ 1605 if (pairs) 1606 cp[-pairs] = cp[0]; 1607 1608 if ('\\' == cp[0]) { 1609 /* 1610 * In copy mode, translate double to single 1611 * backslashes and backslash-t to literal tabs. 1612 */ 1613 switch (cp[1]) { 1614 case 'a': 1615 case 't': 1616 cp[-pairs] = '\t'; 1617 pairs++; 1618 cp++; 1619 break; 1620 case '\\': 1621 newesc = 1; 1622 cp[-pairs] = ASCII_ESC; 1623 pairs++; 1624 cp++; 1625 break; 1626 case ' ': 1627 /* Skip escaped blanks. */ 1628 if (0 == quoted) 1629 cp++; 1630 break; 1631 default: 1632 break; 1633 } 1634 } else if (0 == quoted) { 1635 if (' ' == cp[0]) { 1636 /* Unescaped blanks end unquoted args. 
*/ 1637 white = 1; 1638 break; 1639 } 1640 } else if ('"' == cp[0]) { 1641 if ('"' == cp[1]) { 1642 /* Quoted quotes collapse. */ 1643 pairs++; 1644 cp++; 1645 } else { 1646 /* Unquoted quotes end quoted args. */ 1647 quoted = 2; 1648 break; 1649 } 1650 } 1651 } 1652 1653 /* Quoted argument without a closing quote. */ 1654 if (1 == quoted) 1655 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1656 1657 /* NUL-terminate this argument and move to the next one. */ 1658 if (pairs) 1659 cp[-pairs] = '\0'; 1660 if ('\0' != *cp) { 1661 *cp++ = '\0'; 1662 while (' ' == *cp) 1663 cp++; 1664 } 1665 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1666 *cpp = cp; 1667 1668 if ('\0' == *cp && (white || ' ' == cp[-1])) 1669 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1670 1671 start = mandoc_strdup(start); 1672 if (newesc == 0) 1673 return start; 1674 1675 buf.buf = start; 1676 buf.sz = strlen(start) + 1; 1677 buf.next = NULL; 1678 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { 1679 free(buf.buf); 1680 buf.buf = mandoc_strdup(""); 1681 } 1682 return buf.buf; 1683 } 1684 1685 1686 /* 1687 * Process text streams. 1688 */ 1689 static int 1690 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1691 { 1692 size_t sz; 1693 const char *start; 1694 char *p; 1695 int isz; 1696 enum mandoc_esc esc; 1697 1698 /* Spring the input line trap. */ 1699 1700 if (roffit_lines == 1) { 1701 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1702 free(buf->buf); 1703 buf->buf = p; 1704 buf->sz = isz + 1; 1705 *offs = 0; 1706 free(roffit_macro); 1707 roffit_lines = 0; 1708 return ROFF_REPARSE; 1709 } else if (roffit_lines > 1) 1710 --roffit_lines; 1711 1712 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1713 if (roffce_lines < 1) { 1714 r->man->last = roffce_node; 1715 r->man->next = ROFF_NEXT_SIBLING; 1716 roffce_lines = 0; 1717 roffce_node = NULL; 1718 } else 1719 roffce_lines--; 1720 } 1721 1722 /* Convert all breakable hyphens into ASCII_HYPH. 
*/ 1723 1724 start = p = buf->buf + pos; 1725 1726 while (*p != '\0') { 1727 sz = strcspn(p, "-\\"); 1728 p += sz; 1729 1730 if (*p == '\0') 1731 break; 1732 1733 if (*p == '\\') { 1734 /* Skip over escapes. */ 1735 p++; 1736 esc = mandoc_escape((const char **)&p, NULL, NULL); 1737 if (esc == ESCAPE_ERROR) 1738 break; 1739 while (*p == '-') 1740 p++; 1741 continue; 1742 } else if (p == start) { 1743 p++; 1744 continue; 1745 } 1746 1747 if (isalpha((unsigned char)p[-1]) && 1748 isalpha((unsigned char)p[1])) 1749 *p = ASCII_HYPH; 1750 p++; 1751 } 1752 return ROFF_CONT; 1753 } 1754 1755 int 1756 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) 1757 { 1758 enum roff_tok t; 1759 int e; 1760 int pos; /* parse point */ 1761 int spos; /* saved parse point for messages */ 1762 int ppos; /* original offset in buf->buf */ 1763 int ctl; /* macro line (boolean) */ 1764 1765 ppos = pos = *offs; 1766 1767 /* Handle in-line equation delimiters. */ 1768 1769 if (r->tbl == NULL && 1770 r->last_eqn != NULL && r->last_eqn->delim && 1771 (r->eqn == NULL || r->eqn_inline)) { 1772 e = roff_eqndelim(r, buf, pos); 1773 if (e == ROFF_REPARSE) 1774 return e; 1775 assert(e == ROFF_CONT); 1776 } 1777 1778 /* Expand some escape sequences. */ 1779 1780 e = roff_expand(r, buf, ln, pos, r->escape); 1781 if ((e & ROFF_MASK) == ROFF_IGN) 1782 return e; 1783 assert(e == ROFF_CONT); 1784 1785 ctl = roff_getcontrol(r, buf->buf, &pos); 1786 1787 /* 1788 * First, if a scope is open and we're not a macro, pass the 1789 * text through the macro's filter. 1790 * Equations process all content themselves. 1791 * Tables process almost all content themselves, but we want 1792 * to warn about macros before passing it there. 1793 */ 1794 1795 if (r->last != NULL && ! 
ctl) { 1796 t = r->last->tok; 1797 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1798 if ((e & ROFF_MASK) == ROFF_IGN) 1799 return e; 1800 e &= ~ROFF_MASK; 1801 } else 1802 e = ROFF_IGN; 1803 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1804 eqn_read(r->eqn, buf->buf + ppos); 1805 return e; 1806 } 1807 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1808 tbl_read(r->tbl, ln, buf->buf, ppos); 1809 roff_addtbl(r->man, ln, r->tbl); 1810 return e; 1811 } 1812 if ( ! ctl) 1813 return roff_parsetext(r, buf, pos, offs) | e; 1814 1815 /* Skip empty request lines. */ 1816 1817 if (buf->buf[pos] == '"') { 1818 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL); 1819 return ROFF_IGN; 1820 } else if (buf->buf[pos] == '\0') 1821 return ROFF_IGN; 1822 1823 /* 1824 * If a scope is open, go to the child handler for that macro, 1825 * as it may want to preprocess before doing anything with it. 1826 * Don't do so if an equation is open. 1827 */ 1828 1829 if (r->last) { 1830 t = r->last->tok; 1831 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1832 } 1833 1834 /* No scope is open. This is a new request or macro. */ 1835 1836 spos = pos; 1837 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1838 1839 /* Tables ignore most macros. */ 1840 1841 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS || 1842 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) { 1843 mandoc_msg(MANDOCERR_TBLMACRO, 1844 ln, pos, "%s", buf->buf + spos); 1845 if (t != TOKEN_NONE) 1846 return ROFF_IGN; 1847 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1848 pos++; 1849 while (buf->buf[pos] == ' ') 1850 pos++; 1851 tbl_read(r->tbl, ln, buf->buf, pos); 1852 roff_addtbl(r->man, ln, r->tbl); 1853 return ROFF_IGN; 1854 } 1855 1856 /* For now, let high level macros abort .ce mode. 
*/ 1857 1858 if (ctl && roffce_node != NULL && 1859 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 1860 t == ROFF_TH || t == ROFF_TS)) { 1861 r->man->last = roffce_node; 1862 r->man->next = ROFF_NEXT_SIBLING; 1863 roffce_lines = 0; 1864 roffce_node = NULL; 1865 } 1866 1867 /* 1868 * This is neither a roff request nor a user-defined macro. 1869 * Let the standard macro set parsers handle it. 1870 */ 1871 1872 if (t == TOKEN_NONE) 1873 return ROFF_CONT; 1874 1875 /* Execute a roff request or a user defined macro. */ 1876 1877 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); 1878 } 1879 1880 /* 1881 * Internal interface function to tell the roff parser that execution 1882 * of the current macro ended. This is required because macro 1883 * definitions usually do not end with a .return request. 1884 */ 1885 void 1886 roff_userret(struct roff *r) 1887 { 1888 struct mctx *ctx; 1889 int i; 1890 1891 assert(r->mstackpos >= 0); 1892 ctx = r->mstack + r->mstackpos; 1893 for (i = 0; i < ctx->argc; i++) 1894 free(ctx->argv[i]); 1895 ctx->argc = 0; 1896 r->mstackpos--; 1897 } 1898 1899 void 1900 roff_endparse(struct roff *r) 1901 { 1902 if (r->last != NULL) 1903 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, 1904 r->last->col, "%s", roff_name[r->last->tok]); 1905 1906 if (r->eqn != NULL) { 1907 mandoc_msg(MANDOCERR_BLK_NOEND, 1908 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1909 eqn_parse(r->eqn); 1910 r->eqn = NULL; 1911 } 1912 1913 if (r->tbl != NULL) { 1914 tbl_end(r->tbl, 1); 1915 r->tbl = NULL; 1916 } 1917 } 1918 1919 /* 1920 * Parse a roff node's type from the input buffer. This must be in the 1921 * form of ".foo xxx" in the usual way. 
1922 */ 1923 static enum roff_tok 1924 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1925 { 1926 char *cp; 1927 const char *mac; 1928 size_t maclen; 1929 int deftype; 1930 enum roff_tok t; 1931 1932 cp = buf + *pos; 1933 1934 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1935 return TOKEN_NONE; 1936 1937 mac = cp; 1938 maclen = roff_getname(r, &cp, ln, ppos); 1939 1940 deftype = ROFFDEF_USER | ROFFDEF_REN; 1941 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 1942 switch (deftype) { 1943 case ROFFDEF_USER: 1944 t = ROFF_USERDEF; 1945 break; 1946 case ROFFDEF_REN: 1947 t = ROFF_RENAMED; 1948 break; 1949 default: 1950 t = roffhash_find(r->reqtab, mac, maclen); 1951 break; 1952 } 1953 if (t != TOKEN_NONE) 1954 *pos = cp - buf; 1955 else if (deftype == ROFFDEF_UNDEF) { 1956 /* Using an undefined macro defines it to be empty. */ 1957 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 1958 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 1959 } 1960 return t; 1961 } 1962 1963 /* --- handling of request blocks ----------------------------------------- */ 1964 1965 static int 1966 roff_cblock(ROFF_ARGS) 1967 { 1968 1969 /* 1970 * A block-close `..' should only be invoked as a child of an 1971 * ignore macro, otherwise raise a warning and just ignore it. 1972 */ 1973 1974 if (r->last == NULL) { 1975 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 1976 return ROFF_IGN; 1977 } 1978 1979 switch (r->last->tok) { 1980 case ROFF_am: 1981 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ 1982 case ROFF_ami: 1983 case ROFF_de: 1984 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 1985 case ROFF_dei: 1986 case ROFF_ig: 1987 break; 1988 default: 1989 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 1990 return ROFF_IGN; 1991 } 1992 1993 if (buf->buf[pos] != '\0') 1994 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 1995 ".. 
%s", buf->buf + pos); 1996 1997 roffnode_pop(r); 1998 roffnode_cleanscope(r); 1999 return ROFF_IGN; 2000 2001 } 2002 2003 static int 2004 roffnode_cleanscope(struct roff *r) 2005 { 2006 int inloop; 2007 2008 inloop = 0; 2009 while (r->last != NULL) { 2010 if (--r->last->endspan != 0) 2011 break; 2012 inloop += roffnode_pop(r); 2013 } 2014 return inloop; 2015 } 2016 2017 static int 2018 roff_ccond(struct roff *r, int ln, int ppos) 2019 { 2020 if (NULL == r->last) { 2021 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2022 return 0; 2023 } 2024 2025 switch (r->last->tok) { 2026 case ROFF_el: 2027 case ROFF_ie: 2028 case ROFF_if: 2029 case ROFF_while: 2030 break; 2031 default: 2032 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2033 return 0; 2034 } 2035 2036 if (r->last->endspan > -1) { 2037 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2038 return 0; 2039 } 2040 2041 return roffnode_pop(r) + roffnode_cleanscope(r); 2042 } 2043 2044 static int 2045 roff_block(ROFF_ARGS) 2046 { 2047 const char *name, *value; 2048 char *call, *cp, *iname, *rname; 2049 size_t csz, namesz, rsz; 2050 int deftype; 2051 2052 /* Ignore groff compatibility mode for now. */ 2053 2054 if (tok == ROFF_de1) 2055 tok = ROFF_de; 2056 else if (tok == ROFF_dei1) 2057 tok = ROFF_dei; 2058 else if (tok == ROFF_am1) 2059 tok = ROFF_am; 2060 else if (tok == ROFF_ami1) 2061 tok = ROFF_ami; 2062 2063 /* Parse the macro name argument. */ 2064 2065 cp = buf->buf + pos; 2066 if (tok == ROFF_ig) { 2067 iname = NULL; 2068 namesz = 0; 2069 } else { 2070 iname = cp; 2071 namesz = roff_getname(r, &cp, ln, ppos); 2072 iname[namesz] = '\0'; 2073 } 2074 2075 /* Resolve the macro name argument if it is indirect. 
*/ 2076 2077 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2078 deftype = ROFFDEF_USER; 2079 name = roff_getstrn(r, iname, namesz, &deftype); 2080 if (name == NULL) { 2081 mandoc_msg(MANDOCERR_STR_UNDEF, 2082 ln, (int)(iname - buf->buf), 2083 "%.*s", (int)namesz, iname); 2084 namesz = 0; 2085 } else 2086 namesz = strlen(name); 2087 } else 2088 name = iname; 2089 2090 if (namesz == 0 && tok != ROFF_ig) { 2091 mandoc_msg(MANDOCERR_REQ_EMPTY, 2092 ln, ppos, "%s", roff_name[tok]); 2093 return ROFF_IGN; 2094 } 2095 2096 roffnode_push(r, tok, name, ln, ppos); 2097 2098 /* 2099 * At the beginning of a `de' macro, clear the existing string 2100 * with the same name, if there is one. New content will be 2101 * appended from roff_block_text() in multiline mode. 2102 */ 2103 2104 if (tok == ROFF_de || tok == ROFF_dei) { 2105 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2106 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2107 } else if (tok == ROFF_am || tok == ROFF_ami) { 2108 deftype = ROFFDEF_ANY; 2109 value = roff_getstrn(r, iname, namesz, &deftype); 2110 switch (deftype) { /* Before appending, ... */ 2111 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2112 roff_setstrn(&r->strtab, name, namesz, 2113 value, strlen(value), 0); 2114 break; 2115 case ROFFDEF_REN: /* call original standard macro. */ 2116 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2117 (int)strlen(value), value); 2118 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2119 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2120 free(call); 2121 break; 2122 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2123 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2124 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2125 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2126 (int)rsz, rname); 2127 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2128 free(call); 2129 free(rname); 2130 break; 2131 default: 2132 break; 2133 } 2134 } 2135 2136 if (*cp == '\0') 2137 return ROFF_IGN; 2138 2139 /* Get the custom end marker. */ 2140 2141 iname = cp; 2142 namesz = roff_getname(r, &cp, ln, ppos); 2143 2144 /* Resolve the end marker if it is indirect. */ 2145 2146 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2147 deftype = ROFFDEF_USER; 2148 name = roff_getstrn(r, iname, namesz, &deftype); 2149 if (name == NULL) { 2150 mandoc_msg(MANDOCERR_STR_UNDEF, 2151 ln, (int)(iname - buf->buf), 2152 "%.*s", (int)namesz, iname); 2153 namesz = 0; 2154 } else 2155 namesz = strlen(name); 2156 } else 2157 name = iname; 2158 2159 if (namesz) 2160 r->last->end = mandoc_strndup(name, namesz); 2161 2162 if (*cp != '\0') 2163 mandoc_msg(MANDOCERR_ARG_EXCESS, 2164 ln, pos, ".%s ... %s", roff_name[tok], cp); 2165 2166 return ROFF_IGN; 2167 } 2168 2169 static int 2170 roff_block_sub(ROFF_ARGS) 2171 { 2172 enum roff_tok t; 2173 int i, j; 2174 2175 /* 2176 * First check whether a custom macro exists at this level. If 2177 * it does, then check against it. This is some of groff's 2178 * stranger behaviours. If we encountered a custom end-scope 2179 * tag and that tag also happens to be a "real" macro, then we 2180 * need to try interpreting it again as a real macro. If it's 2181 * not, then return ignore. Else continue. 
2182 */ 2183 2184 if (r->last->end) { 2185 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2186 if (buf->buf[i] != r->last->end[j]) 2187 break; 2188 2189 if (r->last->end[j] == '\0' && 2190 (buf->buf[i] == '\0' || 2191 buf->buf[i] == ' ' || 2192 buf->buf[i] == '\t')) { 2193 roffnode_pop(r); 2194 roffnode_cleanscope(r); 2195 2196 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2197 i++; 2198 2199 pos = i; 2200 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2201 TOKEN_NONE) 2202 return ROFF_RERUN; 2203 return ROFF_IGN; 2204 } 2205 } 2206 2207 /* 2208 * If we have no custom end-query or lookup failed, then try 2209 * pulling it out of the hashtable. 2210 */ 2211 2212 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2213 2214 if (t != ROFF_cblock) { 2215 if (tok != ROFF_ig) 2216 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2217 return ROFF_IGN; 2218 } 2219 2220 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2221 } 2222 2223 static int 2224 roff_block_text(ROFF_ARGS) 2225 { 2226 2227 if (tok != ROFF_ig) 2228 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2229 2230 return ROFF_IGN; 2231 } 2232 2233 static int 2234 roff_cond_sub(ROFF_ARGS) 2235 { 2236 char *ep; 2237 int endloop, irc, rr; 2238 enum roff_tok t; 2239 2240 irc = ROFF_IGN; 2241 rr = r->last->rule; 2242 endloop = tok != ROFF_while ? ROFF_IGN : 2243 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2244 if (roffnode_cleanscope(r)) 2245 irc |= endloop; 2246 2247 /* 2248 * If `\}' occurs on a macro line without a preceding macro, 2249 * drop the line completely. 2250 */ 2251 2252 ep = buf->buf + pos; 2253 if (ep[0] == '\\' && ep[1] == '}') 2254 rr = 0; 2255 2256 /* 2257 * The closing delimiter `\}' rewinds the conditional scope 2258 * but is otherwise ignored when interpreting the line. 
2259 */ 2260 2261 while ((ep = strchr(ep, '\\')) != NULL) { 2262 switch (ep[1]) { 2263 case '}': 2264 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2265 if (roff_ccond(r, ln, ep - buf->buf)) 2266 irc |= endloop; 2267 break; 2268 case '\0': 2269 ++ep; 2270 break; 2271 default: 2272 ep += 2; 2273 break; 2274 } 2275 } 2276 2277 /* 2278 * Fully handle known macros when they are structurally 2279 * required or when the conditional evaluated to true. 2280 */ 2281 2282 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2283 irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ? 2284 (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : 2285 rr ? ROFF_CONT : ROFF_IGN; 2286 return irc; 2287 } 2288 2289 static int 2290 roff_cond_text(ROFF_ARGS) 2291 { 2292 char *ep; 2293 int endloop, irc, rr; 2294 2295 irc = ROFF_IGN; 2296 rr = r->last->rule; 2297 endloop = tok != ROFF_while ? ROFF_IGN : 2298 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2299 if (roffnode_cleanscope(r)) 2300 irc |= endloop; 2301 2302 /* 2303 * If `\}' occurs on a text line with neither preceding 2304 * nor following characters, drop the line completely. 2305 */ 2306 2307 ep = buf->buf + pos; 2308 if (strcmp(ep, "\\}") == 0) 2309 rr = 0; 2310 2311 /* 2312 * The closing delimiter `\}' rewinds the conditional scope 2313 * but is otherwise ignored when interpreting the line. 2314 */ 2315 2316 while ((ep = strchr(ep, '\\')) != NULL) { 2317 switch (ep[1]) { 2318 case '}': 2319 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2320 if (roff_ccond(r, ln, ep - buf->buf)) 2321 irc |= endloop; 2322 break; 2323 case '\0': 2324 ++ep; 2325 break; 2326 default: 2327 ep += 2; 2328 break; 2329 } 2330 } 2331 if (rr) 2332 irc |= ROFF_CONT; 2333 return irc; 2334 } 2335 2336 /* --- handling of numeric and conditional expressions -------------------- */ 2337 2338 /* 2339 * Parse a single signed integer number. Stop at the first non-digit. 
2340 * If there is at least one digit, return success and advance the 2341 * parse point, else return failure and let the parse point unchanged. 2342 * Ignore overflows, treat them just like the C language. 2343 */ 2344 static int 2345 roff_getnum(const char *v, int *pos, int *res, int flags) 2346 { 2347 int myres, scaled, n, p; 2348 2349 if (NULL == res) 2350 res = &myres; 2351 2352 p = *pos; 2353 n = v[p] == '-'; 2354 if (n || v[p] == '+') 2355 p++; 2356 2357 if (flags & ROFFNUM_WHITE) 2358 while (isspace((unsigned char)v[p])) 2359 p++; 2360 2361 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2362 *res = 10 * *res + v[p] - '0'; 2363 if (p == *pos + n) 2364 return 0; 2365 2366 if (n) 2367 *res = -*res; 2368 2369 /* Each number may be followed by one optional scaling unit. */ 2370 2371 switch (v[p]) { 2372 case 'f': 2373 scaled = *res * 65536; 2374 break; 2375 case 'i': 2376 scaled = *res * 240; 2377 break; 2378 case 'c': 2379 scaled = *res * 240 / 2.54; 2380 break; 2381 case 'v': 2382 case 'P': 2383 scaled = *res * 40; 2384 break; 2385 case 'm': 2386 case 'n': 2387 scaled = *res * 24; 2388 break; 2389 case 'p': 2390 scaled = *res * 10 / 3; 2391 break; 2392 case 'u': 2393 scaled = *res; 2394 break; 2395 case 'M': 2396 scaled = *res * 6 / 25; 2397 break; 2398 default: 2399 scaled = *res; 2400 p--; 2401 break; 2402 } 2403 if (flags & ROFFNUM_SCALE) 2404 *res = scaled; 2405 2406 *pos = p + 1; 2407 return 1; 2408 } 2409 2410 /* 2411 * Evaluate a string comparison condition. 2412 * The first character is the delimiter. 2413 * Succeed if the string up to its second occurrence 2414 * matches the string up to its third occurence. 2415 * Advance the cursor after the third occurrence 2416 * or lacking that, to the end of the line. 
2417 */ 2418 static int 2419 roff_evalstrcond(const char *v, int *pos) 2420 { 2421 const char *s1, *s2, *s3; 2422 int match; 2423 2424 match = 0; 2425 s1 = v + *pos; /* initial delimiter */ 2426 s2 = s1 + 1; /* for scanning the first string */ 2427 s3 = strchr(s2, *s1); /* for scanning the second string */ 2428 2429 if (NULL == s3) /* found no middle delimiter */ 2430 goto out; 2431 2432 while ('\0' != *++s3) { 2433 if (*s2 != *s3) { /* mismatch */ 2434 s3 = strchr(s3, *s1); 2435 break; 2436 } 2437 if (*s3 == *s1) { /* found the final delimiter */ 2438 match = 1; 2439 break; 2440 } 2441 s2++; 2442 } 2443 2444 out: 2445 if (NULL == s3) 2446 s3 = strchr(s2, '\0'); 2447 else if (*s3 != '\0') 2448 s3++; 2449 *pos = s3 - v; 2450 return match; 2451 } 2452 2453 /* 2454 * Evaluate an optionally negated single character, numerical, 2455 * or string condition. 2456 */ 2457 static int 2458 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2459 { 2460 const char *start, *end; 2461 char *cp, *name; 2462 size_t sz; 2463 int deftype, len, number, savepos, istrue, wanttrue; 2464 2465 if ('!' == v[*pos]) { 2466 wanttrue = 0; 2467 (*pos)++; 2468 } else 2469 wanttrue = 1; 2470 2471 switch (v[*pos]) { 2472 case '\0': 2473 return 0; 2474 case 'n': 2475 case 'o': 2476 (*pos)++; 2477 return wanttrue; 2478 case 'e': 2479 case 't': 2480 case 'v': 2481 (*pos)++; 2482 return !wanttrue; 2483 case 'c': 2484 do { 2485 (*pos)++; 2486 } while (v[*pos] == ' '); 2487 2488 /* 2489 * Quirk for groff compatibility: 2490 * The horizontal tab is neither available nor unavailable. 2491 */ 2492 2493 if (v[*pos] == '\t') { 2494 (*pos)++; 2495 return 0; 2496 } 2497 2498 /* Printable ASCII characters are available. 
*/ 2499 2500 if (v[*pos] != '\\') { 2501 (*pos)++; 2502 return wanttrue; 2503 } 2504 2505 end = v + ++*pos; 2506 switch (mandoc_escape(&end, &start, &len)) { 2507 case ESCAPE_SPECIAL: 2508 istrue = mchars_spec2cp(start, len) != -1; 2509 break; 2510 case ESCAPE_UNICODE: 2511 istrue = 1; 2512 break; 2513 case ESCAPE_NUMBERED: 2514 istrue = mchars_num2char(start, len) != -1; 2515 break; 2516 default: 2517 istrue = !wanttrue; 2518 break; 2519 } 2520 *pos = end - v; 2521 return istrue == wanttrue; 2522 case 'd': 2523 case 'r': 2524 cp = v + *pos + 1; 2525 while (*cp == ' ') 2526 cp++; 2527 name = cp; 2528 sz = roff_getname(r, &cp, ln, cp - v); 2529 if (sz == 0) 2530 istrue = 0; 2531 else if (v[*pos] == 'r') 2532 istrue = roff_hasregn(r, name, sz); 2533 else { 2534 deftype = ROFFDEF_ANY; 2535 roff_getstrn(r, name, sz, &deftype); 2536 istrue = !!deftype; 2537 } 2538 *pos = cp - v; 2539 return istrue == wanttrue; 2540 default: 2541 break; 2542 } 2543 2544 savepos = *pos; 2545 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2546 return (number > 0) == wanttrue; 2547 else if (*pos == savepos) 2548 return roff_evalstrcond(v, pos) == wanttrue; 2549 else 2550 return 0; 2551 } 2552 2553 static int 2554 roff_line_ignore(ROFF_ARGS) 2555 { 2556 2557 return ROFF_IGN; 2558 } 2559 2560 static int 2561 roff_insec(ROFF_ARGS) 2562 { 2563 2564 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2565 return ROFF_IGN; 2566 } 2567 2568 static int 2569 roff_unsupp(ROFF_ARGS) 2570 { 2571 2572 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2573 return ROFF_IGN; 2574 } 2575 2576 static int 2577 roff_cond(ROFF_ARGS) 2578 { 2579 int irc; 2580 2581 roffnode_push(r, tok, NULL, ln, ppos); 2582 2583 /* 2584 * An `.el' has no conditional body: it will consume the value 2585 * of the current rstack entry set in prior `ie' calls or 2586 * defaults to DENY. 2587 * 2588 * If we're not an `el', however, then evaluate the conditional. 
2589 */ 2590 2591 r->last->rule = tok == ROFF_el ? 2592 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2593 roff_evalcond(r, ln, buf->buf, &pos); 2594 2595 /* 2596 * An if-else will put the NEGATION of the current evaluated 2597 * conditional into the stack of rules. 2598 */ 2599 2600 if (tok == ROFF_ie) { 2601 if (r->rstackpos + 1 == r->rstacksz) { 2602 r->rstacksz += 16; 2603 r->rstack = mandoc_reallocarray(r->rstack, 2604 r->rstacksz, sizeof(int)); 2605 } 2606 r->rstack[++r->rstackpos] = !r->last->rule; 2607 } 2608 2609 /* If the parent has false as its rule, then so do we. */ 2610 2611 if (r->last->parent && !r->last->parent->rule) 2612 r->last->rule = 0; 2613 2614 /* 2615 * Determine scope. 2616 * If there is nothing on the line after the conditional, 2617 * not even whitespace, use next-line scope. 2618 * Except that .while does not support next-line scope. 2619 */ 2620 2621 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2622 r->last->endspan = 2; 2623 goto out; 2624 } 2625 2626 while (buf->buf[pos] == ' ') 2627 pos++; 2628 2629 /* An opening brace requests multiline scope. */ 2630 2631 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2632 r->last->endspan = -1; 2633 pos += 2; 2634 while (buf->buf[pos] == ' ') 2635 pos++; 2636 goto out; 2637 } 2638 2639 /* 2640 * Anything else following the conditional causes 2641 * single-line scope. Warn if the scope contains 2642 * nothing but trailing whitespace. 2643 */ 2644 2645 if (buf->buf[pos] == '\0') 2646 mandoc_msg(MANDOCERR_COND_EMPTY, 2647 ln, ppos, "%s", roff_name[tok]); 2648 2649 r->last->endspan = 1; 2650 2651 out: 2652 *offs = pos; 2653 irc = ROFF_RERUN; 2654 if (tok == ROFF_while) 2655 irc |= ROFF_WHILE; 2656 return irc; 2657 } 2658 2659 static int 2660 roff_ds(ROFF_ARGS) 2661 { 2662 char *string; 2663 const char *name; 2664 size_t namesz; 2665 2666 /* Ignore groff compatibility mode for now. 
*/ 2667 2668 if (tok == ROFF_ds1) 2669 tok = ROFF_ds; 2670 else if (tok == ROFF_as1) 2671 tok = ROFF_as; 2672 2673 /* 2674 * The first word is the name of the string. 2675 * If it is empty or terminated by an escape sequence, 2676 * abort the `ds' request without defining anything. 2677 */ 2678 2679 name = string = buf->buf + pos; 2680 if (*name == '\0') 2681 return ROFF_IGN; 2682 2683 namesz = roff_getname(r, &string, ln, pos); 2684 if (name[namesz] == '\\') 2685 return ROFF_IGN; 2686 2687 /* Read past the initial double-quote, if any. */ 2688 if (*string == '"') 2689 string++; 2690 2691 /* The rest is the value. */ 2692 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2693 ROFF_as == tok); 2694 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2695 return ROFF_IGN; 2696 } 2697 2698 /* 2699 * Parse a single operator, one or two characters long. 2700 * If the operator is recognized, return success and advance the 2701 * parse point, else return failure and let the parse point unchanged. 
2702 */ 2703 static int 2704 roff_getop(const char *v, int *pos, char *res) 2705 { 2706 2707 *res = v[*pos]; 2708 2709 switch (*res) { 2710 case '+': 2711 case '-': 2712 case '*': 2713 case '/': 2714 case '%': 2715 case '&': 2716 case ':': 2717 break; 2718 case '<': 2719 switch (v[*pos + 1]) { 2720 case '=': 2721 *res = 'l'; 2722 (*pos)++; 2723 break; 2724 case '>': 2725 *res = '!'; 2726 (*pos)++; 2727 break; 2728 case '?': 2729 *res = 'i'; 2730 (*pos)++; 2731 break; 2732 default: 2733 break; 2734 } 2735 break; 2736 case '>': 2737 switch (v[*pos + 1]) { 2738 case '=': 2739 *res = 'g'; 2740 (*pos)++; 2741 break; 2742 case '?': 2743 *res = 'a'; 2744 (*pos)++; 2745 break; 2746 default: 2747 break; 2748 } 2749 break; 2750 case '=': 2751 if ('=' == v[*pos + 1]) 2752 (*pos)++; 2753 break; 2754 default: 2755 return 0; 2756 } 2757 (*pos)++; 2758 2759 return *res; 2760 } 2761 2762 /* 2763 * Evaluate either a parenthesized numeric expression 2764 * or a single signed integer number. 2765 */ 2766 static int 2767 roff_evalpar(struct roff *r, int ln, 2768 const char *v, int *pos, int *res, int flags) 2769 { 2770 2771 if ('(' != v[*pos]) 2772 return roff_getnum(v, pos, res, flags); 2773 2774 (*pos)++; 2775 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2776 return 0; 2777 2778 /* 2779 * Omission of the closing parenthesis 2780 * is an error in validation mode, 2781 * but ignored in evaluation mode. 2782 */ 2783 2784 if (')' == v[*pos]) 2785 (*pos)++; 2786 else if (NULL == res) 2787 return 0; 2788 2789 return 1; 2790 } 2791 2792 /* 2793 * Evaluate a complete numeric expression. 2794 * Proceed left to right, there is no concept of precedence. 
2795 */ 2796 static int 2797 roff_evalnum(struct roff *r, int ln, const char *v, 2798 int *pos, int *res, int flags) 2799 { 2800 int mypos, operand2; 2801 char operator; 2802 2803 if (NULL == pos) { 2804 mypos = 0; 2805 pos = &mypos; 2806 } 2807 2808 if (flags & ROFFNUM_WHITE) 2809 while (isspace((unsigned char)v[*pos])) 2810 (*pos)++; 2811 2812 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2813 return 0; 2814 2815 while (1) { 2816 if (flags & ROFFNUM_WHITE) 2817 while (isspace((unsigned char)v[*pos])) 2818 (*pos)++; 2819 2820 if ( ! roff_getop(v, pos, &operator)) 2821 break; 2822 2823 if (flags & ROFFNUM_WHITE) 2824 while (isspace((unsigned char)v[*pos])) 2825 (*pos)++; 2826 2827 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2828 return 0; 2829 2830 if (flags & ROFFNUM_WHITE) 2831 while (isspace((unsigned char)v[*pos])) 2832 (*pos)++; 2833 2834 if (NULL == res) 2835 continue; 2836 2837 switch (operator) { 2838 case '+': 2839 *res += operand2; 2840 break; 2841 case '-': 2842 *res -= operand2; 2843 break; 2844 case '*': 2845 *res *= operand2; 2846 break; 2847 case '/': 2848 if (operand2 == 0) { 2849 mandoc_msg(MANDOCERR_DIVZERO, 2850 ln, *pos, "%s", v); 2851 *res = 0; 2852 break; 2853 } 2854 *res /= operand2; 2855 break; 2856 case '%': 2857 if (operand2 == 0) { 2858 mandoc_msg(MANDOCERR_DIVZERO, 2859 ln, *pos, "%s", v); 2860 *res = 0; 2861 break; 2862 } 2863 *res %= operand2; 2864 break; 2865 case '<': 2866 *res = *res < operand2; 2867 break; 2868 case '>': 2869 *res = *res > operand2; 2870 break; 2871 case 'l': 2872 *res = *res <= operand2; 2873 break; 2874 case 'g': 2875 *res = *res >= operand2; 2876 break; 2877 case '=': 2878 *res = *res == operand2; 2879 break; 2880 case '!': 2881 *res = *res != operand2; 2882 break; 2883 case '&': 2884 *res = *res && operand2; 2885 break; 2886 case ':': 2887 *res = *res || operand2; 2888 break; 2889 case 'i': 2890 if (operand2 < *res) 2891 *res = operand2; 2892 break; 2893 case 'a': 2894 if (operand2 > *res) 2895 *res 
= operand2; 2896 break; 2897 default: 2898 abort(); 2899 } 2900 } 2901 return 1; 2902 } 2903 2904 /* --- register management ------------------------------------------------ */ 2905 2906 void 2907 roff_setreg(struct roff *r, const char *name, int val, char sign) 2908 { 2909 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 2910 } 2911 2912 static void 2913 roff_setregn(struct roff *r, const char *name, size_t len, 2914 int val, char sign, int step) 2915 { 2916 struct roffreg *reg; 2917 2918 /* Search for an existing register with the same name. */ 2919 reg = r->regtab; 2920 2921 while (reg != NULL && (reg->key.sz != len || 2922 strncmp(reg->key.p, name, len) != 0)) 2923 reg = reg->next; 2924 2925 if (NULL == reg) { 2926 /* Create a new register. */ 2927 reg = mandoc_malloc(sizeof(struct roffreg)); 2928 reg->key.p = mandoc_strndup(name, len); 2929 reg->key.sz = len; 2930 reg->val = 0; 2931 reg->step = 0; 2932 reg->next = r->regtab; 2933 r->regtab = reg; 2934 } 2935 2936 if ('+' == sign) 2937 reg->val += val; 2938 else if ('-' == sign) 2939 reg->val -= val; 2940 else 2941 reg->val = val; 2942 if (step != INT_MIN) 2943 reg->step = step; 2944 } 2945 2946 /* 2947 * Handle some predefined read-only number registers. 2948 * For now, return -1 if the requested register is not predefined; 2949 * in case a predefined read-only register having the value -1 2950 * were to turn up, another special value would have to be chosen. 2951 */ 2952 static int 2953 roff_getregro(const struct roff *r, const char *name) 2954 { 2955 2956 switch (*name) { 2957 case '$': /* Number of arguments of the last macro evaluated. */ 2958 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 2959 case 'A': /* ASCII approximation mode is always off. */ 2960 return 0; 2961 case 'g': /* Groff compatibility mode is always on. */ 2962 return 1; 2963 case 'H': /* Fixed horizontal resolution. */ 2964 return 24; 2965 case 'j': /* Always adjust left margin only. 
*/ 2966 return 0; 2967 case 'T': /* Some output device is always defined. */ 2968 return 1; 2969 case 'V': /* Fixed vertical resolution. */ 2970 return 40; 2971 default: 2972 return -1; 2973 } 2974 } 2975 2976 int 2977 roff_getreg(struct roff *r, const char *name) 2978 { 2979 return roff_getregn(r, name, strlen(name), '\0'); 2980 } 2981 2982 static int 2983 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 2984 { 2985 struct roffreg *reg; 2986 int val; 2987 2988 if ('.' == name[0] && 2 == len) { 2989 val = roff_getregro(r, name + 1); 2990 if (-1 != val) 2991 return val; 2992 } 2993 2994 for (reg = r->regtab; reg; reg = reg->next) { 2995 if (len == reg->key.sz && 2996 0 == strncmp(name, reg->key.p, len)) { 2997 switch (sign) { 2998 case '+': 2999 reg->val += reg->step; 3000 break; 3001 case '-': 3002 reg->val -= reg->step; 3003 break; 3004 default: 3005 break; 3006 } 3007 return reg->val; 3008 } 3009 } 3010 3011 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3012 return 0; 3013 } 3014 3015 static int 3016 roff_hasregn(const struct roff *r, const char *name, size_t len) 3017 { 3018 struct roffreg *reg; 3019 int val; 3020 3021 if ('.' 
== name[0] && 2 == len) { 3022 val = roff_getregro(r, name + 1); 3023 if (-1 != val) 3024 return 1; 3025 } 3026 3027 for (reg = r->regtab; reg; reg = reg->next) 3028 if (len == reg->key.sz && 3029 0 == strncmp(name, reg->key.p, len)) 3030 return 1; 3031 3032 return 0; 3033 } 3034 3035 static void 3036 roff_freereg(struct roffreg *reg) 3037 { 3038 struct roffreg *old_reg; 3039 3040 while (NULL != reg) { 3041 free(reg->key.p); 3042 old_reg = reg; 3043 reg = reg->next; 3044 free(old_reg); 3045 } 3046 } 3047 3048 static int 3049 roff_nr(ROFF_ARGS) 3050 { 3051 char *key, *val, *step; 3052 size_t keysz; 3053 int iv, is, len; 3054 char sign; 3055 3056 key = val = buf->buf + pos; 3057 if (*key == '\0') 3058 return ROFF_IGN; 3059 3060 keysz = roff_getname(r, &val, ln, pos); 3061 if (key[keysz] == '\\') 3062 return ROFF_IGN; 3063 3064 sign = *val; 3065 if (sign == '+' || sign == '-') 3066 val++; 3067 3068 len = 0; 3069 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3070 return ROFF_IGN; 3071 3072 step = val + len; 3073 while (isspace((unsigned char)*step)) 3074 step++; 3075 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3076 is = INT_MIN; 3077 3078 roff_setregn(r, key, keysz, iv, sign, is); 3079 return ROFF_IGN; 3080 } 3081 3082 static int 3083 roff_rr(ROFF_ARGS) 3084 { 3085 struct roffreg *reg, **prev; 3086 char *name, *cp; 3087 size_t namesz; 3088 3089 name = cp = buf->buf + pos; 3090 if (*name == '\0') 3091 return ROFF_IGN; 3092 namesz = roff_getname(r, &cp, ln, pos); 3093 name[namesz] = '\0'; 3094 3095 prev = &r->regtab; 3096 while (1) { 3097 reg = *prev; 3098 if (reg == NULL || !strcmp(name, reg->key.p)) 3099 break; 3100 prev = ®->next; 3101 } 3102 if (reg != NULL) { 3103 *prev = reg->next; 3104 free(reg->key.p); 3105 free(reg); 3106 } 3107 return ROFF_IGN; 3108 } 3109 3110 /* --- handler functions for roff requests -------------------------------- */ 3111 3112 static int 3113 roff_rm(ROFF_ARGS) 3114 { 3115 const char *name; 3116 char *cp; 3117 size_t 
namesz; 3118 3119 cp = buf->buf + pos; 3120 while (*cp != '\0') { 3121 name = cp; 3122 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3123 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3124 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3125 if (name[namesz] == '\\') 3126 break; 3127 } 3128 return ROFF_IGN; 3129 } 3130 3131 static int 3132 roff_it(ROFF_ARGS) 3133 { 3134 int iv; 3135 3136 /* Parse the number of lines. */ 3137 3138 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3139 mandoc_msg(MANDOCERR_IT_NONUM, 3140 ln, ppos, "%s", buf->buf + 1); 3141 return ROFF_IGN; 3142 } 3143 3144 while (isspace((unsigned char)buf->buf[pos])) 3145 pos++; 3146 3147 /* 3148 * Arm the input line trap. 3149 * Special-casing "an-trap" is an ugly workaround to cope 3150 * with DocBook stupidly fiddling with man(7) internals. 3151 */ 3152 3153 roffit_lines = iv; 3154 roffit_macro = mandoc_strdup(iv != 1 || 3155 strcmp(buf->buf + pos, "an-trap") ? 3156 buf->buf + pos : "br"); 3157 return ROFF_IGN; 3158 } 3159 3160 static int 3161 roff_Dd(ROFF_ARGS) 3162 { 3163 int mask; 3164 enum roff_tok t, te; 3165 3166 switch (tok) { 3167 case ROFF_Dd: 3168 tok = MDOC_Dd; 3169 te = MDOC_MAX; 3170 if (r->format == 0) 3171 r->format = MPARSE_MDOC; 3172 mask = MPARSE_MDOC | MPARSE_QUICK; 3173 break; 3174 case ROFF_TH: 3175 tok = MAN_TH; 3176 te = MAN_MAX; 3177 if (r->format == 0) 3178 r->format = MPARSE_MAN; 3179 mask = MPARSE_QUICK; 3180 break; 3181 default: 3182 abort(); 3183 } 3184 if ((r->options & mask) == 0) 3185 for (t = tok; t < te; t++) 3186 roff_setstr(r, roff_name[t], NULL, 0); 3187 return ROFF_CONT; 3188 } 3189 3190 static int 3191 roff_TE(ROFF_ARGS) 3192 { 3193 r->man->flags &= ~ROFF_NONOFILL; 3194 if (r->tbl == NULL) { 3195 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3196 return ROFF_IGN; 3197 } 3198 if (tbl_end(r->tbl, 0) == 0) { 3199 r->tbl = NULL; 3200 free(buf->buf); 3201 buf->buf = mandoc_strdup(".sp"); 3202 buf->sz = 4; 3203 *offs = 0; 3204 return 
ROFF_REPARSE; 3205 } 3206 r->tbl = NULL; 3207 return ROFF_IGN; 3208 } 3209 3210 static int 3211 roff_T_(ROFF_ARGS) 3212 { 3213 3214 if (NULL == r->tbl) 3215 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3216 else 3217 tbl_restart(ln, ppos, r->tbl); 3218 3219 return ROFF_IGN; 3220 } 3221 3222 /* 3223 * Handle in-line equation delimiters. 3224 */ 3225 static int 3226 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3227 { 3228 char *cp1, *cp2; 3229 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3230 3231 /* 3232 * Outside equations, look for an opening delimiter. 3233 * If we are inside an equation, we already know it is 3234 * in-line, or this function wouldn't have been called; 3235 * so look for a closing delimiter. 3236 */ 3237 3238 cp1 = buf->buf + pos; 3239 cp2 = strchr(cp1, r->eqn == NULL ? 3240 r->last_eqn->odelim : r->last_eqn->cdelim); 3241 if (cp2 == NULL) 3242 return ROFF_CONT; 3243 3244 *cp2++ = '\0'; 3245 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3246 3247 /* Handle preceding text, protecting whitespace. */ 3248 3249 if (*buf->buf != '\0') { 3250 if (r->eqn == NULL) 3251 bef_pr = "\\&"; 3252 bef_nl = "\n"; 3253 } 3254 3255 /* 3256 * Prepare replacing the delimiter with an equation macro 3257 * and drop leading white space from the equation. 3258 */ 3259 3260 if (r->eqn == NULL) { 3261 while (*cp2 == ' ') 3262 cp2++; 3263 mac = ".EQ"; 3264 } else 3265 mac = ".EN"; 3266 3267 /* Handle following text, protecting whitespace. */ 3268 3269 if (*cp2 != '\0') { 3270 aft_nl = "\n"; 3271 if (r->eqn != NULL) 3272 aft_pr = "\\&"; 3273 } 3274 3275 /* Do the actual replacement. */ 3276 3277 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3278 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3279 free(buf->buf); 3280 buf->buf = cp1; 3281 3282 /* Toggle the in-line state of the eqn subsystem. 
*/ 3283 3284 r->eqn_inline = r->eqn == NULL; 3285 return ROFF_REPARSE; 3286 } 3287 3288 static int 3289 roff_EQ(ROFF_ARGS) 3290 { 3291 struct roff_node *n; 3292 3293 if (r->man->meta.macroset == MACROSET_MAN) 3294 man_breakscope(r->man, ROFF_EQ); 3295 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3296 if (ln > r->man->last->line) 3297 n->flags |= NODE_LINE; 3298 n->eqn = eqn_box_new(); 3299 roff_node_append(r->man, n); 3300 r->man->next = ROFF_NEXT_SIBLING; 3301 3302 assert(r->eqn == NULL); 3303 if (r->last_eqn == NULL) 3304 r->last_eqn = eqn_alloc(); 3305 else 3306 eqn_reset(r->last_eqn); 3307 r->eqn = r->last_eqn; 3308 r->eqn->node = n; 3309 3310 if (buf->buf[pos] != '\0') 3311 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3312 ".EQ %s", buf->buf + pos); 3313 3314 return ROFF_IGN; 3315 } 3316 3317 static int 3318 roff_EN(ROFF_ARGS) 3319 { 3320 if (r->eqn != NULL) { 3321 eqn_parse(r->eqn); 3322 r->eqn = NULL; 3323 } else 3324 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3325 if (buf->buf[pos] != '\0') 3326 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3327 "EN %s", buf->buf + pos); 3328 return ROFF_IGN; 3329 } 3330 3331 static int 3332 roff_TS(ROFF_ARGS) 3333 { 3334 if (r->tbl != NULL) { 3335 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3336 tbl_end(r->tbl, 0); 3337 } 3338 r->man->flags |= ROFF_NONOFILL; 3339 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3340 if (r->last_tbl == NULL) 3341 r->first_tbl = r->tbl; 3342 r->last_tbl = r->tbl; 3343 return ROFF_IGN; 3344 } 3345 3346 static int 3347 roff_noarg(ROFF_ARGS) 3348 { 3349 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3350 man_breakscope(r->man, tok); 3351 if (tok == ROFF_brp) 3352 tok = ROFF_br; 3353 roff_elem_alloc(r->man, ln, ppos, tok); 3354 if (buf->buf[pos] != '\0') 3355 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3356 "%s %s", roff_name[tok], buf->buf + pos); 3357 if (tok == ROFF_nf) 3358 r->man->flags |= ROFF_NOFILL; 3359 else if (tok == ROFF_fi) 3360 r->man->flags &= ~ROFF_NOFILL; 3361 
r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3362 r->man->next = ROFF_NEXT_SIBLING; 3363 return ROFF_IGN; 3364 } 3365 3366 static int 3367 roff_onearg(ROFF_ARGS) 3368 { 3369 struct roff_node *n; 3370 char *cp; 3371 int npos; 3372 3373 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3374 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3375 tok == ROFF_ti)) 3376 man_breakscope(r->man, tok); 3377 3378 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3379 r->man->last = roffce_node; 3380 r->man->next = ROFF_NEXT_SIBLING; 3381 } 3382 3383 roff_elem_alloc(r->man, ln, ppos, tok); 3384 n = r->man->last; 3385 3386 cp = buf->buf + pos; 3387 if (*cp != '\0') { 3388 while (*cp != '\0' && *cp != ' ') 3389 cp++; 3390 while (*cp == ' ') 3391 *cp++ = '\0'; 3392 if (*cp != '\0') 3393 mandoc_msg(MANDOCERR_ARG_EXCESS, 3394 ln, (int)(cp - buf->buf), 3395 "%s ... %s", roff_name[tok], cp); 3396 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3397 } 3398 3399 if (tok == ROFF_ce || tok == ROFF_rj) { 3400 if (r->man->last->type == ROFFT_ELEM) { 3401 roff_word_alloc(r->man, ln, pos, "1"); 3402 r->man->last->flags |= NODE_NOSRC; 3403 } 3404 npos = 0; 3405 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3406 &roffce_lines, 0) == 0) { 3407 mandoc_msg(MANDOCERR_CE_NONUM, 3408 ln, pos, "ce %s", buf->buf + pos); 3409 roffce_lines = 1; 3410 } 3411 if (roffce_lines < 1) { 3412 r->man->last = r->man->last->parent; 3413 roffce_node = NULL; 3414 roffce_lines = 0; 3415 } else 3416 roffce_node = r->man->last->parent; 3417 } else { 3418 n->flags |= NODE_VALID | NODE_ENDED; 3419 r->man->last = n; 3420 } 3421 n->flags |= NODE_LINE; 3422 r->man->next = ROFF_NEXT_SIBLING; 3423 return ROFF_IGN; 3424 } 3425 3426 static int 3427 roff_manyarg(ROFF_ARGS) 3428 { 3429 struct roff_node *n; 3430 char *sp, *ep; 3431 3432 roff_elem_alloc(r->man, ln, ppos, tok); 3433 n = r->man->last; 3434 3435 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3436 while (*ep != '\0' && 
*ep != ' ') 3437 ep++; 3438 while (*ep == ' ') 3439 *ep++ = '\0'; 3440 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3441 } 3442 3443 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3444 r->man->last = n; 3445 r->man->next = ROFF_NEXT_SIBLING; 3446 return ROFF_IGN; 3447 } 3448 3449 static int 3450 roff_als(ROFF_ARGS) 3451 { 3452 char *oldn, *newn, *end, *value; 3453 size_t oldsz, newsz, valsz; 3454 3455 newn = oldn = buf->buf + pos; 3456 if (*newn == '\0') 3457 return ROFF_IGN; 3458 3459 newsz = roff_getname(r, &oldn, ln, pos); 3460 if (newn[newsz] == '\\' || *oldn == '\0') 3461 return ROFF_IGN; 3462 3463 end = oldn; 3464 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3465 if (oldsz == 0) 3466 return ROFF_IGN; 3467 3468 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3469 (int)oldsz, oldn); 3470 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3471 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3472 free(value); 3473 return ROFF_IGN; 3474 } 3475 3476 static int 3477 roff_cc(ROFF_ARGS) 3478 { 3479 const char *p; 3480 3481 p = buf->buf + pos; 3482 3483 if (*p == '\0' || (r->control = *p++) == '.') 3484 r->control = '\0'; 3485 3486 if (*p != '\0') 3487 mandoc_msg(MANDOCERR_ARG_EXCESS, 3488 ln, p - buf->buf, "cc ... %s", p); 3489 3490 return ROFF_IGN; 3491 } 3492 3493 static int 3494 roff_char(ROFF_ARGS) 3495 { 3496 const char *p, *kp, *vp; 3497 size_t ksz, vsz; 3498 int font; 3499 3500 /* Parse the character to be replaced. */ 3501 3502 kp = buf->buf + pos; 3503 p = kp + 1; 3504 if (*kp == '\0' || (*kp == '\\' && 3505 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3506 (*p != ' ' && *p != '\0')) { 3507 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3508 return ROFF_IGN; 3509 } 3510 ksz = p - kp; 3511 while (*p == ' ') 3512 p++; 3513 3514 /* 3515 * If the replacement string contains a font escape sequence, 3516 * we have to restore the font at the end. 
3517 */ 3518 3519 vp = p; 3520 vsz = strlen(p); 3521 font = 0; 3522 while (*p != '\0') { 3523 if (*p++ != '\\') 3524 continue; 3525 switch (mandoc_escape(&p, NULL, NULL)) { 3526 case ESCAPE_FONT: 3527 case ESCAPE_FONTROMAN: 3528 case ESCAPE_FONTITALIC: 3529 case ESCAPE_FONTBOLD: 3530 case ESCAPE_FONTBI: 3531 case ESCAPE_FONTCW: 3532 case ESCAPE_FONTPREV: 3533 font++; 3534 break; 3535 default: 3536 break; 3537 } 3538 } 3539 if (font > 1) 3540 mandoc_msg(MANDOCERR_CHAR_FONT, 3541 ln, (int)(vp - buf->buf), "%s", vp); 3542 3543 /* 3544 * Approximate the effect of .char using the .tr tables. 3545 * XXX In groff, .char and .tr interact differently. 3546 */ 3547 3548 if (ksz == 1) { 3549 if (r->xtab == NULL) 3550 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3551 assert((unsigned int)*kp < 128); 3552 free(r->xtab[(int)*kp].p); 3553 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3554 "%s%s", vp, font ? "\fP" : ""); 3555 } else { 3556 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3557 if (font) 3558 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3559 } 3560 return ROFF_IGN; 3561 } 3562 3563 static int 3564 roff_ec(ROFF_ARGS) 3565 { 3566 const char *p; 3567 3568 p = buf->buf + pos; 3569 if (*p == '\0') 3570 r->escape = '\\'; 3571 else { 3572 r->escape = *p; 3573 if (*++p != '\0') 3574 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3575 (int)(p - buf->buf), "ec ... 
%s", p); 3576 } 3577 return ROFF_IGN; 3578 } 3579 3580 static int 3581 roff_eo(ROFF_ARGS) 3582 { 3583 r->escape = '\0'; 3584 if (buf->buf[pos] != '\0') 3585 mandoc_msg(MANDOCERR_ARG_SKIP, 3586 ln, pos, "eo %s", buf->buf + pos); 3587 return ROFF_IGN; 3588 } 3589 3590 static int 3591 roff_nop(ROFF_ARGS) 3592 { 3593 while (buf->buf[pos] == ' ') 3594 pos++; 3595 *offs = pos; 3596 return ROFF_RERUN; 3597 } 3598 3599 static int 3600 roff_tr(ROFF_ARGS) 3601 { 3602 const char *p, *first, *second; 3603 size_t fsz, ssz; 3604 enum mandoc_esc esc; 3605 3606 p = buf->buf + pos; 3607 3608 if (*p == '\0') { 3609 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3610 return ROFF_IGN; 3611 } 3612 3613 while (*p != '\0') { 3614 fsz = ssz = 1; 3615 3616 first = p++; 3617 if (*first == '\\') { 3618 esc = mandoc_escape(&p, NULL, NULL); 3619 if (esc == ESCAPE_ERROR) { 3620 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3621 (int)(p - buf->buf), "%s", first); 3622 return ROFF_IGN; 3623 } 3624 fsz = (size_t)(p - first); 3625 } 3626 3627 second = p++; 3628 if (*second == '\\') { 3629 esc = mandoc_escape(&p, NULL, NULL); 3630 if (esc == ESCAPE_ERROR) { 3631 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3632 (int)(p - buf->buf), "%s", second); 3633 return ROFF_IGN; 3634 } 3635 ssz = (size_t)(p - second); 3636 } else if (*second == '\0') { 3637 mandoc_msg(MANDOCERR_TR_ODD, ln, 3638 (int)(first - buf->buf), "tr %s", first); 3639 second = " "; 3640 p--; 3641 } 3642 3643 if (fsz > 1) { 3644 roff_setstrn(&r->xmbtab, first, fsz, 3645 second, ssz, 0); 3646 continue; 3647 } 3648 3649 if (r->xtab == NULL) 3650 r->xtab = mandoc_calloc(128, 3651 sizeof(struct roffstr)); 3652 3653 free(r->xtab[(int)*first].p); 3654 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3655 r->xtab[(int)*first].sz = ssz; 3656 } 3657 3658 return ROFF_IGN; 3659 } 3660 3661 /* 3662 * Implementation of the .return request. 3663 * There is no need to call roff_userret() from here. 
3664 * The read module will call that after rewinding the reader stack 3665 * to the place from where the current macro was called. 3666 */ 3667 static int 3668 roff_return(ROFF_ARGS) 3669 { 3670 if (r->mstackpos >= 0) 3671 return ROFF_IGN | ROFF_USERRET; 3672 3673 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3674 return ROFF_IGN; 3675 } 3676 3677 static int 3678 roff_rn(ROFF_ARGS) 3679 { 3680 const char *value; 3681 char *oldn, *newn, *end; 3682 size_t oldsz, newsz; 3683 int deftype; 3684 3685 oldn = newn = buf->buf + pos; 3686 if (*oldn == '\0') 3687 return ROFF_IGN; 3688 3689 oldsz = roff_getname(r, &newn, ln, pos); 3690 if (oldn[oldsz] == '\\' || *newn == '\0') 3691 return ROFF_IGN; 3692 3693 end = newn; 3694 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3695 if (newsz == 0) 3696 return ROFF_IGN; 3697 3698 deftype = ROFFDEF_ANY; 3699 value = roff_getstrn(r, oldn, oldsz, &deftype); 3700 switch (deftype) { 3701 case ROFFDEF_USER: 3702 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3703 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3704 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3705 break; 3706 case ROFFDEF_PRE: 3707 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3708 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3709 break; 3710 case ROFFDEF_REN: 3711 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3712 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3713 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3714 break; 3715 case ROFFDEF_STD: 3716 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3717 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3718 break; 3719 default: 3720 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3721 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3722 break; 3723 } 3724 return ROFF_IGN; 3725 } 3726 3727 static int 3728 roff_shift(ROFF_ARGS) 3729 { 3730 struct mctx *ctx; 3731 int levels, i; 3732 3733 levels = 1; 3734 if (buf->buf[pos] != 
'\0' && 3735 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3736 mandoc_msg(MANDOCERR_CE_NONUM, 3737 ln, pos, "shift %s", buf->buf + pos); 3738 levels = 1; 3739 } 3740 if (r->mstackpos < 0) { 3741 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3742 return ROFF_IGN; 3743 } 3744 ctx = r->mstack + r->mstackpos; 3745 if (levels > ctx->argc) { 3746 mandoc_msg(MANDOCERR_SHIFT, 3747 ln, pos, "%d, but max is %d", levels, ctx->argc); 3748 levels = ctx->argc; 3749 } 3750 if (levels == 0) 3751 return ROFF_IGN; 3752 for (i = 0; i < levels; i++) 3753 free(ctx->argv[i]); 3754 ctx->argc -= levels; 3755 for (i = 0; i < ctx->argc; i++) 3756 ctx->argv[i] = ctx->argv[i + levels]; 3757 return ROFF_IGN; 3758 } 3759 3760 static int 3761 roff_so(ROFF_ARGS) 3762 { 3763 char *name, *cp; 3764 3765 name = buf->buf + pos; 3766 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3767 3768 /* 3769 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3770 * opening anything that's not in our cwd or anything beneath 3771 * it. Thus, explicitly disallow traversing up the file-system 3772 * or using absolute paths. 3773 */ 3774 3775 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3776 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3777 buf->sz = mandoc_asprintf(&cp, 3778 ".sp\nSee the file %s.\n.sp", name) + 1; 3779 free(buf->buf); 3780 buf->buf = cp; 3781 *offs = 0; 3782 return ROFF_REPARSE; 3783 } 3784 3785 *offs = pos; 3786 return ROFF_SO; 3787 } 3788 3789 /* --- user defined strings and macros ------------------------------------ */ 3790 3791 static int 3792 roff_userdef(ROFF_ARGS) 3793 { 3794 struct mctx *ctx; 3795 char *arg, *ap, *dst, *src; 3796 size_t sz; 3797 3798 /* Initialize a new macro stack context. 
*/ 3799 3800 if (++r->mstackpos == r->mstacksz) { 3801 r->mstack = mandoc_recallocarray(r->mstack, 3802 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3803 r->mstacksz += 8; 3804 } 3805 ctx = r->mstack + r->mstackpos; 3806 ctx->argsz = 0; 3807 ctx->argc = 0; 3808 ctx->argv = NULL; 3809 3810 /* 3811 * Collect pointers to macro argument strings, 3812 * NUL-terminating them and escaping quotes. 3813 */ 3814 3815 src = buf->buf + pos; 3816 while (*src != '\0') { 3817 if (ctx->argc == ctx->argsz) { 3818 ctx->argsz += 8; 3819 ctx->argv = mandoc_reallocarray(ctx->argv, 3820 ctx->argsz, sizeof(*ctx->argv)); 3821 } 3822 arg = roff_getarg(r, &src, ln, &pos); 3823 sz = 1; /* For the terminating NUL. */ 3824 for (ap = arg; *ap != '\0'; ap++) 3825 sz += *ap == '"' ? 4 : 1; 3826 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3827 for (ap = arg; *ap != '\0'; ap++) { 3828 if (*ap == '"') { 3829 memcpy(dst, "\\(dq", 4); 3830 dst += 4; 3831 } else 3832 *dst++ = *ap; 3833 } 3834 *dst = '\0'; 3835 free(arg); 3836 } 3837 3838 /* Replace the macro invocation by the macro definition. */ 3839 3840 free(buf->buf); 3841 buf->buf = mandoc_strdup(r->current_string); 3842 buf->sz = strlen(buf->buf) + 1; 3843 *offs = 0; 3844 3845 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ? 3846 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3847 } 3848 3849 /* 3850 * Calling a high-level macro that was renamed with .rn. 3851 * r->current_string has already been set up by roff_parse(). 3852 */ 3853 static int 3854 roff_renamed(ROFF_ARGS) 3855 { 3856 char *nbuf; 3857 3858 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3859 buf->buf[pos] == '\0' ? 
"" : " ", buf->buf + pos) + 1; 3860 free(buf->buf); 3861 buf->buf = nbuf; 3862 *offs = 0; 3863 return ROFF_CONT; 3864 } 3865 3866 static size_t 3867 roff_getname(struct roff *r, char **cpp, int ln, int pos) 3868 { 3869 char *name, *cp; 3870 size_t namesz; 3871 3872 name = *cpp; 3873 if ('\0' == *name) 3874 return 0; 3875 3876 /* Read until end of name and terminate it with NUL. */ 3877 for (cp = name; 1; cp++) { 3878 if ('\0' == *cp || ' ' == *cp) { 3879 namesz = cp - name; 3880 break; 3881 } 3882 if ('\\' != *cp) 3883 continue; 3884 namesz = cp - name; 3885 if ('{' == cp[1] || '}' == cp[1]) 3886 break; 3887 cp++; 3888 if ('\\' == *cp) 3889 continue; 3890 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 3891 "%.*s", (int)(cp - name + 1), name); 3892 mandoc_escape((const char **)&cp, NULL, NULL); 3893 break; 3894 } 3895 3896 /* Read past spaces. */ 3897 while (' ' == *cp) 3898 cp++; 3899 3900 *cpp = cp; 3901 return namesz; 3902 } 3903 3904 /* 3905 * Store *string into the user-defined string called *name. 3906 * To clear an existing entry, call with (*r, *name, NULL, 0). 3907 * append == 0: replace mode 3908 * append == 1: single-line append mode 3909 * append == 2: multiline append mode, append '\n' after each call 3910 */ 3911 static void 3912 roff_setstr(struct roff *r, const char *name, const char *string, 3913 int append) 3914 { 3915 size_t namesz; 3916 3917 namesz = strlen(name); 3918 roff_setstrn(&r->strtab, name, namesz, string, 3919 string ? strlen(string) : 0, append); 3920 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3921 } 3922 3923 static void 3924 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 3925 const char *string, size_t stringsz, int append) 3926 { 3927 struct roffkv *n; 3928 char *c; 3929 int i; 3930 size_t oldch, newch; 3931 3932 /* Search for an existing string with the same name. 
*/ 3933 n = *r; 3934 3935 while (n && (namesz != n->key.sz || 3936 strncmp(n->key.p, name, namesz))) 3937 n = n->next; 3938 3939 if (NULL == n) { 3940 /* Create a new string table entry. */ 3941 n = mandoc_malloc(sizeof(struct roffkv)); 3942 n->key.p = mandoc_strndup(name, namesz); 3943 n->key.sz = namesz; 3944 n->val.p = NULL; 3945 n->val.sz = 0; 3946 n->next = *r; 3947 *r = n; 3948 } else if (0 == append) { 3949 free(n->val.p); 3950 n->val.p = NULL; 3951 n->val.sz = 0; 3952 } 3953 3954 if (NULL == string) 3955 return; 3956 3957 /* 3958 * One additional byte for the '\n' in multiline mode, 3959 * and one for the terminating '\0'. 3960 */ 3961 newch = stringsz + (1 < append ? 2u : 1u); 3962 3963 if (NULL == n->val.p) { 3964 n->val.p = mandoc_malloc(newch); 3965 *n->val.p = '\0'; 3966 oldch = 0; 3967 } else { 3968 oldch = n->val.sz; 3969 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 3970 } 3971 3972 /* Skip existing content in the destination buffer. */ 3973 c = n->val.p + (int)oldch; 3974 3975 /* Append new content to the destination buffer. */ 3976 i = 0; 3977 while (i < (int)stringsz) { 3978 /* 3979 * Rudimentary roff copy mode: 3980 * Handle escaped backslashes. 3981 */ 3982 if ('\\' == string[i] && '\\' == string[i + 1]) 3983 i++; 3984 *c++ = string[i++]; 3985 } 3986 3987 /* Append terminating bytes. 
*/ 3988 if (1 < append) 3989 *c++ = '\n'; 3990 3991 *c = '\0'; 3992 n->val.sz = (int)(c - n->val.p); 3993 } 3994 3995 static const char * 3996 roff_getstrn(struct roff *r, const char *name, size_t len, 3997 int *deftype) 3998 { 3999 const struct roffkv *n; 4000 int found, i; 4001 enum roff_tok tok; 4002 4003 found = 0; 4004 for (n = r->strtab; n != NULL; n = n->next) { 4005 if (strncmp(name, n->key.p, len) != 0 || 4006 n->key.p[len] != '\0' || n->val.p == NULL) 4007 continue; 4008 if (*deftype & ROFFDEF_USER) { 4009 *deftype = ROFFDEF_USER; 4010 return n->val.p; 4011 } else { 4012 found = 1; 4013 break; 4014 } 4015 } 4016 for (n = r->rentab; n != NULL; n = n->next) { 4017 if (strncmp(name, n->key.p, len) != 0 || 4018 n->key.p[len] != '\0' || n->val.p == NULL) 4019 continue; 4020 if (*deftype & ROFFDEF_REN) { 4021 *deftype = ROFFDEF_REN; 4022 return n->val.p; 4023 } else { 4024 found = 1; 4025 break; 4026 } 4027 } 4028 for (i = 0; i < PREDEFS_MAX; i++) { 4029 if (strncmp(name, predefs[i].name, len) != 0 || 4030 predefs[i].name[len] != '\0') 4031 continue; 4032 if (*deftype & ROFFDEF_PRE) { 4033 *deftype = ROFFDEF_PRE; 4034 return predefs[i].str; 4035 } else { 4036 found = 1; 4037 break; 4038 } 4039 } 4040 if (r->man->meta.macroset != MACROSET_MAN) { 4041 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4042 if (strncmp(name, roff_name[tok], len) != 0 || 4043 roff_name[tok][len] != '\0') 4044 continue; 4045 if (*deftype & ROFFDEF_STD) { 4046 *deftype = ROFFDEF_STD; 4047 return NULL; 4048 } else { 4049 found = 1; 4050 break; 4051 } 4052 } 4053 } 4054 if (r->man->meta.macroset != MACROSET_MDOC) { 4055 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4056 if (strncmp(name, roff_name[tok], len) != 0 || 4057 roff_name[tok][len] != '\0') 4058 continue; 4059 if (*deftype & ROFFDEF_STD) { 4060 *deftype = ROFFDEF_STD; 4061 return NULL; 4062 } else { 4063 found = 1; 4064 break; 4065 } 4066 } 4067 } 4068 4069 if (found == 0 && *deftype != ROFFDEF_ANY) { 4070 if (*deftype & ROFFDEF_REN) { 
4071 /* 4072 * This might still be a request, 4073 * so do not treat it as undefined yet. 4074 */ 4075 *deftype = ROFFDEF_UNDEF; 4076 return NULL; 4077 } 4078 4079 /* Using an undefined string defines it to be empty. */ 4080 4081 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4082 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4083 } 4084 4085 *deftype = 0; 4086 return NULL; 4087 } 4088 4089 static void 4090 roff_freestr(struct roffkv *r) 4091 { 4092 struct roffkv *n, *nn; 4093 4094 for (n = r; n; n = nn) { 4095 free(n->key.p); 4096 free(n->val.p); 4097 nn = n->next; 4098 free(n); 4099 } 4100 } 4101 4102 /* --- accessors and utility functions ------------------------------------ */ 4103 4104 /* 4105 * Duplicate an input string, making the appropriate character 4106 * conversations (as stipulated by `tr') along the way. 4107 * Returns a heap-allocated string with all the replacements made. 4108 */ 4109 char * 4110 roff_strdup(const struct roff *r, const char *p) 4111 { 4112 const struct roffkv *cp; 4113 char *res; 4114 const char *pp; 4115 size_t ssz, sz; 4116 enum mandoc_esc esc; 4117 4118 if (NULL == r->xmbtab && NULL == r->xtab) 4119 return mandoc_strdup(p); 4120 else if ('\0' == *p) 4121 return mandoc_strdup(""); 4122 4123 /* 4124 * Step through each character looking for term matches 4125 * (remember that a `tr' can be invoked with an escape, which is 4126 * a glyph but the escape is multi-character). 4127 * We only do this if the character hash has been initialised 4128 * and the string is >0 length. 
4129 */ 4130 4131 res = NULL; 4132 ssz = 0; 4133 4134 while ('\0' != *p) { 4135 assert((unsigned int)*p < 128); 4136 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4137 sz = r->xtab[(int)*p].sz; 4138 res = mandoc_realloc(res, ssz + sz + 1); 4139 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4140 ssz += sz; 4141 p++; 4142 continue; 4143 } else if ('\\' != *p) { 4144 res = mandoc_realloc(res, ssz + 2); 4145 res[ssz++] = *p++; 4146 continue; 4147 } 4148 4149 /* Search for term matches. */ 4150 for (cp = r->xmbtab; cp; cp = cp->next) 4151 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4152 break; 4153 4154 if (NULL != cp) { 4155 /* 4156 * A match has been found. 4157 * Append the match to the array and move 4158 * forward by its keysize. 4159 */ 4160 res = mandoc_realloc(res, 4161 ssz + cp->val.sz + 1); 4162 memcpy(res + ssz, cp->val.p, cp->val.sz); 4163 ssz += cp->val.sz; 4164 p += (int)cp->key.sz; 4165 continue; 4166 } 4167 4168 /* 4169 * Handle escapes carefully: we need to copy 4170 * over just the escape itself, or else we might 4171 * do replacements within the escape itself. 4172 * Make sure to pass along the bogus string. 4173 */ 4174 pp = p++; 4175 esc = mandoc_escape(&p, NULL, NULL); 4176 if (ESCAPE_ERROR == esc) { 4177 sz = strlen(pp); 4178 res = mandoc_realloc(res, ssz + sz + 1); 4179 memcpy(res + ssz, pp, sz); 4180 break; 4181 } 4182 /* 4183 * We bail out on bad escapes. 4184 * No need to warn: we already did so when 4185 * roff_expand() was called. 4186 */ 4187 sz = (int)(p - pp); 4188 res = mandoc_realloc(res, ssz + sz + 1); 4189 memcpy(res + ssz, pp, sz); 4190 ssz += sz; 4191 } 4192 4193 res[(int)ssz] = '\0'; 4194 return res; 4195 } 4196 4197 int 4198 roff_getformat(const struct roff *r) 4199 { 4200 4201 return r->format; 4202 } 4203 4204 /* 4205 * Find out whether a line is a macro line or not. 4206 * If it is, adjust the current position and return one; if it isn't, 4207 * return zero and don't change the current position. 
4208 * If the control character has been set with `.cc', then let that grain 4209 * precedence. 4210 * This is slighly contrary to groff, where using the non-breaking 4211 * control character when `cc' has been invoked will cause the 4212 * non-breaking macro contents to be printed verbatim. 4213 */ 4214 int 4215 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4216 { 4217 int pos; 4218 4219 pos = *ppos; 4220 4221 if (r->control != '\0' && cp[pos] == r->control) 4222 pos++; 4223 else if (r->control != '\0') 4224 return 0; 4225 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4226 pos += 2; 4227 else if ('.' == cp[pos] || '\'' == cp[pos]) 4228 pos++; 4229 else 4230 return 0; 4231 4232 while (' ' == cp[pos] || '\t' == cp[pos]) 4233 pos++; 4234 4235 *ppos = pos; 4236 return 1; 4237 } 4238