1 /* $Id: roff.c,v 1.48 2012/07/07 18:27:36 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <assert.h> 19 #include <ctype.h> 20 #include <stdlib.h> 21 #include <string.h> 22 23 #include "mandoc.h" 24 #include "libroff.h" 25 #include "libmandoc.h" 26 27 /* Maximum number of nested if-else conditionals. */ 28 #define RSTACK_MAX 128 29 30 /* Maximum number of string expansions per line, to break infinite loops. */ 31 #define EXPAND_LIMIT 1000 32 33 enum rofft { 34 ROFF_ad, 35 ROFF_am, 36 ROFF_ami, 37 ROFF_am1, 38 ROFF_cc, 39 ROFF_de, 40 ROFF_dei, 41 ROFF_de1, 42 ROFF_ds, 43 ROFF_el, 44 ROFF_hy, 45 ROFF_ie, 46 ROFF_if, 47 ROFF_ig, 48 ROFF_it, 49 ROFF_ne, 50 ROFF_nh, 51 ROFF_nr, 52 ROFF_ns, 53 ROFF_ps, 54 ROFF_rm, 55 ROFF_so, 56 ROFF_ta, 57 ROFF_tr, 58 ROFF_Dd, 59 ROFF_TH, 60 ROFF_TS, 61 ROFF_TE, 62 ROFF_T_, 63 ROFF_EQ, 64 ROFF_EN, 65 ROFF_cblock, 66 ROFF_ccond, 67 ROFF_USERDEF, 68 ROFF_MAX 69 }; 70 71 enum roffrule { 72 ROFFRULE_ALLOW, 73 ROFFRULE_DENY 74 }; 75 76 /* 77 * A single register entity. If "set" is zero, the value of the 78 * register should be the default one, which is per-register. 79 * Registers are assumed to be unsigned ints for now. 80 */ 81 struct reg { 82 int set; /* whether set or not */ 83 unsigned int u; /* unsigned integer */ 84 }; 85 86 /* 87 * An incredibly-simple string buffer. 88 */ 89 struct roffstr { 90 char *p; /* nil-terminated buffer */ 91 size_t sz; /* saved strlen(p) */ 92 }; 93 94 /* 95 * A key-value roffstr pair as part of a singly-linked list. 96 */ 97 struct roffkv { 98 struct roffstr key; 99 struct roffstr val; 100 struct roffkv *next; /* next in list */ 101 }; 102 103 struct roff { 104 enum mparset parsetype; /* requested parse type */ 105 struct mparse *parse; /* parse point */ 106 struct roffnode *last; /* leaf of stack */ 107 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ 108 char control; /* control character */ 109 int rstackpos; /* position in rstack */ 110 struct reg regs[REG__MAX]; 111 struct roffkv *strtab; /* user-defined strings & macros */ 112 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 113 struct roffstr *xtab; /* single-byte trans table (`tr') */ 114 const char *current_string; /* value of last called user macro */ 115 struct tbl_node *first_tbl; /* first table parsed */ 116 struct tbl_node *last_tbl; /* last table parsed */ 117 struct tbl_node *tbl; /* current table being parsed */ 118 struct eqn_node *last_eqn; /* last equation parsed */ 119 struct eqn_node *first_eqn; /* first equation parsed */ 120 struct eqn_node *eqn; /* current equation being parsed */ 121 }; 122 123 struct roffnode { 124 enum rofft tok; /* type of node */ 125 struct roffnode *parent; /* up one in stack */ 126 int line; /* parse line */ 127 int col; /* parse col */ 128 char *name; /* node name, e.g. macro name */ 129 char *end; /* end-rules: custom token */ 130 int endspan; /* end-rules: next-line or infty */ 131 enum roffrule rule; /* current evaluation rule */ 132 }; 133 134 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 135 enum rofft tok, /* tok of macro */ \ 136 char **bufp, /* input buffer */ \ 137 size_t *szp, /* size of input buffer */ \ 138 int ln, /* parse line */ \ 139 int ppos, /* original pos in buffer */ \ 140 int pos, /* current pos in buffer */ \ 141 int *offs /* reset offset of buffer data */ 142 143 typedef enum rofferr (*roffproc)(ROFF_ARGS); 144 145 struct roffmac { 146 const char *name; /* macro name */ 147 roffproc proc; /* process new macro */ 148 roffproc text; /* process as child text of macro */ 149 roffproc sub; /* process as child of macro */ 150 int flags; 151 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 152 struct roffmac *next; 153 }; 154 155 struct predef { 156 const char *name; /* predefined input name */ 157 const char *str; /* replacement symbol */ 158 }; 159 160 #define PREDEF(__name, __str) \ 161 { (__name), (__str) }, 162 163 static enum rofft roffhash_find(const char *, size_t); 164 static void roffhash_init(void); 165 static void roffnode_cleanscope(struct roff *); 166 static void roffnode_pop(struct roff *); 167 static void roffnode_push(struct roff *, enum rofft, 168 const char *, int, int); 169 static enum rofferr roff_block(ROFF_ARGS); 170 static enum rofferr roff_block_text(ROFF_ARGS); 171 static enum rofferr roff_block_sub(ROFF_ARGS); 172 static enum rofferr roff_cblock(ROFF_ARGS); 173 static enum rofferr roff_cc(ROFF_ARGS); 174 static enum rofferr roff_ccond(ROFF_ARGS); 175 static enum rofferr roff_cond(ROFF_ARGS); 176 static enum rofferr roff_cond_text(ROFF_ARGS); 177 static enum rofferr roff_cond_sub(ROFF_ARGS); 178 static enum rofferr roff_ds(ROFF_ARGS); 179 static enum roffrule roff_evalcond(const char *, int *); 180 static void roff_free1(struct roff *); 181 static void roff_freestr(struct roffkv *); 182 static char *roff_getname(struct roff *, char **, int, int); 183 static const char *roff_getstrn(const struct roff *, 184 const char *, size_t); 185 static enum rofferr roff_line_ignore(ROFF_ARGS); 186 static enum rofferr roff_nr(ROFF_ARGS); 187 static void roff_openeqn(struct roff *, const char *, 188 int, int, const char *); 189 static enum rofft roff_parse(struct roff *, const char *, int *); 190 static enum rofferr roff_parsetext(char *); 191 static enum rofferr roff_res(struct roff *, 192 char **, size_t *, int, int); 193 static enum rofferr roff_rm(ROFF_ARGS); 194 static void roff_setstr(struct roff *, 195 const char *, const char *, int); 196 static void roff_setstrn(struct roffkv **, const char *, 197 size_t, const char *, size_t, int); 198 static enum rofferr roff_so(ROFF_ARGS); 199 static enum rofferr roff_tr(ROFF_ARGS); 200 static enum rofferr roff_Dd(ROFF_ARGS); 201 static enum rofferr roff_TH(ROFF_ARGS); 202 static enum rofferr roff_TE(ROFF_ARGS); 203 static enum rofferr roff_TS(ROFF_ARGS); 204 static enum rofferr roff_EQ(ROFF_ARGS); 205 static enum rofferr roff_EN(ROFF_ARGS); 206 static enum rofferr roff_T_(ROFF_ARGS); 207 static enum rofferr roff_userdef(ROFF_ARGS); 208 209 /* See roffhash_find() */ 210 211 #define ASCII_HI 126 212 #define ASCII_LO 33 213 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1) 214 215 static struct roffmac *hash[HASHWIDTH]; 216 217 static struct roffmac roffs[ROFF_MAX] = { 218 { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, 219 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 220 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 221 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 222 { "cc", roff_cc, NULL, NULL, 0, NULL }, 223 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 224 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 225 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 226 { "ds", roff_ds, NULL, NULL, 0, NULL }, 227 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 228 { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, 229 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 230 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 231 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 232 { "it", roff_line_ignore, NULL, NULL, 0, NULL }, 233 { "ne", roff_line_ignore, NULL, NULL, 0, NULL }, 234 { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, 235 { "nr", roff_nr, NULL, NULL, 0, NULL }, 236 { "ns", roff_line_ignore, NULL, NULL, 0, NULL }, 237 { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, 238 { "rm", roff_rm, NULL, NULL, 0, NULL }, 239 { "so", roff_so, NULL, NULL, 0, NULL }, 240 { "ta", roff_line_ignore, NULL, NULL, 0, NULL }, 241 { "tr", roff_tr, NULL, NULL, 0, NULL }, 242 { "Dd", roff_Dd, NULL, NULL, 0, NULL }, 243 { "TH", roff_TH, NULL, NULL, 0, NULL }, 244 { "TS", roff_TS, NULL, NULL, 0, NULL }, 245 { "TE", roff_TE, NULL, NULL, 0, NULL }, 246 { "T&", roff_T_, NULL, NULL, 0, NULL }, 247 { "EQ", roff_EQ, NULL, NULL, 0, NULL }, 248 { "EN", roff_EN, NULL, NULL, 0, NULL }, 249 { ".", roff_cblock, NULL, NULL, 0, NULL }, 250 { "\\}", roff_ccond, NULL, NULL, 0, NULL }, 251 { NULL, roff_userdef, NULL, NULL, 0, NULL }, 252 }; 253 254 const char *const __mdoc_reserved[] = { 255 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", 256 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", 257 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx", 258 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq", 259 "Ds", "Dt", "Dv", "Dx", "D1", 260 "Ec", "Ed", "Ef", "Ek", "El", "Em", "em", 261 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex", 262 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx", 263 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP", 264 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx", 265 "Oc", "Oo", "Op", "Os", "Ot", "Ox", 266 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq", 267 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv", 268 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq", 269 "Ss", "St", "Sx", "Sy", 270 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr", 271 "%A", "%B", "%D", "%I", "%J", "%N", "%O", 272 "%P", "%Q", "%R", "%T", "%U", "%V", 273 NULL 274 }; 275 276 const char *const __man_reserved[] = { 277 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT", 278 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR", 279 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT", 280 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY", 281 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS", 282 NULL 283 }; 284 285 /* Array of injected predefined strings. */ 286 #define PREDEFS_MAX 38 287 static const struct predef predefs[PREDEFS_MAX] = { 288 #include "predefs.in" 289 }; 290 291 /* See roffhash_find() */ 292 #define ROFF_HASH(p) (p[0] - ASCII_LO) 293 294 static void 295 roffhash_init(void) 296 { 297 struct roffmac *n; 298 int buc, i; 299 300 for (i = 0; i < (int)ROFF_USERDEF; i++) { 301 assert(roffs[i].name[0] >= ASCII_LO); 302 assert(roffs[i].name[0] <= ASCII_HI); 303 304 buc = ROFF_HASH(roffs[i].name); 305 306 if (NULL != (n = hash[buc])) { 307 for ( ; n->next; n = n->next) 308 /* Do nothing. */ ; 309 n->next = &roffs[i]; 310 } else 311 hash[buc] = &roffs[i]; 312 } 313 } 314 315 /* 316 * Look up a roff token by its name. Returns ROFF_MAX if no macro by 317 * the nil-terminated string name could be found. 318 */ 319 static enum rofft 320 roffhash_find(const char *p, size_t s) 321 { 322 int buc; 323 struct roffmac *n; 324 325 /* 326 * libroff has an extremely simple hashtable, for the time 327 * being, which simply keys on the first character, which must 328 * be printable, then walks a chain. It works well enough until 329 * optimised. 330 */ 331 332 if (p[0] < ASCII_LO || p[0] > ASCII_HI) 333 return(ROFF_MAX); 334 335 buc = ROFF_HASH(p); 336 337 if (NULL == (n = hash[buc])) 338 return(ROFF_MAX); 339 for ( ; n; n = n->next) 340 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) 341 return((enum rofft)(n - roffs)); 342 343 return(ROFF_MAX); 344 } 345 346 347 /* 348 * Pop the current node off of the stack of roff instructions currently 349 * pending. 350 */ 351 static void 352 roffnode_pop(struct roff *r) 353 { 354 struct roffnode *p; 355 356 assert(r->last); 357 p = r->last; 358 359 r->last = r->last->parent; 360 free(p->name); 361 free(p->end); 362 free(p); 363 } 364 365 366 /* 367 * Push a roff node onto the instruction stack. This must later be 368 * removed with roffnode_pop(). 369 */ 370 static void 371 roffnode_push(struct roff *r, enum rofft tok, const char *name, 372 int line, int col) 373 { 374 struct roffnode *p; 375 376 p = mandoc_calloc(1, sizeof(struct roffnode)); 377 p->tok = tok; 378 if (name) 379 p->name = mandoc_strdup(name); 380 p->parent = r->last; 381 p->line = line; 382 p->col = col; 383 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY; 384 385 r->last = p; 386 } 387 388 389 static void 390 roff_free1(struct roff *r) 391 { 392 struct tbl_node *t; 393 struct eqn_node *e; 394 int i; 395 396 while (NULL != (t = r->first_tbl)) { 397 r->first_tbl = t->next; 398 tbl_free(t); 399 } 400 401 r->first_tbl = r->last_tbl = r->tbl = NULL; 402 403 while (NULL != (e = r->first_eqn)) { 404 r->first_eqn = e->next; 405 eqn_free(e); 406 } 407 408 r->first_eqn = r->last_eqn = r->eqn = NULL; 409 410 while (r->last) 411 roffnode_pop(r); 412 413 roff_freestr(r->strtab); 414 roff_freestr(r->xmbtab); 415 416 r->strtab = r->xmbtab = NULL; 417 418 if (r->xtab) 419 for (i = 0; i < 128; i++) 420 free(r->xtab[i].p); 421 422 free(r->xtab); 423 r->xtab = NULL; 424 } 425 426 void 427 roff_reset(struct roff *r) 428 { 429 int i; 430 431 roff_free1(r); 432 433 r->control = 0; 434 memset(&r->regs, 0, sizeof(struct reg) * REG__MAX); 435 436 for (i = 0; i < PREDEFS_MAX; i++) 437 roff_setstr(r, predefs[i].name, predefs[i].str, 0); 438 } 439 440 441 void 442 roff_free(struct roff *r) 443 { 444 445 roff_free1(r); 446 free(r); 447 } 448 449 450 struct roff * 451 roff_alloc(enum mparset type, struct mparse *parse) 452 { 453 struct roff *r; 454 int i; 455 456 r = mandoc_calloc(1, sizeof(struct roff)); 457 r->parsetype = type; 458 r->parse = parse; 459 r->rstackpos = -1; 460 461 roffhash_init(); 462 463 for (i = 0; i < PREDEFS_MAX; i++) 464 roff_setstr(r, predefs[i].name, predefs[i].str, 0); 465 466 return(r); 467 } 468 469 /* 470 * Pre-filter each and every line for reserved words (one beginning with 471 * `\*', e.g., `\*(ab'). These must be handled before the actual line 472 * is processed. 473 * This also checks the syntax of regular escapes. 474 */ 475 static enum rofferr 476 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) 477 { 478 enum mandoc_esc esc; 479 const char *stesc; /* start of an escape sequence ('\\') */ 480 const char *stnam; /* start of the name, after "[(*" */ 481 const char *cp; /* end of the name, e.g. before ']' */ 482 const char *res; /* the string to be substituted */ 483 int i, maxl, expand_count; 484 size_t nsz; 485 char *n; 486 487 expand_count = 0; 488 489 again: 490 cp = *bufp + pos; 491 while (NULL != (cp = strchr(cp, '\\'))) { 492 stesc = cp++; 493 494 /* 495 * The second character must be an asterisk. 496 * If it isn't, skip it anyway: It is escaped, 497 * so it can't start another escape sequence. 498 */ 499 500 if ('\0' == *cp) 501 return(ROFF_CONT); 502 503 if ('*' != *cp) { 504 res = cp; 505 esc = mandoc_escape(&cp, NULL, NULL); 506 if (ESCAPE_ERROR != esc) 507 continue; 508 cp = res; 509 mandoc_msg 510 (MANDOCERR_BADESCAPE, r->parse, 511 ln, (int)(stesc - *bufp), NULL); 512 return(ROFF_CONT); 513 } 514 515 cp++; 516 517 /* 518 * The third character decides the length 519 * of the name of the string. 520 * Save a pointer to the name. 521 */ 522 523 switch (*cp) { 524 case ('\0'): 525 return(ROFF_CONT); 526 case ('('): 527 cp++; 528 maxl = 2; 529 break; 530 case ('['): 531 cp++; 532 maxl = 0; 533 break; 534 default: 535 maxl = 1; 536 break; 537 } 538 stnam = cp; 539 540 /* Advance to the end of the name. */ 541 542 for (i = 0; 0 == maxl || i < maxl; i++, cp++) { 543 if ('\0' == *cp) { 544 mandoc_msg 545 (MANDOCERR_BADESCAPE, 546 r->parse, ln, 547 (int)(stesc - *bufp), NULL); 548 return(ROFF_CONT); 549 } 550 if (0 == maxl && ']' == *cp) 551 break; 552 } 553 554 /* 555 * Retrieve the replacement string; if it is 556 * undefined, resume searching for escapes. 557 */ 558 559 res = roff_getstrn(r, stnam, (size_t)i); 560 561 if (NULL == res) { 562 mandoc_msg 563 (MANDOCERR_BADESCAPE, r->parse, 564 ln, (int)(stesc - *bufp), NULL); 565 res = ""; 566 } 567 568 /* Replace the escape sequence by the string. */ 569 570 pos = stesc - *bufp; 571 572 nsz = *szp + strlen(res) + 1; 573 n = mandoc_malloc(nsz); 574 575 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1)); 576 strlcat(n, res, nsz); 577 strlcat(n, cp + (maxl ? 0 : 1), nsz); 578 579 free(*bufp); 580 581 *bufp = n; 582 *szp = nsz; 583 584 if (EXPAND_LIMIT >= ++expand_count) 585 goto again; 586 587 /* Just leave the string unexpanded. */ 588 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL); 589 return(ROFF_IGN); 590 } 591 return(ROFF_CONT); 592 } 593 594 /* 595 * Process text streams: convert all breakable hyphens into ASCII_HYPH. 596 */ 597 static enum rofferr 598 roff_parsetext(char *p) 599 { 600 size_t sz; 601 const char *start; 602 enum mandoc_esc esc; 603 604 start = p; 605 606 while ('\0' != *p) { 607 sz = strcspn(p, "-\\"); 608 p += sz; 609 610 if ('\0' == *p) 611 break; 612 613 if ('\\' == *p) { 614 /* Skip over escapes. */ 615 p++; 616 esc = mandoc_escape 617 ((const char **)&p, NULL, NULL); 618 if (ESCAPE_ERROR == esc) 619 break; 620 continue; 621 } else if (p == start) { 622 p++; 623 continue; 624 } 625 626 if (isalpha((unsigned char)p[-1]) && 627 isalpha((unsigned char)p[1])) 628 *p = ASCII_HYPH; 629 p++; 630 } 631 632 return(ROFF_CONT); 633 } 634 635 enum rofferr 636 roff_parseln(struct roff *r, int ln, char **bufp, 637 size_t *szp, int pos, int *offs) 638 { 639 enum rofft t; 640 enum rofferr e; 641 int ppos, ctl; 642 643 /* 644 * Run the reserved-word filter only if we have some reserved 645 * words to fill in. 646 */ 647 648 e = roff_res(r, bufp, szp, ln, pos); 649 if (ROFF_IGN == e) 650 return(e); 651 assert(ROFF_CONT == e); 652 653 ppos = pos; 654 ctl = roff_getcontrol(r, *bufp, &pos); 655 656 /* 657 * First, if a scope is open and we're not a macro, pass the 658 * text through the macro's filter. If a scope isn't open and 659 * we're not a macro, just let it through. 660 * Finally, if there's an equation scope open, divert it into it 661 * no matter our state. 662 */ 663 664 if (r->last && ! ctl) { 665 t = r->last->tok; 666 assert(roffs[t].text); 667 e = (*roffs[t].text) 668 (r, t, bufp, szp, ln, pos, pos, offs); 669 assert(ROFF_IGN == e || ROFF_CONT == e); 670 if (ROFF_CONT != e) 671 return(e); 672 if (r->eqn) 673 return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); 674 if (r->tbl) 675 return(tbl_read(r->tbl, ln, *bufp, pos)); 676 return(roff_parsetext(*bufp + pos)); 677 } else if ( ! ctl) { 678 if (r->eqn) 679 return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); 680 if (r->tbl) 681 return(tbl_read(r->tbl, ln, *bufp, pos)); 682 return(roff_parsetext(*bufp + pos)); 683 } else if (r->eqn) 684 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs)); 685 686 /* 687 * If a scope is open, go to the child handler for that macro, 688 * as it may want to preprocess before doing anything with it. 689 * Don't do so if an equation is open. 690 */ 691 692 if (r->last) { 693 t = r->last->tok; 694 assert(roffs[t].sub); 695 return((*roffs[t].sub) 696 (r, t, bufp, szp, 697 ln, ppos, pos, offs)); 698 } 699 700 /* 701 * Lastly, as we've no scope open, try to look up and execute 702 * the new macro. If no macro is found, simply return and let 703 * the compilers handle it. 704 */ 705 706 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) 707 return(ROFF_CONT); 708 709 assert(roffs[t].proc); 710 return((*roffs[t].proc) 711 (r, t, bufp, szp, 712 ln, ppos, pos, offs)); 713 } 714 715 716 void 717 roff_endparse(struct roff *r) 718 { 719 720 if (r->last) 721 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 722 r->last->line, r->last->col, NULL); 723 724 if (r->eqn) { 725 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 726 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL); 727 eqn_end(&r->eqn); 728 } 729 730 if (r->tbl) { 731 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 732 r->tbl->line, r->tbl->pos, NULL); 733 tbl_end(&r->tbl); 734 } 735 } 736 737 /* 738 * Parse a roff node's type from the input buffer. This must be in the 739 * form of ".foo xxx" in the usual way. 740 */ 741 static enum rofft 742 roff_parse(struct roff *r, const char *buf, int *pos) 743 { 744 const char *mac; 745 size_t maclen; 746 enum rofft t; 747 748 if ('\0' == buf[*pos] || '"' == buf[*pos] || 749 '\t' == buf[*pos] || ' ' == buf[*pos]) 750 return(ROFF_MAX); 751 752 /* 753 * We stop the macro parse at an escape, tab, space, or nil. 754 * However, `\}' is also a valid macro, so make sure we don't 755 * clobber it by seeing the `\' as the end of token. 756 */ 757 758 mac = buf + *pos; 759 maclen = strcspn(mac + 1, " \\\t\0") + 1; 760 761 t = (r->current_string = roff_getstrn(r, mac, maclen)) 762 ? ROFF_USERDEF : roffhash_find(mac, maclen); 763 764 *pos += (int)maclen; 765 766 while (buf[*pos] && ' ' == buf[*pos]) 767 (*pos)++; 768 769 return(t); 770 } 771 772 /* ARGSUSED */ 773 static enum rofferr 774 roff_cblock(ROFF_ARGS) 775 { 776 777 /* 778 * A block-close `..' should only be invoked as a child of an 779 * ignore macro, otherwise raise a warning and just ignore it. 780 */ 781 782 if (NULL == r->last) { 783 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 784 return(ROFF_IGN); 785 } 786 787 switch (r->last->tok) { 788 case (ROFF_am): 789 /* FALLTHROUGH */ 790 case (ROFF_ami): 791 /* FALLTHROUGH */ 792 case (ROFF_am1): 793 /* FALLTHROUGH */ 794 case (ROFF_de): 795 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 796 /* FALLTHROUGH */ 797 case (ROFF_dei): 798 /* FALLTHROUGH */ 799 case (ROFF_ig): 800 break; 801 default: 802 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 803 return(ROFF_IGN); 804 } 805 806 if ((*bufp)[pos]) 807 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 808 809 roffnode_pop(r); 810 roffnode_cleanscope(r); 811 return(ROFF_IGN); 812 813 } 814 815 816 static void 817 roffnode_cleanscope(struct roff *r) 818 { 819 820 while (r->last) { 821 if (--r->last->endspan != 0) 822 break; 823 roffnode_pop(r); 824 } 825 } 826 827 828 /* ARGSUSED */ 829 static enum rofferr 830 roff_ccond(ROFF_ARGS) 831 { 832 833 if (NULL == r->last) { 834 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 835 return(ROFF_IGN); 836 } 837 838 switch (r->last->tok) { 839 case (ROFF_el): 840 /* FALLTHROUGH */ 841 case (ROFF_ie): 842 /* FALLTHROUGH */ 843 case (ROFF_if): 844 break; 845 default: 846 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 847 return(ROFF_IGN); 848 } 849 850 if (r->last->endspan > -1) { 851 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 852 return(ROFF_IGN); 853 } 854 855 if ((*bufp)[pos]) 856 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 857 858 roffnode_pop(r); 859 roffnode_cleanscope(r); 860 return(ROFF_IGN); 861 } 862 863 864 /* ARGSUSED */ 865 static enum rofferr 866 roff_block(ROFF_ARGS) 867 { 868 int sv; 869 size_t sz; 870 char *name; 871 872 name = NULL; 873 874 if (ROFF_ig != tok) { 875 if ('\0' == (*bufp)[pos]) { 876 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); 877 return(ROFF_IGN); 878 } 879 880 /* 881 * Re-write `de1', since we don't really care about 882 * groff's strange compatibility mode, into `de'. 883 */ 884 885 if (ROFF_de1 == tok) 886 tok = ROFF_de; 887 if (ROFF_de == tok) 888 name = *bufp + pos; 889 else 890 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, 891 roffs[tok].name); 892 893 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) 894 pos++; 895 896 while (isspace((unsigned char)(*bufp)[pos])) 897 (*bufp)[pos++] = '\0'; 898 } 899 900 roffnode_push(r, tok, name, ln, ppos); 901 902 /* 903 * At the beginning of a `de' macro, clear the existing string 904 * with the same name, if there is one. New content will be 905 * added from roff_block_text() in multiline mode. 906 */ 907 908 if (ROFF_de == tok) 909 roff_setstr(r, name, "", 0); 910 911 if ('\0' == (*bufp)[pos]) 912 return(ROFF_IGN); 913 914 /* If present, process the custom end-of-line marker. */ 915 916 sv = pos; 917 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) 918 pos++; 919 920 /* 921 * Note: groff does NOT like escape characters in the input. 922 * Instead of detecting this, we're just going to let it fly and 923 * to hell with it. 924 */ 925 926 assert(pos > sv); 927 sz = (size_t)(pos - sv); 928 929 if (1 == sz && '.' == (*bufp)[sv]) 930 return(ROFF_IGN); 931 932 r->last->end = mandoc_malloc(sz + 1); 933 934 memcpy(r->last->end, *bufp + sv, sz); 935 r->last->end[(int)sz] = '\0'; 936 937 if ((*bufp)[pos]) 938 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 939 940 return(ROFF_IGN); 941 } 942 943 944 /* ARGSUSED */ 945 static enum rofferr 946 roff_block_sub(ROFF_ARGS) 947 { 948 enum rofft t; 949 int i, j; 950 951 /* 952 * First check whether a custom macro exists at this level. If 953 * it does, then check against it. This is some of groff's 954 * stranger behaviours. If we encountered a custom end-scope 955 * tag and that tag also happens to be a "real" macro, then we 956 * need to try interpreting it again as a real macro. If it's 957 * not, then return ignore. Else continue. 958 */ 959 960 if (r->last->end) { 961 for (i = pos, j = 0; r->last->end[j]; j++, i++) 962 if ((*bufp)[i] != r->last->end[j]) 963 break; 964 965 if ('\0' == r->last->end[j] && 966 ('\0' == (*bufp)[i] || 967 ' ' == (*bufp)[i] || 968 '\t' == (*bufp)[i])) { 969 roffnode_pop(r); 970 roffnode_cleanscope(r); 971 972 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i]) 973 i++; 974 975 pos = i; 976 if (ROFF_MAX != roff_parse(r, *bufp, &pos)) 977 return(ROFF_RERUN); 978 return(ROFF_IGN); 979 } 980 } 981 982 /* 983 * If we have no custom end-query or lookup failed, then try 984 * pulling it out of the hashtable. 985 */ 986 987 t = roff_parse(r, *bufp, &pos); 988 989 /* 990 * Macros other than block-end are only significant 991 * in `de' blocks; elsewhere, simply throw them away. 992 */ 993 if (ROFF_cblock != t) { 994 if (ROFF_de == tok) 995 roff_setstr(r, r->last->name, *bufp + ppos, 1); 996 return(ROFF_IGN); 997 } 998 999 assert(roffs[t].proc); 1000 return((*roffs[t].proc)(r, t, bufp, szp, 1001 ln, ppos, pos, offs)); 1002 } 1003 1004 1005 /* ARGSUSED */ 1006 static enum rofferr 1007 roff_block_text(ROFF_ARGS) 1008 { 1009 1010 if (ROFF_de == tok) 1011 roff_setstr(r, r->last->name, *bufp + pos, 1); 1012 1013 return(ROFF_IGN); 1014 } 1015 1016 1017 /* ARGSUSED */ 1018 static enum rofferr 1019 roff_cond_sub(ROFF_ARGS) 1020 { 1021 enum rofft t; 1022 enum roffrule rr; 1023 char *ep; 1024 1025 rr = r->last->rule; 1026 roffnode_cleanscope(r); 1027 1028 /* 1029 * If the macro is unknown, first check if it contains a closing 1030 * delimiter `\}'. If it does, close out our scope and return 1031 * the currently-scoped rule (ignore or continue). Else, drop 1032 * into the currently-scoped rule. 1033 */ 1034 1035 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) { 1036 ep = &(*bufp)[pos]; 1037 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { 1038 ep++; 1039 if ('}' != *ep) 1040 continue; 1041 1042 /* 1043 * Make the \} go away. 1044 * This is a little haphazard, as it's not quite 1045 * clear how nroff does this. 1046 * If we're at the end of line, then just chop 1047 * off the \} and resize the buffer. 1048 * If we aren't, then conver it to spaces. 1049 */ 1050 1051 if ('\0' == *(ep + 1)) { 1052 *--ep = '\0'; 1053 *szp -= 2; 1054 } else 1055 *(ep - 1) = *ep = ' '; 1056 1057 roff_ccond(r, ROFF_ccond, bufp, szp, 1058 ln, pos, pos + 2, offs); 1059 break; 1060 } 1061 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 1062 } 1063 1064 /* 1065 * A denied conditional must evaluate its children if and only 1066 * if they're either structurally required (such as loops and 1067 * conditionals) or a closing macro. 1068 */ 1069 1070 if (ROFFRULE_DENY == rr) 1071 if ( ! (ROFFMAC_STRUCT & roffs[t].flags)) 1072 if (ROFF_ccond != t) 1073 return(ROFF_IGN); 1074 1075 assert(roffs[t].proc); 1076 return((*roffs[t].proc)(r, t, bufp, szp, 1077 ln, ppos, pos, offs)); 1078 } 1079 1080 /* ARGSUSED */ 1081 static enum rofferr 1082 roff_cond_text(ROFF_ARGS) 1083 { 1084 char *ep; 1085 enum roffrule rr; 1086 1087 rr = r->last->rule; 1088 roffnode_cleanscope(r); 1089 1090 ep = &(*bufp)[pos]; 1091 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { 1092 ep++; 1093 if ('}' != *ep) 1094 continue; 1095 *ep = '&'; 1096 roff_ccond(r, ROFF_ccond, bufp, szp, 1097 ln, pos, pos + 2, offs); 1098 } 1099 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 1100 } 1101 1102 static enum roffrule 1103 roff_evalcond(const char *v, int *pos) 1104 { 1105 1106 switch (v[*pos]) { 1107 case ('n'): 1108 (*pos)++; 1109 return(ROFFRULE_ALLOW); 1110 case ('e'): 1111 /* FALLTHROUGH */ 1112 case ('o'): 1113 /* FALLTHROUGH */ 1114 case ('t'): 1115 (*pos)++; 1116 return(ROFFRULE_DENY); 1117 default: 1118 break; 1119 } 1120 1121 while (v[*pos] && ' ' != v[*pos]) 1122 (*pos)++; 1123 return(ROFFRULE_DENY); 1124 } 1125 1126 /* ARGSUSED */ 1127 static enum rofferr 1128 roff_line_ignore(ROFF_ARGS) 1129 { 1130 1131 if (ROFF_it == tok) 1132 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it"); 1133 1134 return(ROFF_IGN); 1135 } 1136 1137 /* ARGSUSED */ 1138 static enum rofferr 1139 roff_cond(ROFF_ARGS) 1140 { 1141 1142 roffnode_push(r, tok, NULL, ln, ppos); 1143 1144 /* 1145 * An `.el' has no conditional body: it will consume the value 1146 * of the current rstack entry set in prior `ie' calls or 1147 * defaults to DENY. 1148 * 1149 * If we're not an `el', however, then evaluate the conditional. 1150 */ 1151 1152 r->last->rule = ROFF_el == tok ? 1153 (r->rstackpos < 0 ? 1154 ROFFRULE_DENY : r->rstack[r->rstackpos--]) : 1155 roff_evalcond(*bufp, &pos); 1156 1157 /* 1158 * An if-else will put the NEGATION of the current evaluated 1159 * conditional into the stack of rules. 1160 */ 1161 1162 if (ROFF_ie == tok) { 1163 if (r->rstackpos == RSTACK_MAX - 1) { 1164 mandoc_msg(MANDOCERR_MEM, 1165 r->parse, ln, ppos, NULL); 1166 return(ROFF_ERR); 1167 } 1168 r->rstack[++r->rstackpos] = 1169 ROFFRULE_DENY == r->last->rule ? 1170 ROFFRULE_ALLOW : ROFFRULE_DENY; 1171 } 1172 1173 /* If the parent has false as its rule, then so do we. */ 1174 1175 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule) 1176 r->last->rule = ROFFRULE_DENY; 1177 1178 /* 1179 * Determine scope. 1180 * If there is nothing on the line after the conditional, 1181 * not even whitespace, use next-line scope. 1182 */ 1183 1184 if ('\0' == (*bufp)[pos]) { 1185 r->last->endspan = 2; 1186 goto out; 1187 } 1188 1189 while (' ' == (*bufp)[pos]) 1190 pos++; 1191 1192 /* An opening brace requests multiline scope. */ 1193 1194 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { 1195 r->last->endspan = -1; 1196 pos += 2; 1197 goto out; 1198 } 1199 1200 /* 1201 * Anything else following the conditional causes 1202 * single-line scope. Warn if the scope contains 1203 * nothing but trailing whitespace. 1204 */ 1205 1206 if ('\0' == (*bufp)[pos]) 1207 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); 1208 1209 r->last->endspan = 1; 1210 1211 out: 1212 *offs = pos; 1213 return(ROFF_RERUN); 1214 } 1215 1216 1217 /* ARGSUSED */ 1218 static enum rofferr 1219 roff_ds(ROFF_ARGS) 1220 { 1221 char *name, *string; 1222 1223 /* 1224 * A symbol is named by the first word following the macro 1225 * invocation up to a space. Its value is anything after the 1226 * name's trailing whitespace and optional double-quote. Thus, 1227 * 1228 * [.ds foo "bar " ] 1229 * 1230 * will have `bar " ' as its value. 1231 */ 1232 1233 string = *bufp + pos; 1234 name = roff_getname(r, &string, ln, pos); 1235 if ('\0' == *name) 1236 return(ROFF_IGN); 1237 1238 /* Read past initial double-quote. */ 1239 if ('"' == *string) 1240 string++; 1241 1242 /* The rest is the value. */ 1243 roff_setstr(r, name, string, 0); 1244 return(ROFF_IGN); 1245 } 1246 1247 int 1248 roff_regisset(const struct roff *r, enum regs reg) 1249 { 1250 1251 return(r->regs[(int)reg].set); 1252 } 1253 1254 unsigned int 1255 roff_regget(const struct roff *r, enum regs reg) 1256 { 1257 1258 return(r->regs[(int)reg].u); 1259 } 1260 1261 void 1262 roff_regunset(struct roff *r, enum regs reg) 1263 { 1264 1265 r->regs[(int)reg].set = 0; 1266 } 1267 1268 /* ARGSUSED */ 1269 static enum rofferr 1270 roff_nr(ROFF_ARGS) 1271 { 1272 const char *key; 1273 char *val; 1274 int iv; 1275 1276 val = *bufp + pos; 1277 key = roff_getname(r, &val, ln, pos); 1278 1279 if (0 == strcmp(key, "nS")) { 1280 r->regs[(int)REG_nS].set = 1; 1281 if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0) 1282 r->regs[(int)REG_nS].u = (unsigned)iv; 1283 else 1284 r->regs[(int)REG_nS].u = 0u; 1285 } 1286 1287 return(ROFF_IGN); 1288 } 1289 1290 /* ARGSUSED */ 1291 static enum rofferr 1292 roff_rm(ROFF_ARGS) 1293 { 1294 const char *name; 1295 char *cp; 1296 1297 cp = *bufp + pos; 1298 while ('\0' != *cp) { 1299 name = roff_getname(r, &cp, ln, (int)(cp - *bufp)); 1300 if ('\0' != *name) 1301 roff_setstr(r, name, NULL, 0); 1302 } 1303 return(ROFF_IGN); 1304 } 1305 1306 /* ARGSUSED */ 1307 static enum rofferr 1308 roff_Dd(ROFF_ARGS) 1309 { 1310 const char *const *cp; 1311 1312 if (MPARSE_MDOC != r->parsetype) 1313 for (cp = __mdoc_reserved; *cp; cp++) 1314 roff_setstr(r, *cp, NULL, 0); 1315 1316 return(ROFF_CONT); 1317 } 1318 1319 /* ARGSUSED */ 1320 static enum rofferr 1321 roff_TH(ROFF_ARGS) 1322 { 1323 const char *const *cp; 1324 1325 if (MPARSE_MDOC != r->parsetype) 1326 for (cp = __man_reserved; *cp; cp++) 1327 roff_setstr(r, *cp, NULL, 0); 1328 1329 return(ROFF_CONT); 1330 } 1331 1332 /* ARGSUSED */ 1333 static enum rofferr 1334 roff_TE(ROFF_ARGS) 1335 { 1336 1337 if (NULL == r->tbl) 1338 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1339 else 1340 tbl_end(&r->tbl); 1341 1342 return(ROFF_IGN); 1343 } 1344 1345 /* ARGSUSED */ 1346 static enum rofferr 1347 roff_T_(ROFF_ARGS) 1348 { 1349 1350 if (NULL == r->tbl) 1351 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1352 else 1353 tbl_restart(ppos, ln, r->tbl); 1354 1355 return(ROFF_IGN); 1356 } 1357 1358 #if 0 1359 static int 1360 roff_closeeqn(struct roff *r) 1361 { 1362 1363 return(r->eqn && ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0); 1364 } 1365 #endif 1366 1367 static void 1368 roff_openeqn(struct roff *r, const char *name, int line, 1369 int offs, const char *buf) 1370 { 1371 struct eqn_node *e; 1372 int poff; 1373 1374 assert(NULL == r->eqn); 1375 e = eqn_alloc(name, offs, line, r->parse); 1376 1377 if (r->last_eqn) 1378 r->last_eqn->next = e; 1379 else 1380 r->first_eqn = r->last_eqn = e; 1381 1382 r->eqn = r->last_eqn = e; 1383 1384 if (buf) { 1385 poff = 0; 1386 eqn_read(&r->eqn, line, buf, offs, &poff); 1387 } 1388 } 1389 1390 /* ARGSUSED */ 1391 static enum rofferr 1392 roff_EQ(ROFF_ARGS) 1393 { 1394 1395 roff_openeqn(r, *bufp + pos, ln, ppos, NULL); 1396 return(ROFF_IGN); 1397 } 1398 1399 /* ARGSUSED */ 1400 static enum rofferr 1401 roff_EN(ROFF_ARGS) 1402 { 1403 1404 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1405 return(ROFF_IGN); 1406 } 1407 1408 /* ARGSUSED */ 1409 static enum rofferr 1410 roff_TS(ROFF_ARGS) 1411 { 1412 struct tbl_node *t; 1413 1414 if (r->tbl) { 1415 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL); 1416 tbl_end(&r->tbl); 1417 } 1418 1419 t = tbl_alloc(ppos, ln, r->parse); 1420 1421 if (r->last_tbl) 1422 r->last_tbl->next = t; 1423 else 1424 r->first_tbl = r->last_tbl = t; 1425 1426 r->tbl = r->last_tbl = t; 1427 return(ROFF_IGN); 1428 } 1429 1430 /* ARGSUSED */ 1431 static enum rofferr 1432 roff_cc(ROFF_ARGS) 1433 { 1434 const char *p; 1435 1436 p = *bufp + pos; 1437 1438 if ('\0' == *p || '.' == (r->control = *p++)) 1439 r->control = 0; 1440 1441 if ('\0' != *p) 1442 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL); 1443 1444 return(ROFF_IGN); 1445 } 1446 1447 /* ARGSUSED */ 1448 static enum rofferr 1449 roff_tr(ROFF_ARGS) 1450 { 1451 const char *p, *first, *second; 1452 size_t fsz, ssz; 1453 enum mandoc_esc esc; 1454 1455 p = *bufp + pos; 1456 1457 if ('\0' == *p) { 1458 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL); 1459 return(ROFF_IGN); 1460 } 1461 1462 while ('\0' != *p) { 1463 fsz = ssz = 1; 1464 1465 first = p++; 1466 if ('\\' == *first) { 1467 esc = mandoc_escape(&p, NULL, NULL); 1468 if (ESCAPE_ERROR == esc) { 1469 mandoc_msg 1470 (MANDOCERR_BADESCAPE, r->parse, 1471 ln, (int)(p - *bufp), NULL); 1472 return(ROFF_IGN); 1473 } 1474 fsz = (size_t)(p - first); 1475 } 1476 1477 second = p++; 1478 if ('\\' == *second) { 1479 esc = mandoc_escape(&p, NULL, NULL); 1480 if (ESCAPE_ERROR == esc) { 1481 mandoc_msg 1482 (MANDOCERR_BADESCAPE, r->parse, 1483 ln, (int)(p - *bufp), NULL); 1484 return(ROFF_IGN); 1485 } 1486 ssz = (size_t)(p - second); 1487 } else if ('\0' == *second) { 1488 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, 1489 ln, (int)(p - *bufp), NULL); 1490 second = " "; 1491 p--; 1492 } 1493 1494 if (fsz > 1) { 1495 roff_setstrn(&r->xmbtab, first, 1496 fsz, second, ssz, 0); 1497 continue; 1498 } 1499 1500 if (NULL == r->xtab) 1501 r->xtab = mandoc_calloc 1502 (128, sizeof(struct roffstr)); 1503 1504 free(r->xtab[(int)*first].p); 1505 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 1506 r->xtab[(int)*first].sz = ssz; 1507 } 1508 1509 return(ROFF_IGN); 1510 } 1511 1512 /* ARGSUSED */ 1513 static enum rofferr 1514 roff_so(ROFF_ARGS) 1515 { 1516 char *name; 1517 1518 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL); 1519 1520 /* 1521 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 1522 * opening anything that's not in our cwd or anything beneath 1523 * it. Thus, explicitly disallow traversing up the file-system 1524 * or using absolute paths. 1525 */ 1526 1527 name = *bufp + pos; 1528 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) { 1529 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL); 1530 return(ROFF_ERR); 1531 } 1532 1533 *offs = pos; 1534 return(ROFF_SO); 1535 } 1536 1537 /* ARGSUSED */ 1538 static enum rofferr 1539 roff_userdef(ROFF_ARGS) 1540 { 1541 const char *arg[9]; 1542 char *cp, *n1, *n2; 1543 int i; 1544 1545 /* 1546 * Collect pointers to macro argument strings 1547 * and null-terminate them. 1548 */ 1549 cp = *bufp + pos; 1550 for (i = 0; i < 9; i++) 1551 arg[i] = '\0' == *cp ? "" : 1552 mandoc_getarg(r->parse, &cp, ln, &pos); 1553 1554 /* 1555 * Expand macro arguments. 1556 */ 1557 *szp = 0; 1558 n1 = cp = mandoc_strdup(r->current_string); 1559 while (NULL != (cp = strstr(cp, "\\$"))) { 1560 i = cp[2] - '1'; 1561 if (0 > i || 8 < i) { 1562 /* Not an argument invocation. */ 1563 cp += 2; 1564 continue; 1565 } 1566 1567 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1; 1568 n2 = mandoc_malloc(*szp); 1569 1570 strlcpy(n2, n1, (size_t)(cp - n1 + 1)); 1571 strlcat(n2, arg[i], *szp); 1572 strlcat(n2, cp + 3, *szp); 1573 1574 cp = n2 + (cp - n1); 1575 free(n1); 1576 n1 = n2; 1577 } 1578 1579 /* 1580 * Replace the macro invocation 1581 * by the expanded macro. 1582 */ 1583 free(*bufp); 1584 *bufp = n1; 1585 if (0 == *szp) 1586 *szp = strlen(*bufp) + 1; 1587 1588 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ? 1589 ROFF_REPARSE : ROFF_APPEND); 1590 } 1591 1592 static char * 1593 roff_getname(struct roff *r, char **cpp, int ln, int pos) 1594 { 1595 char *name, *cp; 1596 1597 name = *cpp; 1598 if ('\0' == *name) 1599 return(name); 1600 1601 /* Read until end of name. */ 1602 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) { 1603 if ('\\' != *cp) 1604 continue; 1605 cp++; 1606 if ('\\' == *cp) 1607 continue; 1608 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL); 1609 *cp = '\0'; 1610 name = cp; 1611 } 1612 1613 /* Nil-terminate name. */ 1614 if ('\0' != *cp) 1615 *(cp++) = '\0'; 1616 1617 /* Read past spaces. */ 1618 while (' ' == *cp) 1619 cp++; 1620 1621 *cpp = cp; 1622 return(name); 1623 } 1624 1625 /* 1626 * Store *string into the user-defined string called *name. 1627 * In multiline mode, append to an existing entry and append '\n'; 1628 * else replace the existing entry, if there is one. 1629 * To clear an existing entry, call with (*r, *name, NULL, 0). 1630 */ 1631 static void 1632 roff_setstr(struct roff *r, const char *name, const char *string, 1633 int multiline) 1634 { 1635 1636 roff_setstrn(&r->strtab, name, strlen(name), string, 1637 string ? strlen(string) : 0, multiline); 1638 } 1639 1640 static void 1641 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 1642 const char *string, size_t stringsz, int multiline) 1643 { 1644 struct roffkv *n; 1645 char *c; 1646 int i; 1647 size_t oldch, newch; 1648 1649 /* Search for an existing string with the same name. */ 1650 n = *r; 1651 1652 while (n && strcmp(name, n->key.p)) 1653 n = n->next; 1654 1655 if (NULL == n) { 1656 /* Create a new string table entry. */ 1657 n = mandoc_malloc(sizeof(struct roffkv)); 1658 n->key.p = mandoc_strndup(name, namesz); 1659 n->key.sz = namesz; 1660 n->val.p = NULL; 1661 n->val.sz = 0; 1662 n->next = *r; 1663 *r = n; 1664 } else if (0 == multiline) { 1665 /* In multiline mode, append; else replace. */ 1666 free(n->val.p); 1667 n->val.p = NULL; 1668 n->val.sz = 0; 1669 } 1670 1671 if (NULL == string) 1672 return; 1673 1674 /* 1675 * One additional byte for the '\n' in multiline mode, 1676 * and one for the terminating '\0'. 1677 */ 1678 newch = stringsz + (multiline ? 2u : 1u); 1679 1680 if (NULL == n->val.p) { 1681 n->val.p = mandoc_malloc(newch); 1682 *n->val.p = '\0'; 1683 oldch = 0; 1684 } else { 1685 oldch = n->val.sz; 1686 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 1687 } 1688 1689 /* Skip existing content in the destination buffer. */ 1690 c = n->val.p + (int)oldch; 1691 1692 /* Append new content to the destination buffer. */ 1693 i = 0; 1694 while (i < (int)stringsz) { 1695 /* 1696 * Rudimentary roff copy mode: 1697 * Handle escaped backslashes. 1698 */ 1699 if ('\\' == string[i] && '\\' == string[i + 1]) 1700 i++; 1701 *c++ = string[i++]; 1702 } 1703 1704 /* Append terminating bytes. */ 1705 if (multiline) 1706 *c++ = '\n'; 1707 1708 *c = '\0'; 1709 n->val.sz = (int)(c - n->val.p); 1710 } 1711 1712 static const char * 1713 roff_getstrn(const struct roff *r, const char *name, size_t len) 1714 { 1715 const struct roffkv *n; 1716 1717 for (n = r->strtab; n; n = n->next) 1718 if (0 == strncmp(name, n->key.p, len) && 1719 '\0' == n->key.p[(int)len]) 1720 return(n->val.p); 1721 1722 return(NULL); 1723 } 1724 1725 static void 1726 roff_freestr(struct roffkv *r) 1727 { 1728 struct roffkv *n, *nn; 1729 1730 for (n = r; n; n = nn) { 1731 free(n->key.p); 1732 free(n->val.p); 1733 nn = n->next; 1734 free(n); 1735 } 1736 } 1737 1738 const struct tbl_span * 1739 roff_span(const struct roff *r) 1740 { 1741 1742 return(r->tbl ? tbl_span(r->tbl) : NULL); 1743 } 1744 1745 const struct eqn * 1746 roff_eqn(const struct roff *r) 1747 { 1748 1749 return(r->last_eqn ? &r->last_eqn->eqn : NULL); 1750 } 1751 1752 /* 1753 * Duplicate an input string, making the appropriate character 1754 * conversations (as stipulated by `tr') along the way. 1755 * Returns a heap-allocated string with all the replacements made. 1756 */ 1757 char * 1758 roff_strdup(const struct roff *r, const char *p) 1759 { 1760 const struct roffkv *cp; 1761 char *res; 1762 const char *pp; 1763 size_t ssz, sz; 1764 enum mandoc_esc esc; 1765 1766 if (NULL == r->xmbtab && NULL == r->xtab) 1767 return(mandoc_strdup(p)); 1768 else if ('\0' == *p) 1769 return(mandoc_strdup("")); 1770 1771 /* 1772 * Step through each character looking for term matches 1773 * (remember that a `tr' can be invoked with an escape, which is 1774 * a glyph but the escape is multi-character). 1775 * We only do this if the character hash has been initialised 1776 * and the string is >0 length. 1777 */ 1778 1779 res = NULL; 1780 ssz = 0; 1781 1782 while ('\0' != *p) { 1783 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { 1784 sz = r->xtab[(int)*p].sz; 1785 res = mandoc_realloc(res, ssz + sz + 1); 1786 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 1787 ssz += sz; 1788 p++; 1789 continue; 1790 } else if ('\\' != *p) { 1791 res = mandoc_realloc(res, ssz + 2); 1792 res[ssz++] = *p++; 1793 continue; 1794 } 1795 1796 /* Search for term matches. */ 1797 for (cp = r->xmbtab; cp; cp = cp->next) 1798 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 1799 break; 1800 1801 if (NULL != cp) { 1802 /* 1803 * A match has been found. 1804 * Append the match to the array and move 1805 * forward by its keysize. 1806 */ 1807 res = mandoc_realloc 1808 (res, ssz + cp->val.sz + 1); 1809 memcpy(res + ssz, cp->val.p, cp->val.sz); 1810 ssz += cp->val.sz; 1811 p += (int)cp->key.sz; 1812 continue; 1813 } 1814 1815 /* 1816 * Handle escapes carefully: we need to copy 1817 * over just the escape itself, or else we might 1818 * do replacements within the escape itself. 1819 * Make sure to pass along the bogus string. 1820 */ 1821 pp = p++; 1822 esc = mandoc_escape(&p, NULL, NULL); 1823 if (ESCAPE_ERROR == esc) { 1824 sz = strlen(pp); 1825 res = mandoc_realloc(res, ssz + sz + 1); 1826 memcpy(res + ssz, pp, sz); 1827 break; 1828 } 1829 /* 1830 * We bail out on bad escapes. 1831 * No need to warn: we already did so when 1832 * roff_res() was called. 1833 */ 1834 sz = (int)(p - pp); 1835 res = mandoc_realloc(res, ssz + sz + 1); 1836 memcpy(res + ssz, pp, sz); 1837 ssz += sz; 1838 } 1839 1840 res[(int)ssz] = '\0'; 1841 return(res); 1842 } 1843 1844 /* 1845 * Find out whether a line is a macro line or not. 1846 * If it is, adjust the current position and return one; if it isn't, 1847 * return zero and don't change the current position. 1848 * If the control character has been set with `.cc', then let that grain 1849 * precedence. 1850 * This is slighly contrary to groff, where using the non-breaking 1851 * control character when `cc' has been invoked will cause the 1852 * non-breaking macro contents to be printed verbatim. 1853 */ 1854 int 1855 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 1856 { 1857 int pos; 1858 1859 pos = *ppos; 1860 1861 if (0 != r->control && cp[pos] == r->control) 1862 pos++; 1863 else if (0 != r->control) 1864 return(0); 1865 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 1866 pos += 2; 1867 else if ('.' == cp[pos] || '\'' == cp[pos]) 1868 pos++; 1869 else 1870 return(0); 1871 1872 while (' ' == cp[pos] || '\t' == cp[pos]) 1873 pos++; 1874 1875 *ppos = pos; 1876 return(1); 1877 } 1878