1 /* $Id: roff.c,v 1.42 2011/09/18 15:54:48 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <assert.h> 19 #include <ctype.h> 20 #include <stdlib.h> 21 #include <string.h> 22 23 #include "mandoc.h" 24 #include "libroff.h" 25 #include "libmandoc.h" 26 27 /* Maximum number of nested if-else conditionals. */ 28 #define RSTACK_MAX 128 29 30 enum rofft { 31 ROFF_ad, 32 ROFF_am, 33 ROFF_ami, 34 ROFF_am1, 35 ROFF_de, 36 ROFF_dei, 37 ROFF_de1, 38 ROFF_ds, 39 ROFF_el, 40 ROFF_hy, 41 ROFF_ie, 42 ROFF_if, 43 ROFF_ig, 44 ROFF_it, 45 ROFF_ne, 46 ROFF_nh, 47 ROFF_nr, 48 ROFF_ns, 49 ROFF_ps, 50 ROFF_rm, 51 ROFF_so, 52 ROFF_ta, 53 ROFF_tr, 54 ROFF_TS, 55 ROFF_TE, 56 ROFF_T_, 57 ROFF_EQ, 58 ROFF_EN, 59 ROFF_cblock, 60 ROFF_ccond, 61 ROFF_USERDEF, 62 ROFF_MAX 63 }; 64 65 enum roffrule { 66 ROFFRULE_ALLOW, 67 ROFFRULE_DENY 68 }; 69 70 /* 71 * A single register entity. If "set" is zero, the value of the 72 * register should be the default one, which is per-register. 73 * Registers are assumed to be unsigned ints for now. 74 */ 75 struct reg { 76 int set; /* whether set or not */ 77 unsigned int u; /* unsigned integer */ 78 }; 79 80 /* 81 * An incredibly-simple string buffer. 82 */ 83 struct roffstr { 84 char *p; /* nil-terminated buffer */ 85 size_t sz; /* saved strlen(p) */ 86 }; 87 88 /* 89 * A key-value roffstr pair as part of a singly-linked list. 90 */ 91 struct roffkv { 92 struct roffstr key; 93 struct roffstr val; 94 struct roffkv *next; /* next in list */ 95 }; 96 97 struct roff { 98 struct mparse *parse; /* parse point */ 99 struct roffnode *last; /* leaf of stack */ 100 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ 101 int rstackpos; /* position in rstack */ 102 struct reg regs[REG__MAX]; 103 struct roffkv *strtab; /* user-defined strings & macros */ 104 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 105 struct roffstr *xtab; /* single-byte trans table (`tr') */ 106 const char *current_string; /* value of last called user macro */ 107 struct tbl_node *first_tbl; /* first table parsed */ 108 struct tbl_node *last_tbl; /* last table parsed */ 109 struct tbl_node *tbl; /* current table being parsed */ 110 struct eqn_node *last_eqn; /* last equation parsed */ 111 struct eqn_node *first_eqn; /* first equation parsed */ 112 struct eqn_node *eqn; /* current equation being parsed */ 113 }; 114 115 struct roffnode { 116 enum rofft tok; /* type of node */ 117 struct roffnode *parent; /* up one in stack */ 118 int line; /* parse line */ 119 int col; /* parse col */ 120 char *name; /* node name, e.g. macro name */ 121 char *end; /* end-rules: custom token */ 122 int endspan; /* end-rules: next-line or infty */ 123 enum roffrule rule; /* current evaluation rule */ 124 }; 125 126 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 127 enum rofft tok, /* tok of macro */ \ 128 char **bufp, /* input buffer */ \ 129 size_t *szp, /* size of input buffer */ \ 130 int ln, /* parse line */ \ 131 int ppos, /* original pos in buffer */ \ 132 int pos, /* current pos in buffer */ \ 133 int *offs /* reset offset of buffer data */ 134 135 typedef enum rofferr (*roffproc)(ROFF_ARGS); 136 137 struct roffmac { 138 const char *name; /* macro name */ 139 roffproc proc; /* process new macro */ 140 roffproc text; /* process as child text of macro */ 141 roffproc sub; /* process as child of macro */ 142 int flags; 143 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 144 struct roffmac *next; 145 }; 146 147 struct predef { 148 const char *name; /* predefined input name */ 149 const char *str; /* replacement symbol */ 150 }; 151 152 #define PREDEF(__name, __str) \ 153 { (__name), (__str) }, 154 155 static enum rofft roffhash_find(const char *, size_t); 156 static void roffhash_init(void); 157 static void roffnode_cleanscope(struct roff *); 158 static void roffnode_pop(struct roff *); 159 static void roffnode_push(struct roff *, enum rofft, 160 const char *, int, int); 161 static enum rofferr roff_block(ROFF_ARGS); 162 static enum rofferr roff_block_text(ROFF_ARGS); 163 static enum rofferr roff_block_sub(ROFF_ARGS); 164 static enum rofferr roff_cblock(ROFF_ARGS); 165 static enum rofferr roff_ccond(ROFF_ARGS); 166 static enum rofferr roff_cond(ROFF_ARGS); 167 static enum rofferr roff_cond_text(ROFF_ARGS); 168 static enum rofferr roff_cond_sub(ROFF_ARGS); 169 static enum rofferr roff_ds(ROFF_ARGS); 170 static enum roffrule roff_evalcond(const char *, int *); 171 static void roff_free1(struct roff *); 172 static void roff_freestr(struct roffkv *); 173 static char *roff_getname(struct roff *, char **, int, int); 174 static const char *roff_getstrn(const struct roff *, 175 const char *, size_t); 176 static enum rofferr roff_line_ignore(ROFF_ARGS); 177 static enum rofferr roff_nr(ROFF_ARGS); 178 static void roff_openeqn(struct roff *, const char *, 179 int, int, const char *); 180 static enum rofft roff_parse(struct roff *, const char *, int *); 181 static enum rofferr roff_parsetext(char *); 182 static void roff_res(struct roff *, 183 char **, size_t *, int, int); 184 static enum rofferr roff_rm(ROFF_ARGS); 185 static void roff_setstr(struct roff *, 186 const char *, const char *, int); 187 static void roff_setstrn(struct roffkv **, const char *, 188 size_t, const char *, size_t, int); 189 static enum rofferr roff_so(ROFF_ARGS); 190 static enum rofferr roff_tr(ROFF_ARGS); 191 static enum rofferr roff_TE(ROFF_ARGS); 192 static enum rofferr roff_TS(ROFF_ARGS); 193 static enum rofferr roff_EQ(ROFF_ARGS); 194 static enum rofferr roff_EN(ROFF_ARGS); 195 static enum rofferr roff_T_(ROFF_ARGS); 196 static enum rofferr roff_userdef(ROFF_ARGS); 197 198 /* See roffhash_find() */ 199 200 #define ASCII_HI 126 201 #define ASCII_LO 33 202 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1) 203 204 static struct roffmac *hash[HASHWIDTH]; 205 206 static struct roffmac roffs[ROFF_MAX] = { 207 { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, 208 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 209 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 210 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 211 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 212 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 213 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 214 { "ds", roff_ds, NULL, NULL, 0, NULL }, 215 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 216 { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, 217 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 218 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 219 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 220 { "it", roff_line_ignore, NULL, NULL, 0, NULL }, 221 { "ne", roff_line_ignore, NULL, NULL, 0, NULL }, 222 { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, 223 { "nr", roff_nr, NULL, NULL, 0, NULL }, 224 { "ns", roff_line_ignore, NULL, NULL, 0, NULL }, 225 { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, 226 { "rm", roff_rm, NULL, NULL, 0, NULL }, 227 { "so", roff_so, NULL, NULL, 0, NULL }, 228 { "ta", roff_line_ignore, NULL, NULL, 0, NULL }, 229 { "tr", roff_tr, NULL, NULL, 0, NULL }, 230 { "TS", roff_TS, NULL, NULL, 0, NULL }, 231 { "TE", roff_TE, NULL, NULL, 0, NULL }, 232 { "T&", roff_T_, NULL, NULL, 0, NULL }, 233 { "EQ", roff_EQ, NULL, NULL, 0, NULL }, 234 { "EN", roff_EN, NULL, NULL, 0, NULL }, 235 { ".", roff_cblock, NULL, NULL, 0, NULL }, 236 { "\\}", roff_ccond, NULL, NULL, 0, NULL }, 237 { NULL, roff_userdef, NULL, NULL, 0, NULL }, 238 }; 239 240 /* Array of injected predefined strings. */ 241 #define PREDEFS_MAX 38 242 static const struct predef predefs[PREDEFS_MAX] = { 243 #include "predefs.in" 244 }; 245 246 /* See roffhash_find() */ 247 #define ROFF_HASH(p) (p[0] - ASCII_LO) 248 249 static void 250 roffhash_init(void) 251 { 252 struct roffmac *n; 253 int buc, i; 254 255 for (i = 0; i < (int)ROFF_USERDEF; i++) { 256 assert(roffs[i].name[0] >= ASCII_LO); 257 assert(roffs[i].name[0] <= ASCII_HI); 258 259 buc = ROFF_HASH(roffs[i].name); 260 261 if (NULL != (n = hash[buc])) { 262 for ( ; n->next; n = n->next) 263 /* Do nothing. */ ; 264 n->next = &roffs[i]; 265 } else 266 hash[buc] = &roffs[i]; 267 } 268 } 269 270 /* 271 * Look up a roff token by its name. Returns ROFF_MAX if no macro by 272 * the nil-terminated string name could be found. 273 */ 274 static enum rofft 275 roffhash_find(const char *p, size_t s) 276 { 277 int buc; 278 struct roffmac *n; 279 280 /* 281 * libroff has an extremely simple hashtable, for the time 282 * being, which simply keys on the first character, which must 283 * be printable, then walks a chain. It works well enough until 284 * optimised. 285 */ 286 287 if (p[0] < ASCII_LO || p[0] > ASCII_HI) 288 return(ROFF_MAX); 289 290 buc = ROFF_HASH(p); 291 292 if (NULL == (n = hash[buc])) 293 return(ROFF_MAX); 294 for ( ; n; n = n->next) 295 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) 296 return((enum rofft)(n - roffs)); 297 298 return(ROFF_MAX); 299 } 300 301 302 /* 303 * Pop the current node off of the stack of roff instructions currently 304 * pending. 305 */ 306 static void 307 roffnode_pop(struct roff *r) 308 { 309 struct roffnode *p; 310 311 assert(r->last); 312 p = r->last; 313 314 r->last = r->last->parent; 315 free(p->name); 316 free(p->end); 317 free(p); 318 } 319 320 321 /* 322 * Push a roff node onto the instruction stack. This must later be 323 * removed with roffnode_pop(). 324 */ 325 static void 326 roffnode_push(struct roff *r, enum rofft tok, const char *name, 327 int line, int col) 328 { 329 struct roffnode *p; 330 331 p = mandoc_calloc(1, sizeof(struct roffnode)); 332 p->tok = tok; 333 if (name) 334 p->name = mandoc_strdup(name); 335 p->parent = r->last; 336 p->line = line; 337 p->col = col; 338 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY; 339 340 r->last = p; 341 } 342 343 344 static void 345 roff_free1(struct roff *r) 346 { 347 struct tbl_node *t; 348 struct eqn_node *e; 349 int i; 350 351 while (NULL != (t = r->first_tbl)) { 352 r->first_tbl = t->next; 353 tbl_free(t); 354 } 355 356 r->first_tbl = r->last_tbl = r->tbl = NULL; 357 358 while (NULL != (e = r->first_eqn)) { 359 r->first_eqn = e->next; 360 eqn_free(e); 361 } 362 363 r->first_eqn = r->last_eqn = r->eqn = NULL; 364 365 while (r->last) 366 roffnode_pop(r); 367 368 roff_freestr(r->strtab); 369 roff_freestr(r->xmbtab); 370 371 r->strtab = r->xmbtab = NULL; 372 373 if (r->xtab) 374 for (i = 0; i < 128; i++) 375 free(r->xtab[i].p); 376 377 free(r->xtab); 378 r->xtab = NULL; 379 } 380 381 void 382 roff_reset(struct roff *r) 383 { 384 int i; 385 386 roff_free1(r); 387 388 memset(&r->regs, 0, sizeof(struct reg) * REG__MAX); 389 390 for (i = 0; i < PREDEFS_MAX; i++) 391 roff_setstr(r, predefs[i].name, predefs[i].str, 0); 392 } 393 394 395 void 396 roff_free(struct roff *r) 397 { 398 399 roff_free1(r); 400 free(r); 401 } 402 403 404 struct roff * 405 roff_alloc(struct mparse *parse) 406 { 407 struct roff *r; 408 int i; 409 410 r = mandoc_calloc(1, sizeof(struct roff)); 411 r->parse = parse; 412 r->rstackpos = -1; 413 414 roffhash_init(); 415 416 for (i = 0; i < PREDEFS_MAX; i++) 417 roff_setstr(r, predefs[i].name, predefs[i].str, 0); 418 419 return(r); 420 } 421 422 /* 423 * Pre-filter each and every line for reserved words (one beginning with 424 * `\*', e.g., `\*(ab'). These must be handled before the actual line 425 * is processed. 426 * This also checks the syntax of regular escapes. 427 */ 428 static void 429 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) 430 { 431 enum mandoc_esc esc; 432 const char *stesc; /* start of an escape sequence ('\\') */ 433 const char *stnam; /* start of the name, after "[(*" */ 434 const char *cp; /* end of the name, e.g. before ']' */ 435 const char *res; /* the string to be substituted */ 436 int i, maxl; 437 size_t nsz; 438 char *n; 439 440 again: 441 cp = *bufp + pos; 442 while (NULL != (cp = strchr(cp, '\\'))) { 443 stesc = cp++; 444 445 /* 446 * The second character must be an asterisk. 447 * If it isn't, skip it anyway: It is escaped, 448 * so it can't start another escape sequence. 449 */ 450 451 if ('\0' == *cp) 452 return; 453 454 if ('*' != *cp) { 455 res = cp; 456 esc = mandoc_escape(&cp, NULL, NULL); 457 if (ESCAPE_ERROR != esc) 458 continue; 459 cp = res; 460 mandoc_msg 461 (MANDOCERR_BADESCAPE, r->parse, 462 ln, (int)(stesc - *bufp), NULL); 463 return; 464 } 465 466 cp++; 467 468 /* 469 * The third character decides the length 470 * of the name of the string. 471 * Save a pointer to the name. 472 */ 473 474 switch (*cp) { 475 case ('\0'): 476 return; 477 case ('('): 478 cp++; 479 maxl = 2; 480 break; 481 case ('['): 482 cp++; 483 maxl = 0; 484 break; 485 default: 486 maxl = 1; 487 break; 488 } 489 stnam = cp; 490 491 /* Advance to the end of the name. */ 492 493 for (i = 0; 0 == maxl || i < maxl; i++, cp++) { 494 if ('\0' == *cp) { 495 mandoc_msg 496 (MANDOCERR_BADESCAPE, 497 r->parse, ln, 498 (int)(stesc - *bufp), NULL); 499 return; 500 } 501 if (0 == maxl && ']' == *cp) 502 break; 503 } 504 505 /* 506 * Retrieve the replacement string; if it is 507 * undefined, resume searching for escapes. 508 */ 509 510 res = roff_getstrn(r, stnam, (size_t)i); 511 512 if (NULL == res) { 513 mandoc_msg 514 (MANDOCERR_BADESCAPE, r->parse, 515 ln, (int)(stesc - *bufp), NULL); 516 res = ""; 517 } 518 519 /* Replace the escape sequence by the string. */ 520 521 pos = stesc - *bufp; 522 523 nsz = *szp + strlen(res) + 1; 524 n = mandoc_malloc(nsz); 525 526 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1)); 527 strlcat(n, res, nsz); 528 strlcat(n, cp + (maxl ? 0 : 1), nsz); 529 530 free(*bufp); 531 532 *bufp = n; 533 *szp = nsz; 534 goto again; 535 } 536 } 537 538 /* 539 * Process text streams: convert all breakable hyphens into ASCII_HYPH. 540 */ 541 static enum rofferr 542 roff_parsetext(char *p) 543 { 544 char l, r; 545 size_t sz; 546 const char *start; 547 enum mandoc_esc esc; 548 549 start = p; 550 551 while ('\0' != *p) { 552 sz = strcspn(p, "-\\"); 553 p += sz; 554 555 if ('\0' == *p) 556 break; 557 558 if ('\\' == *p) { 559 /* Skip over escapes. */ 560 p++; 561 esc = mandoc_escape 562 ((const char **)&p, NULL, NULL); 563 if (ESCAPE_ERROR == esc) 564 break; 565 continue; 566 } else if (p == start) { 567 p++; 568 continue; 569 } 570 571 l = *(p - 1); 572 r = *(p + 1); 573 if ('\\' != l && 574 '\t' != r && '\t' != l && 575 ' ' != r && ' ' != l && 576 '-' != r && '-' != l && 577 ! isdigit((unsigned char)l) && 578 ! isdigit((unsigned char)r)) 579 *p = ASCII_HYPH; 580 p++; 581 } 582 583 return(ROFF_CONT); 584 } 585 586 enum rofferr 587 roff_parseln(struct roff *r, int ln, char **bufp, 588 size_t *szp, int pos, int *offs) 589 { 590 enum rofft t; 591 enum rofferr e; 592 int ppos, ctl; 593 594 /* 595 * Run the reserved-word filter only if we have some reserved 596 * words to fill in. 597 */ 598 599 roff_res(r, bufp, szp, ln, pos); 600 601 ppos = pos; 602 ctl = mandoc_getcontrol(*bufp, &pos); 603 604 /* 605 * First, if a scope is open and we're not a macro, pass the 606 * text through the macro's filter. If a scope isn't open and 607 * we're not a macro, just let it through. 608 * Finally, if there's an equation scope open, divert it into it 609 * no matter our state. 610 */ 611 612 if (r->last && ! ctl) { 613 t = r->last->tok; 614 assert(roffs[t].text); 615 e = (*roffs[t].text) 616 (r, t, bufp, szp, ln, pos, pos, offs); 617 assert(ROFF_IGN == e || ROFF_CONT == e); 618 if (ROFF_CONT != e) 619 return(e); 620 if (r->eqn) 621 return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); 622 if (r->tbl) 623 return(tbl_read(r->tbl, ln, *bufp, pos)); 624 return(roff_parsetext(*bufp + pos)); 625 } else if ( ! ctl) { 626 if (r->eqn) 627 return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); 628 if (r->tbl) 629 return(tbl_read(r->tbl, ln, *bufp, pos)); 630 return(roff_parsetext(*bufp + pos)); 631 } else if (r->eqn) 632 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs)); 633 634 /* 635 * If a scope is open, go to the child handler for that macro, 636 * as it may want to preprocess before doing anything with it. 637 * Don't do so if an equation is open. 638 */ 639 640 if (r->last) { 641 t = r->last->tok; 642 assert(roffs[t].sub); 643 return((*roffs[t].sub) 644 (r, t, bufp, szp, 645 ln, ppos, pos, offs)); 646 } 647 648 /* 649 * Lastly, as we've no scope open, try to look up and execute 650 * the new macro. If no macro is found, simply return and let 651 * the compilers handle it. 652 */ 653 654 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) 655 return(ROFF_CONT); 656 657 assert(roffs[t].proc); 658 return((*roffs[t].proc) 659 (r, t, bufp, szp, 660 ln, ppos, pos, offs)); 661 } 662 663 664 void 665 roff_endparse(struct roff *r) 666 { 667 668 if (r->last) 669 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 670 r->last->line, r->last->col, NULL); 671 672 if (r->eqn) { 673 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 674 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL); 675 eqn_end(&r->eqn); 676 } 677 678 if (r->tbl) { 679 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, 680 r->tbl->line, r->tbl->pos, NULL); 681 tbl_end(&r->tbl); 682 } 683 } 684 685 /* 686 * Parse a roff node's type from the input buffer. This must be in the 687 * form of ".foo xxx" in the usual way. 688 */ 689 static enum rofft 690 roff_parse(struct roff *r, const char *buf, int *pos) 691 { 692 const char *mac; 693 size_t maclen; 694 enum rofft t; 695 696 if ('\0' == buf[*pos] || '"' == buf[*pos] || 697 '\t' == buf[*pos] || ' ' == buf[*pos]) 698 return(ROFF_MAX); 699 700 /* 701 * We stop the macro parse at an escape, tab, space, or nil. 702 * However, `\}' is also a valid macro, so make sure we don't 703 * clobber it by seeing the `\' as the end of token. 704 */ 705 706 mac = buf + *pos; 707 maclen = strcspn(mac + 1, " \\\t\0") + 1; 708 709 t = (r->current_string = roff_getstrn(r, mac, maclen)) 710 ? ROFF_USERDEF : roffhash_find(mac, maclen); 711 712 *pos += (int)maclen; 713 714 while (buf[*pos] && ' ' == buf[*pos]) 715 (*pos)++; 716 717 return(t); 718 } 719 720 /* ARGSUSED */ 721 static enum rofferr 722 roff_cblock(ROFF_ARGS) 723 { 724 725 /* 726 * A block-close `..' should only be invoked as a child of an 727 * ignore macro, otherwise raise a warning and just ignore it. 728 */ 729 730 if (NULL == r->last) { 731 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 732 return(ROFF_IGN); 733 } 734 735 switch (r->last->tok) { 736 case (ROFF_am): 737 /* FALLTHROUGH */ 738 case (ROFF_ami): 739 /* FALLTHROUGH */ 740 case (ROFF_am1): 741 /* FALLTHROUGH */ 742 case (ROFF_de): 743 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 744 /* FALLTHROUGH */ 745 case (ROFF_dei): 746 /* FALLTHROUGH */ 747 case (ROFF_ig): 748 break; 749 default: 750 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 751 return(ROFF_IGN); 752 } 753 754 if ((*bufp)[pos]) 755 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 756 757 roffnode_pop(r); 758 roffnode_cleanscope(r); 759 return(ROFF_IGN); 760 761 } 762 763 764 static void 765 roffnode_cleanscope(struct roff *r) 766 { 767 768 while (r->last) { 769 if (--r->last->endspan < 0) 770 break; 771 roffnode_pop(r); 772 } 773 } 774 775 776 /* ARGSUSED */ 777 static enum rofferr 778 roff_ccond(ROFF_ARGS) 779 { 780 781 if (NULL == r->last) { 782 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 783 return(ROFF_IGN); 784 } 785 786 switch (r->last->tok) { 787 case (ROFF_el): 788 /* FALLTHROUGH */ 789 case (ROFF_ie): 790 /* FALLTHROUGH */ 791 case (ROFF_if): 792 break; 793 default: 794 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 795 return(ROFF_IGN); 796 } 797 798 if (r->last->endspan > -1) { 799 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 800 return(ROFF_IGN); 801 } 802 803 if ((*bufp)[pos]) 804 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 805 806 roffnode_pop(r); 807 roffnode_cleanscope(r); 808 return(ROFF_IGN); 809 } 810 811 812 /* ARGSUSED */ 813 static enum rofferr 814 roff_block(ROFF_ARGS) 815 { 816 int sv; 817 size_t sz; 818 char *name; 819 820 name = NULL; 821 822 if (ROFF_ig != tok) { 823 if ('\0' == (*bufp)[pos]) { 824 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); 825 return(ROFF_IGN); 826 } 827 828 /* 829 * Re-write `de1', since we don't really care about 830 * groff's strange compatibility mode, into `de'. 831 */ 832 833 if (ROFF_de1 == tok) 834 tok = ROFF_de; 835 if (ROFF_de == tok) 836 name = *bufp + pos; 837 else 838 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, 839 roffs[tok].name); 840 841 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) 842 pos++; 843 844 while (isspace((unsigned char)(*bufp)[pos])) 845 (*bufp)[pos++] = '\0'; 846 } 847 848 roffnode_push(r, tok, name, ln, ppos); 849 850 /* 851 * At the beginning of a `de' macro, clear the existing string 852 * with the same name, if there is one. New content will be 853 * added from roff_block_text() in multiline mode. 854 */ 855 856 if (ROFF_de == tok) 857 roff_setstr(r, name, "", 0); 858 859 if ('\0' == (*bufp)[pos]) 860 return(ROFF_IGN); 861 862 /* If present, process the custom end-of-line marker. */ 863 864 sv = pos; 865 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) 866 pos++; 867 868 /* 869 * Note: groff does NOT like escape characters in the input. 870 * Instead of detecting this, we're just going to let it fly and 871 * to hell with it. 872 */ 873 874 assert(pos > sv); 875 sz = (size_t)(pos - sv); 876 877 if (1 == sz && '.' == (*bufp)[sv]) 878 return(ROFF_IGN); 879 880 r->last->end = mandoc_malloc(sz + 1); 881 882 memcpy(r->last->end, *bufp + sv, sz); 883 r->last->end[(int)sz] = '\0'; 884 885 if ((*bufp)[pos]) 886 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); 887 888 return(ROFF_IGN); 889 } 890 891 892 /* ARGSUSED */ 893 static enum rofferr 894 roff_block_sub(ROFF_ARGS) 895 { 896 enum rofft t; 897 int i, j; 898 899 /* 900 * First check whether a custom macro exists at this level. If 901 * it does, then check against it. This is some of groff's 902 * stranger behaviours. If we encountered a custom end-scope 903 * tag and that tag also happens to be a "real" macro, then we 904 * need to try interpreting it again as a real macro. If it's 905 * not, then return ignore. Else continue. 906 */ 907 908 if (r->last->end) { 909 for (i = pos, j = 0; r->last->end[j]; j++, i++) 910 if ((*bufp)[i] != r->last->end[j]) 911 break; 912 913 if ('\0' == r->last->end[j] && 914 ('\0' == (*bufp)[i] || 915 ' ' == (*bufp)[i] || 916 '\t' == (*bufp)[i])) { 917 roffnode_pop(r); 918 roffnode_cleanscope(r); 919 920 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i]) 921 i++; 922 923 pos = i; 924 if (ROFF_MAX != roff_parse(r, *bufp, &pos)) 925 return(ROFF_RERUN); 926 return(ROFF_IGN); 927 } 928 } 929 930 /* 931 * If we have no custom end-query or lookup failed, then try 932 * pulling it out of the hashtable. 933 */ 934 935 t = roff_parse(r, *bufp, &pos); 936 937 /* 938 * Macros other than block-end are only significant 939 * in `de' blocks; elsewhere, simply throw them away. 940 */ 941 if (ROFF_cblock != t) { 942 if (ROFF_de == tok) 943 roff_setstr(r, r->last->name, *bufp + ppos, 1); 944 return(ROFF_IGN); 945 } 946 947 assert(roffs[t].proc); 948 return((*roffs[t].proc)(r, t, bufp, szp, 949 ln, ppos, pos, offs)); 950 } 951 952 953 /* ARGSUSED */ 954 static enum rofferr 955 roff_block_text(ROFF_ARGS) 956 { 957 958 if (ROFF_de == tok) 959 roff_setstr(r, r->last->name, *bufp + pos, 1); 960 961 return(ROFF_IGN); 962 } 963 964 965 /* ARGSUSED */ 966 static enum rofferr 967 roff_cond_sub(ROFF_ARGS) 968 { 969 enum rofft t; 970 enum roffrule rr; 971 char *ep; 972 973 rr = r->last->rule; 974 roffnode_cleanscope(r); 975 976 /* 977 * If the macro is unknown, first check if it contains a closing 978 * delimiter `\}'. If it does, close out our scope and return 979 * the currently-scoped rule (ignore or continue). Else, drop 980 * into the currently-scoped rule. 981 */ 982 983 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) { 984 ep = &(*bufp)[pos]; 985 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { 986 ep++; 987 if ('}' != *ep) 988 continue; 989 990 /* 991 * Make the \} go away. 992 * This is a little haphazard, as it's not quite 993 * clear how nroff does this. 994 * If we're at the end of line, then just chop 995 * off the \} and resize the buffer. 996 * If we aren't, then conver it to spaces. 997 */ 998 999 if ('\0' == *(ep + 1)) { 1000 *--ep = '\0'; 1001 *szp -= 2; 1002 } else 1003 *(ep - 1) = *ep = ' '; 1004 1005 roff_ccond(r, ROFF_ccond, bufp, szp, 1006 ln, pos, pos + 2, offs); 1007 break; 1008 } 1009 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 1010 } 1011 1012 /* 1013 * A denied conditional must evaluate its children if and only 1014 * if they're either structurally required (such as loops and 1015 * conditionals) or a closing macro. 1016 */ 1017 1018 if (ROFFRULE_DENY == rr) 1019 if ( ! (ROFFMAC_STRUCT & roffs[t].flags)) 1020 if (ROFF_ccond != t) 1021 return(ROFF_IGN); 1022 1023 assert(roffs[t].proc); 1024 return((*roffs[t].proc)(r, t, bufp, szp, 1025 ln, ppos, pos, offs)); 1026 } 1027 1028 /* ARGSUSED */ 1029 static enum rofferr 1030 roff_cond_text(ROFF_ARGS) 1031 { 1032 char *ep; 1033 enum roffrule rr; 1034 1035 rr = r->last->rule; 1036 roffnode_cleanscope(r); 1037 1038 ep = &(*bufp)[pos]; 1039 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { 1040 ep++; 1041 if ('}' != *ep) 1042 continue; 1043 *ep = '&'; 1044 roff_ccond(r, ROFF_ccond, bufp, szp, 1045 ln, pos, pos + 2, offs); 1046 } 1047 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); 1048 } 1049 1050 static enum roffrule 1051 roff_evalcond(const char *v, int *pos) 1052 { 1053 1054 switch (v[*pos]) { 1055 case ('n'): 1056 (*pos)++; 1057 return(ROFFRULE_ALLOW); 1058 case ('e'): 1059 /* FALLTHROUGH */ 1060 case ('o'): 1061 /* FALLTHROUGH */ 1062 case ('t'): 1063 (*pos)++; 1064 return(ROFFRULE_DENY); 1065 default: 1066 break; 1067 } 1068 1069 while (v[*pos] && ' ' != v[*pos]) 1070 (*pos)++; 1071 return(ROFFRULE_DENY); 1072 } 1073 1074 /* ARGSUSED */ 1075 static enum rofferr 1076 roff_line_ignore(ROFF_ARGS) 1077 { 1078 1079 if (ROFF_it == tok) 1080 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it"); 1081 1082 return(ROFF_IGN); 1083 } 1084 1085 /* ARGSUSED */ 1086 static enum rofferr 1087 roff_cond(ROFF_ARGS) 1088 { 1089 int sv; 1090 enum roffrule rule; 1091 1092 /* 1093 * An `.el' has no conditional body: it will consume the value 1094 * of the current rstack entry set in prior `ie' calls or 1095 * defaults to DENY. 1096 * 1097 * If we're not an `el', however, then evaluate the conditional. 1098 */ 1099 1100 rule = ROFF_el == tok ? 1101 (r->rstackpos < 0 ? 1102 ROFFRULE_DENY : r->rstack[r->rstackpos--]) : 1103 roff_evalcond(*bufp, &pos); 1104 1105 sv = pos; 1106 while (' ' == (*bufp)[pos]) 1107 pos++; 1108 1109 /* 1110 * Roff is weird. If we have just white-space after the 1111 * conditional, it's considered the BODY and we exit without 1112 * really doing anything. Warn about this. It's probably 1113 * wrong. 1114 */ 1115 1116 if ('\0' == (*bufp)[pos] && sv != pos) { 1117 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); 1118 return(ROFF_IGN); 1119 } 1120 1121 roffnode_push(r, tok, NULL, ln, ppos); 1122 1123 r->last->rule = rule; 1124 1125 /* 1126 * An if-else will put the NEGATION of the current evaluated 1127 * conditional into the stack of rules. 1128 */ 1129 1130 if (ROFF_ie == tok) { 1131 if (r->rstackpos == RSTACK_MAX - 1) { 1132 mandoc_msg(MANDOCERR_MEM, 1133 r->parse, ln, ppos, NULL); 1134 return(ROFF_ERR); 1135 } 1136 r->rstack[++r->rstackpos] = 1137 ROFFRULE_DENY == r->last->rule ? 1138 ROFFRULE_ALLOW : ROFFRULE_DENY; 1139 } 1140 1141 /* If the parent has false as its rule, then so do we. */ 1142 1143 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule) 1144 r->last->rule = ROFFRULE_DENY; 1145 1146 /* 1147 * Determine scope. If we're invoked with "\{" trailing the 1148 * conditional, then we're in a multiline scope. Else our scope 1149 * expires on the next line. 1150 */ 1151 1152 r->last->endspan = 1; 1153 1154 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { 1155 r->last->endspan = -1; 1156 pos += 2; 1157 } 1158 1159 /* 1160 * If there are no arguments on the line, the next-line scope is 1161 * assumed. 1162 */ 1163 1164 if ('\0' == (*bufp)[pos]) 1165 return(ROFF_IGN); 1166 1167 /* Otherwise re-run the roff parser after recalculating. */ 1168 1169 *offs = pos; 1170 return(ROFF_RERUN); 1171 } 1172 1173 1174 /* ARGSUSED */ 1175 static enum rofferr 1176 roff_ds(ROFF_ARGS) 1177 { 1178 char *name, *string; 1179 1180 /* 1181 * A symbol is named by the first word following the macro 1182 * invocation up to a space. Its value is anything after the 1183 * name's trailing whitespace and optional double-quote. Thus, 1184 * 1185 * [.ds foo "bar " ] 1186 * 1187 * will have `bar " ' as its value. 1188 */ 1189 1190 string = *bufp + pos; 1191 name = roff_getname(r, &string, ln, pos); 1192 if ('\0' == *name) 1193 return(ROFF_IGN); 1194 1195 /* Read past initial double-quote. */ 1196 if ('"' == *string) 1197 string++; 1198 1199 /* The rest is the value. */ 1200 roff_setstr(r, name, string, 0); 1201 return(ROFF_IGN); 1202 } 1203 1204 int 1205 roff_regisset(const struct roff *r, enum regs reg) 1206 { 1207 1208 return(r->regs[(int)reg].set); 1209 } 1210 1211 unsigned int 1212 roff_regget(const struct roff *r, enum regs reg) 1213 { 1214 1215 return(r->regs[(int)reg].u); 1216 } 1217 1218 void 1219 roff_regunset(struct roff *r, enum regs reg) 1220 { 1221 1222 r->regs[(int)reg].set = 0; 1223 } 1224 1225 /* ARGSUSED */ 1226 static enum rofferr 1227 roff_nr(ROFF_ARGS) 1228 { 1229 const char *key; 1230 char *val; 1231 int iv; 1232 1233 val = *bufp + pos; 1234 key = roff_getname(r, &val, ln, pos); 1235 1236 if (0 == strcmp(key, "nS")) { 1237 r->regs[(int)REG_nS].set = 1; 1238 if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0) 1239 r->regs[(int)REG_nS].u = (unsigned)iv; 1240 else 1241 r->regs[(int)REG_nS].u = 0u; 1242 } 1243 1244 return(ROFF_IGN); 1245 } 1246 1247 /* ARGSUSED */ 1248 static enum rofferr 1249 roff_rm(ROFF_ARGS) 1250 { 1251 const char *name; 1252 char *cp; 1253 1254 cp = *bufp + pos; 1255 while ('\0' != *cp) { 1256 name = roff_getname(r, &cp, ln, (int)(cp - *bufp)); 1257 if ('\0' != *name) 1258 roff_setstr(r, name, NULL, 0); 1259 } 1260 return(ROFF_IGN); 1261 } 1262 1263 /* ARGSUSED */ 1264 static enum rofferr 1265 roff_TE(ROFF_ARGS) 1266 { 1267 1268 if (NULL == r->tbl) 1269 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1270 else 1271 tbl_end(&r->tbl); 1272 1273 return(ROFF_IGN); 1274 } 1275 1276 /* ARGSUSED */ 1277 static enum rofferr 1278 roff_T_(ROFF_ARGS) 1279 { 1280 1281 if (NULL == r->tbl) 1282 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1283 else 1284 tbl_restart(ppos, ln, r->tbl); 1285 1286 return(ROFF_IGN); 1287 } 1288 1289 #if 0 1290 static int 1291 roff_closeeqn(struct roff *r) 1292 { 1293 1294 return(r->eqn && ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0); 1295 } 1296 #endif 1297 1298 static void 1299 roff_openeqn(struct roff *r, const char *name, int line, 1300 int offs, const char *buf) 1301 { 1302 struct eqn_node *e; 1303 int poff; 1304 1305 assert(NULL == r->eqn); 1306 e = eqn_alloc(name, offs, line, r->parse); 1307 1308 if (r->last_eqn) 1309 r->last_eqn->next = e; 1310 else 1311 r->first_eqn = r->last_eqn = e; 1312 1313 r->eqn = r->last_eqn = e; 1314 1315 if (buf) { 1316 poff = 0; 1317 eqn_read(&r->eqn, line, buf, offs, &poff); 1318 } 1319 } 1320 1321 /* ARGSUSED */ 1322 static enum rofferr 1323 roff_EQ(ROFF_ARGS) 1324 { 1325 1326 roff_openeqn(r, *bufp + pos, ln, ppos, NULL); 1327 return(ROFF_IGN); 1328 } 1329 1330 /* ARGSUSED */ 1331 static enum rofferr 1332 roff_EN(ROFF_ARGS) 1333 { 1334 1335 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); 1336 return(ROFF_IGN); 1337 } 1338 1339 /* ARGSUSED */ 1340 static enum rofferr 1341 roff_TS(ROFF_ARGS) 1342 { 1343 struct tbl_node *t; 1344 1345 if (r->tbl) { 1346 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL); 1347 tbl_end(&r->tbl); 1348 } 1349 1350 t = tbl_alloc(ppos, ln, r->parse); 1351 1352 if (r->last_tbl) 1353 r->last_tbl->next = t; 1354 else 1355 r->first_tbl = r->last_tbl = t; 1356 1357 r->tbl = r->last_tbl = t; 1358 return(ROFF_IGN); 1359 } 1360 1361 /* ARGSUSED */ 1362 static enum rofferr 1363 roff_tr(ROFF_ARGS) 1364 { 1365 const char *p, *first, *second; 1366 size_t fsz, ssz; 1367 enum mandoc_esc esc; 1368 1369 p = *bufp + pos; 1370 1371 if ('\0' == *p) { 1372 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL); 1373 return(ROFF_IGN); 1374 } 1375 1376 while ('\0' != *p) { 1377 fsz = ssz = 1; 1378 1379 first = p++; 1380 if ('\\' == *first) { 1381 esc = mandoc_escape(&p, NULL, NULL); 1382 if (ESCAPE_ERROR == esc) { 1383 mandoc_msg 1384 (MANDOCERR_BADESCAPE, r->parse, 1385 ln, (int)(p - *bufp), NULL); 1386 return(ROFF_IGN); 1387 } 1388 fsz = (size_t)(p - first); 1389 } 1390 1391 second = p++; 1392 if ('\\' == *second) { 1393 esc = mandoc_escape(&p, NULL, NULL); 1394 if (ESCAPE_ERROR == esc) { 1395 mandoc_msg 1396 (MANDOCERR_BADESCAPE, r->parse, 1397 ln, (int)(p - *bufp), NULL); 1398 return(ROFF_IGN); 1399 } 1400 ssz = (size_t)(p - second); 1401 } else if ('\0' == *second) { 1402 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, 1403 ln, (int)(p - *bufp), NULL); 1404 second = " "; 1405 p--; 1406 } 1407 1408 if (fsz > 1) { 1409 roff_setstrn(&r->xmbtab, first, 1410 fsz, second, ssz, 0); 1411 continue; 1412 } 1413 1414 if (NULL == r->xtab) 1415 r->xtab = mandoc_calloc 1416 (128, sizeof(struct roffstr)); 1417 1418 free(r->xtab[(int)*first].p); 1419 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 1420 r->xtab[(int)*first].sz = ssz; 1421 } 1422 1423 return(ROFF_IGN); 1424 } 1425 1426 /* ARGSUSED */ 1427 static enum rofferr 1428 roff_so(ROFF_ARGS) 1429 { 1430 char *name; 1431 1432 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL); 1433 1434 /* 1435 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 1436 * opening anything that's not in our cwd or anything beneath 1437 * it. Thus, explicitly disallow traversing up the file-system 1438 * or using absolute paths. 1439 */ 1440 1441 name = *bufp + pos; 1442 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) { 1443 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL); 1444 return(ROFF_ERR); 1445 } 1446 1447 *offs = pos; 1448 return(ROFF_SO); 1449 } 1450 1451 /* ARGSUSED */ 1452 static enum rofferr 1453 roff_userdef(ROFF_ARGS) 1454 { 1455 const char *arg[9]; 1456 char *cp, *n1, *n2; 1457 int i; 1458 1459 /* 1460 * Collect pointers to macro argument strings 1461 * and null-terminate them. 1462 */ 1463 cp = *bufp + pos; 1464 for (i = 0; i < 9; i++) 1465 arg[i] = '\0' == *cp ? "" : 1466 mandoc_getarg(r->parse, &cp, ln, &pos); 1467 1468 /* 1469 * Expand macro arguments. 1470 */ 1471 *szp = 0; 1472 n1 = cp = mandoc_strdup(r->current_string); 1473 while (NULL != (cp = strstr(cp, "\\$"))) { 1474 i = cp[2] - '1'; 1475 if (0 > i || 8 < i) { 1476 /* Not an argument invocation. */ 1477 cp += 2; 1478 continue; 1479 } 1480 1481 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1; 1482 n2 = mandoc_malloc(*szp); 1483 1484 strlcpy(n2, n1, (size_t)(cp - n1 + 1)); 1485 strlcat(n2, arg[i], *szp); 1486 strlcat(n2, cp + 3, *szp); 1487 1488 cp = n2 + (cp - n1); 1489 free(n1); 1490 n1 = n2; 1491 } 1492 1493 /* 1494 * Replace the macro invocation 1495 * by the expanded macro. 1496 */ 1497 free(*bufp); 1498 *bufp = n1; 1499 if (0 == *szp) 1500 *szp = strlen(*bufp) + 1; 1501 1502 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ? 1503 ROFF_REPARSE : ROFF_APPEND); 1504 } 1505 1506 static char * 1507 roff_getname(struct roff *r, char **cpp, int ln, int pos) 1508 { 1509 char *name, *cp; 1510 1511 name = *cpp; 1512 if ('\0' == *name) 1513 return(name); 1514 1515 /* Read until end of name. */ 1516 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) { 1517 if ('\\' != *cp) 1518 continue; 1519 cp++; 1520 if ('\\' == *cp) 1521 continue; 1522 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL); 1523 *cp = '\0'; 1524 name = cp; 1525 } 1526 1527 /* Nil-terminate name. */ 1528 if ('\0' != *cp) 1529 *(cp++) = '\0'; 1530 1531 /* Read past spaces. */ 1532 while (' ' == *cp) 1533 cp++; 1534 1535 *cpp = cp; 1536 return(name); 1537 } 1538 1539 /* 1540 * Store *string into the user-defined string called *name. 1541 * In multiline mode, append to an existing entry and append '\n'; 1542 * else replace the existing entry, if there is one. 1543 * To clear an existing entry, call with (*r, *name, NULL, 0). 1544 */ 1545 static void 1546 roff_setstr(struct roff *r, const char *name, const char *string, 1547 int multiline) 1548 { 1549 1550 roff_setstrn(&r->strtab, name, strlen(name), string, 1551 string ? strlen(string) : 0, multiline); 1552 } 1553 1554 static void 1555 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 1556 const char *string, size_t stringsz, int multiline) 1557 { 1558 struct roffkv *n; 1559 char *c; 1560 int i; 1561 size_t oldch, newch; 1562 1563 /* Search for an existing string with the same name. */ 1564 n = *r; 1565 1566 while (n && strcmp(name, n->key.p)) 1567 n = n->next; 1568 1569 if (NULL == n) { 1570 /* Create a new string table entry. */ 1571 n = mandoc_malloc(sizeof(struct roffkv)); 1572 n->key.p = mandoc_strndup(name, namesz); 1573 n->key.sz = namesz; 1574 n->val.p = NULL; 1575 n->val.sz = 0; 1576 n->next = *r; 1577 *r = n; 1578 } else if (0 == multiline) { 1579 /* In multiline mode, append; else replace. */ 1580 free(n->val.p); 1581 n->val.p = NULL; 1582 n->val.sz = 0; 1583 } 1584 1585 if (NULL == string) 1586 return; 1587 1588 /* 1589 * One additional byte for the '\n' in multiline mode, 1590 * and one for the terminating '\0'. 1591 */ 1592 newch = stringsz + (multiline ? 2u : 1u); 1593 1594 if (NULL == n->val.p) { 1595 n->val.p = mandoc_malloc(newch); 1596 *n->val.p = '\0'; 1597 oldch = 0; 1598 } else { 1599 oldch = n->val.sz; 1600 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 1601 } 1602 1603 /* Skip existing content in the destination buffer. */ 1604 c = n->val.p + (int)oldch; 1605 1606 /* Append new content to the destination buffer. */ 1607 i = 0; 1608 while (i < (int)stringsz) { 1609 /* 1610 * Rudimentary roff copy mode: 1611 * Handle escaped backslashes. 1612 */ 1613 if ('\\' == string[i] && '\\' == string[i + 1]) 1614 i++; 1615 *c++ = string[i++]; 1616 } 1617 1618 /* Append terminating bytes. */ 1619 if (multiline) 1620 *c++ = '\n'; 1621 1622 *c = '\0'; 1623 n->val.sz = (int)(c - n->val.p); 1624 } 1625 1626 static const char * 1627 roff_getstrn(const struct roff *r, const char *name, size_t len) 1628 { 1629 const struct roffkv *n; 1630 1631 for (n = r->strtab; n; n = n->next) 1632 if (0 == strncmp(name, n->key.p, len) && 1633 '\0' == n->key.p[(int)len]) 1634 return(n->val.p); 1635 1636 return(NULL); 1637 } 1638 1639 static void 1640 roff_freestr(struct roffkv *r) 1641 { 1642 struct roffkv *n, *nn; 1643 1644 for (n = r; n; n = nn) { 1645 free(n->key.p); 1646 free(n->val.p); 1647 nn = n->next; 1648 free(n); 1649 } 1650 } 1651 1652 const struct tbl_span * 1653 roff_span(const struct roff *r) 1654 { 1655 1656 return(r->tbl ? tbl_span(r->tbl) : NULL); 1657 } 1658 1659 const struct eqn * 1660 roff_eqn(const struct roff *r) 1661 { 1662 1663 return(r->last_eqn ? &r->last_eqn->eqn : NULL); 1664 } 1665 1666 /* 1667 * Duplicate an input string, making the appropriate character 1668 * conversations (as stipulated by `tr') along the way. 1669 * Returns a heap-allocated string with all the replacements made. 1670 */ 1671 char * 1672 roff_strdup(const struct roff *r, const char *p) 1673 { 1674 const struct roffkv *cp; 1675 char *res; 1676 const char *pp; 1677 size_t ssz, sz; 1678 enum mandoc_esc esc; 1679 1680 if (NULL == r->xmbtab && NULL == r->xtab) 1681 return(mandoc_strdup(p)); 1682 else if ('\0' == *p) 1683 return(mandoc_strdup("")); 1684 1685 /* 1686 * Step through each character looking for term matches 1687 * (remember that a `tr' can be invoked with an escape, which is 1688 * a glyph but the escape is multi-character). 1689 * We only do this if the character hash has been initialised 1690 * and the string is >0 length. 1691 */ 1692 1693 res = NULL; 1694 ssz = 0; 1695 1696 while ('\0' != *p) { 1697 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { 1698 sz = r->xtab[(int)*p].sz; 1699 res = mandoc_realloc(res, ssz + sz + 1); 1700 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 1701 ssz += sz; 1702 p++; 1703 continue; 1704 } else if ('\\' != *p) { 1705 res = mandoc_realloc(res, ssz + 2); 1706 res[ssz++] = *p++; 1707 continue; 1708 } 1709 1710 /* Search for term matches. */ 1711 for (cp = r->xmbtab; cp; cp = cp->next) 1712 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 1713 break; 1714 1715 if (NULL != cp) { 1716 /* 1717 * A match has been found. 1718 * Append the match to the array and move 1719 * forward by its keysize. 1720 */ 1721 res = mandoc_realloc 1722 (res, ssz + cp->val.sz + 1); 1723 memcpy(res + ssz, cp->val.p, cp->val.sz); 1724 ssz += cp->val.sz; 1725 p += (int)cp->key.sz; 1726 continue; 1727 } 1728 1729 /* 1730 * Handle escapes carefully: we need to copy 1731 * over just the escape itself, or else we might 1732 * do replacements within the escape itself. 1733 * Make sure to pass along the bogus string. 1734 */ 1735 pp = p++; 1736 esc = mandoc_escape(&p, NULL, NULL); 1737 if (ESCAPE_ERROR == esc) { 1738 sz = strlen(pp); 1739 res = mandoc_realloc(res, ssz + sz + 1); 1740 memcpy(res + ssz, pp, sz); 1741 break; 1742 } 1743 /* 1744 * We bail out on bad escapes. 1745 * No need to warn: we already did so when 1746 * roff_res() was called. 1747 */ 1748 sz = (int)(p - pp); 1749 res = mandoc_realloc(res, ssz + sz + 1); 1750 memcpy(res + ssz, pp, sz); 1751 ssz += sz; 1752 } 1753 1754 res[(int)ssz] = '\0'; 1755 return(res); 1756 } 1757