1 /* $OpenBSD: eqn.c,v 1.24 2016/01/08 00:50:20 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <limits.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <time.h> 26 27 #include "mandoc.h" 28 #include "mandoc_aux.h" 29 #include "libmandoc.h" 30 #include "libroff.h" 31 32 #define EQN_NEST_MAX 128 /* maximum nesting of defines */ 33 #define STRNEQ(p1, sz1, p2, sz2) \ 34 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 35 36 enum eqn_tok { 37 EQN_TOK_DYAD = 0, 38 EQN_TOK_VEC, 39 EQN_TOK_UNDER, 40 EQN_TOK_BAR, 41 EQN_TOK_TILDE, 42 EQN_TOK_HAT, 43 EQN_TOK_DOT, 44 EQN_TOK_DOTDOT, 45 EQN_TOK_FWD, 46 EQN_TOK_BACK, 47 EQN_TOK_DOWN, 48 EQN_TOK_UP, 49 EQN_TOK_FAT, 50 EQN_TOK_ROMAN, 51 EQN_TOK_ITALIC, 52 EQN_TOK_BOLD, 53 EQN_TOK_SIZE, 54 EQN_TOK_SUB, 55 EQN_TOK_SUP, 56 EQN_TOK_SQRT, 57 EQN_TOK_OVER, 58 EQN_TOK_FROM, 59 EQN_TOK_TO, 60 EQN_TOK_BRACE_OPEN, 61 EQN_TOK_BRACE_CLOSE, 62 EQN_TOK_GSIZE, 63 EQN_TOK_GFONT, 64 EQN_TOK_MARK, 65 EQN_TOK_LINEUP, 66 EQN_TOK_LEFT, 67 EQN_TOK_RIGHT, 68 EQN_TOK_PILE, 69 EQN_TOK_LPILE, 70 EQN_TOK_RPILE, 71 EQN_TOK_CPILE, 72 EQN_TOK_MATRIX, 73 EQN_TOK_CCOL, 74 EQN_TOK_LCOL, 75 EQN_TOK_RCOL, 76 EQN_TOK_DELIM, 77 EQN_TOK_DEFINE, 78 EQN_TOK_TDEFINE, 79 EQN_TOK_NDEFINE, 80 EQN_TOK_UNDEF, 81 EQN_TOK_EOF, 82 EQN_TOK_ABOVE, 83 EQN_TOK__MAX 84 }; 85 86 static const char *eqn_toks[EQN_TOK__MAX] = { 87 "dyad", /* EQN_TOK_DYAD */ 88 "vec", /* EQN_TOK_VEC */ 89 "under", /* EQN_TOK_UNDER */ 90 "bar", /* EQN_TOK_BAR */ 91 "tilde", /* EQN_TOK_TILDE */ 92 "hat", /* EQN_TOK_HAT */ 93 "dot", /* EQN_TOK_DOT */ 94 "dotdot", /* EQN_TOK_DOTDOT */ 95 "fwd", /* EQN_TOK_FWD * */ 96 "back", /* EQN_TOK_BACK */ 97 "down", /* EQN_TOK_DOWN */ 98 "up", /* EQN_TOK_UP */ 99 "fat", /* EQN_TOK_FAT */ 100 "roman", /* EQN_TOK_ROMAN */ 101 "italic", /* EQN_TOK_ITALIC */ 102 "bold", /* EQN_TOK_BOLD */ 103 "size", /* EQN_TOK_SIZE */ 104 "sub", /* EQN_TOK_SUB */ 105 "sup", /* EQN_TOK_SUP */ 106 "sqrt", /* EQN_TOK_SQRT */ 107 "over", /* EQN_TOK_OVER */ 108 "from", /* EQN_TOK_FROM */ 109 "to", /* EQN_TOK_TO */ 110 "{", /* EQN_TOK_BRACE_OPEN */ 111 "}", /* EQN_TOK_BRACE_CLOSE */ 112 "gsize", /* EQN_TOK_GSIZE */ 113 "gfont", /* EQN_TOK_GFONT */ 114 "mark", /* EQN_TOK_MARK */ 115 "lineup", /* EQN_TOK_LINEUP */ 116 "left", /* EQN_TOK_LEFT */ 117 "right", /* EQN_TOK_RIGHT */ 118 "pile", /* EQN_TOK_PILE */ 119 "lpile", /* EQN_TOK_LPILE */ 120 "rpile", /* EQN_TOK_RPILE */ 121 "cpile", /* EQN_TOK_CPILE */ 122 "matrix", /* EQN_TOK_MATRIX */ 123 "ccol", /* EQN_TOK_CCOL */ 124 "lcol", /* EQN_TOK_LCOL */ 125 "rcol", /* EQN_TOK_RCOL */ 126 "delim", /* EQN_TOK_DELIM */ 127 "define", /* EQN_TOK_DEFINE */ 128 "tdefine", /* EQN_TOK_TDEFINE */ 129 "ndefine", /* EQN_TOK_NDEFINE */ 130 "undef", /* EQN_TOK_UNDEF */ 131 NULL, /* EQN_TOK_EOF */ 132 "above", /* EQN_TOK_ABOVE */ 133 }; 134 135 enum eqn_symt { 136 EQNSYM_alpha, 137 EQNSYM_beta, 138 EQNSYM_chi, 139 EQNSYM_delta, 140 EQNSYM_epsilon, 141 EQNSYM_eta, 142 EQNSYM_gamma, 143 EQNSYM_iota, 144 EQNSYM_kappa, 145 EQNSYM_lambda, 146 EQNSYM_mu, 147 EQNSYM_nu, 148 EQNSYM_omega, 149 EQNSYM_omicron, 150 EQNSYM_phi, 151 EQNSYM_pi, 152 EQNSYM_ps, 153 EQNSYM_rho, 154 EQNSYM_sigma, 155 EQNSYM_tau, 156 EQNSYM_theta, 157 EQNSYM_upsilon, 158 EQNSYM_xi, 159 EQNSYM_zeta, 160 EQNSYM_DELTA, 161 EQNSYM_GAMMA, 162 EQNSYM_LAMBDA, 163 EQNSYM_OMEGA, 164 EQNSYM_PHI, 165 EQNSYM_PI, 166 EQNSYM_PSI, 167 EQNSYM_SIGMA, 168 EQNSYM_THETA, 169 EQNSYM_UPSILON, 170 EQNSYM_XI, 171 EQNSYM_inter, 172 EQNSYM_union, 173 EQNSYM_prod, 174 EQNSYM_int, 175 EQNSYM_sum, 176 EQNSYM_grad, 177 EQNSYM_del, 178 EQNSYM_times, 179 EQNSYM_cdot, 180 EQNSYM_nothing, 181 EQNSYM_approx, 182 EQNSYM_prime, 183 EQNSYM_half, 184 EQNSYM_partial, 185 EQNSYM_inf, 186 EQNSYM_muchgreat, 187 EQNSYM_muchless, 188 EQNSYM_larrow, 189 EQNSYM_rarrow, 190 EQNSYM_pm, 191 EQNSYM_nequal, 192 EQNSYM_equiv, 193 EQNSYM_lessequal, 194 EQNSYM_moreequal, 195 EQNSYM_minus, 196 EQNSYM__MAX 197 }; 198 199 struct eqnsym { 200 const char *str; 201 const char *sym; 202 }; 203 204 static const struct eqnsym eqnsyms[EQNSYM__MAX] = { 205 { "alpha", "*a" }, /* EQNSYM_alpha */ 206 { "beta", "*b" }, /* EQNSYM_beta */ 207 { "chi", "*x" }, /* EQNSYM_chi */ 208 { "delta", "*d" }, /* EQNSYM_delta */ 209 { "epsilon", "*e" }, /* EQNSYM_epsilon */ 210 { "eta", "*y" }, /* EQNSYM_eta */ 211 { "gamma", "*g" }, /* EQNSYM_gamma */ 212 { "iota", "*i" }, /* EQNSYM_iota */ 213 { "kappa", "*k" }, /* EQNSYM_kappa */ 214 { "lambda", "*l" }, /* EQNSYM_lambda */ 215 { "mu", "*m" }, /* EQNSYM_mu */ 216 { "nu", "*n" }, /* EQNSYM_nu */ 217 { "omega", "*w" }, /* EQNSYM_omega */ 218 { "omicron", "*o" }, /* EQNSYM_omicron */ 219 { "phi", "*f" }, /* EQNSYM_phi */ 220 { "pi", "*p" }, /* EQNSYM_pi */ 221 { "psi", "*q" }, /* EQNSYM_psi */ 222 { "rho", "*r" }, /* EQNSYM_rho */ 223 { "sigma", "*s" }, /* EQNSYM_sigma */ 224 { "tau", "*t" }, /* EQNSYM_tau */ 225 { "theta", "*h" }, /* EQNSYM_theta */ 226 { "upsilon", "*u" }, /* EQNSYM_upsilon */ 227 { "xi", "*c" }, /* EQNSYM_xi */ 228 { "zeta", "*z" }, /* EQNSYM_zeta */ 229 { "DELTA", "*D" }, /* EQNSYM_DELTA */ 230 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 231 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 232 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 233 { "PHI", "*F" }, /* EQNSYM_PHI */ 234 { "PI", "*P" }, /* EQNSYM_PI */ 235 { "PSI", "*Q" }, /* EQNSYM_PSI */ 236 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 237 { "THETA", "*H" }, /* EQNSYM_THETA */ 238 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 239 { "XI", "*C" }, /* EQNSYM_XI */ 240 { "inter", "ca" }, /* EQNSYM_inter */ 241 { "union", "cu" }, /* EQNSYM_union */ 242 { "prod", "product" }, /* EQNSYM_prod */ 243 { "int", "integral" }, /* EQNSYM_int */ 244 { "sum", "sum" }, /* EQNSYM_sum */ 245 { "grad", "gr" }, /* EQNSYM_grad */ 246 { "del", "gr" }, /* EQNSYM_del */ 247 { "times", "mu" }, /* EQNSYM_times */ 248 { "cdot", "pc" }, /* EQNSYM_cdot */ 249 { "nothing", "&" }, /* EQNSYM_nothing */ 250 { "approx", "~~" }, /* EQNSYM_approx */ 251 { "prime", "fm" }, /* EQNSYM_prime */ 252 { "half", "12" }, /* EQNSYM_half */ 253 { "partial", "pd" }, /* EQNSYM_partial */ 254 { "inf", "if" }, /* EQNSYM_inf */ 255 { ">>", ">>" }, /* EQNSYM_muchgreat */ 256 { "<<", "<<" }, /* EQNSYM_muchless */ 257 { "<-", "<-" }, /* EQNSYM_larrow */ 258 { "->", "->" }, /* EQNSYM_rarrow */ 259 { "+-", "+-" }, /* EQNSYM_pm */ 260 { "!=", "!=" }, /* EQNSYM_nequal */ 261 { "==", "==" }, /* EQNSYM_equiv */ 262 { "<=", "<=" }, /* EQNSYM_lessequal */ 263 { ">=", ">=" }, /* EQNSYM_moreequal */ 264 { "-", "mi" }, /* EQNSYM_minus */ 265 }; 266 267 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 268 static void eqn_box_free(struct eqn_box *); 269 static struct eqn_box *eqn_box_makebinary(struct eqn_node *, 270 enum eqn_post, struct eqn_box *); 271 static void eqn_def(struct eqn_node *); 272 static struct eqn_def *eqn_def_find(struct eqn_node *, const char *, size_t); 273 static void eqn_delim(struct eqn_node *); 274 static const char *eqn_next(struct eqn_node *, char, size_t *, int); 275 static const char *eqn_nextrawtok(struct eqn_node *, size_t *); 276 static const char *eqn_nexttok(struct eqn_node *, size_t *); 277 static enum rofferr eqn_parse(struct eqn_node *, struct eqn_box *); 278 static enum eqn_tok eqn_tok_parse(struct eqn_node *, char **); 279 static void eqn_undef(struct eqn_node *); 280 281 282 enum rofferr 283 eqn_read(struct eqn_node **epp, int ln, 284 const char *p, int pos, int *offs) 285 { 286 size_t sz; 287 struct eqn_node *ep; 288 enum rofferr er; 289 290 ep = *epp; 291 292 /* 293 * If we're the terminating mark, unset our equation status and 294 * validate the full equation. 295 */ 296 297 if (0 == strncmp(p, ".EN", 3)) { 298 er = eqn_end(epp); 299 p += 3; 300 while (' ' == *p || '\t' == *p) 301 p++; 302 if ('\0' == *p) 303 return er; 304 mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse, 305 ln, pos, "EN %s", p); 306 return er; 307 } 308 309 /* 310 * Build up the full string, replacing all newlines with regular 311 * whitespace. 312 */ 313 314 sz = strlen(p + pos) + 1; 315 ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1); 316 317 /* First invocation: nil terminate the string. */ 318 319 if (0 == ep->sz) 320 *ep->data = '\0'; 321 322 ep->sz += sz; 323 strlcat(ep->data, p + pos, ep->sz + 1); 324 strlcat(ep->data, " ", ep->sz + 1); 325 return ROFF_IGN; 326 } 327 328 struct eqn_node * 329 eqn_alloc(int pos, int line, struct mparse *parse) 330 { 331 struct eqn_node *p; 332 333 p = mandoc_calloc(1, sizeof(struct eqn_node)); 334 335 p->parse = parse; 336 p->eqn.ln = line; 337 p->eqn.pos = pos; 338 p->gsize = EQN_DEFSIZE; 339 340 return p; 341 } 342 343 /* 344 * Find the key "key" of the give size within our eqn-defined values. 345 */ 346 static struct eqn_def * 347 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz) 348 { 349 int i; 350 351 for (i = 0; i < (int)ep->defsz; i++) 352 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 353 ep->defs[i].keysz, key, sz)) 354 return &ep->defs[i]; 355 356 return NULL; 357 } 358 359 /* 360 * Get the next token from the input stream using the given quote 361 * character. 362 * Optionally make any replacements. 363 */ 364 static const char * 365 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl) 366 { 367 char *start, *next; 368 int q, diff, lim; 369 size_t ssz, dummy; 370 struct eqn_def *def; 371 372 if (NULL == sz) 373 sz = &dummy; 374 375 lim = 0; 376 ep->rew = ep->cur; 377 again: 378 /* Prevent self-definitions. */ 379 380 if (lim >= EQN_NEST_MAX) { 381 mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse, 382 ep->eqn.ln, ep->eqn.pos, NULL); 383 return NULL; 384 } 385 386 ep->cur = ep->rew; 387 start = &ep->data[(int)ep->cur]; 388 q = 0; 389 390 if ('\0' == *start) 391 return NULL; 392 393 if (quote == *start) { 394 ep->cur++; 395 q = 1; 396 } 397 398 start = &ep->data[(int)ep->cur]; 399 400 if ( ! q) { 401 if ('{' == *start || '}' == *start) 402 ssz = 1; 403 else 404 ssz = strcspn(start + 1, " ^~\"{}\t") + 1; 405 next = start + (int)ssz; 406 if ('\0' == *next) 407 next = NULL; 408 } else 409 next = strchr(start, quote); 410 411 if (NULL != next) { 412 *sz = (size_t)(next - start); 413 ep->cur += *sz; 414 if (q) 415 ep->cur++; 416 while (' ' == ep->data[(int)ep->cur] || 417 '\t' == ep->data[(int)ep->cur] || 418 '^' == ep->data[(int)ep->cur] || 419 '~' == ep->data[(int)ep->cur]) 420 ep->cur++; 421 } else { 422 if (q) 423 mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse, 424 ep->eqn.ln, ep->eqn.pos, NULL); 425 next = strchr(start, '\0'); 426 *sz = (size_t)(next - start); 427 ep->cur += *sz; 428 } 429 430 /* Quotes aren't expanded for values. */ 431 432 if (q || ! repl) 433 return start; 434 435 if (NULL != (def = eqn_def_find(ep, start, *sz))) { 436 diff = def->valsz - *sz; 437 438 if (def->valsz > *sz) { 439 ep->sz += diff; 440 ep->data = mandoc_realloc(ep->data, ep->sz + 1); 441 ep->data[ep->sz] = '\0'; 442 start = &ep->data[(int)ep->rew]; 443 } 444 445 diff = def->valsz - *sz; 446 memmove(start + *sz + diff, start + *sz, 447 (strlen(start) - *sz) + 1); 448 memcpy(start, def->val, def->valsz); 449 lim++; 450 goto again; 451 } 452 453 return start; 454 } 455 456 /* 457 * Get the next delimited token using the default current quote 458 * character. 459 */ 460 static const char * 461 eqn_nexttok(struct eqn_node *ep, size_t *sz) 462 { 463 464 return eqn_next(ep, '"', sz, 1); 465 } 466 467 /* 468 * Get next token without replacement. 469 */ 470 static const char * 471 eqn_nextrawtok(struct eqn_node *ep, size_t *sz) 472 { 473 474 return eqn_next(ep, '"', sz, 0); 475 } 476 477 /* 478 * Parse a token from the stream of text. 479 * A token consists of one of the recognised eqn(7) strings. 480 * Strings are separated by delimiting marks. 481 * This returns EQN_TOK_EOF when there are no more tokens. 482 * If the token is an unrecognised string literal, then it returns 483 * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated 484 * string. 485 * This must be later freed with free(3). 486 */ 487 static enum eqn_tok 488 eqn_tok_parse(struct eqn_node *ep, char **p) 489 { 490 const char *start; 491 size_t i, sz; 492 int quoted; 493 494 if (NULL != p) 495 *p = NULL; 496 497 quoted = ep->data[ep->cur] == '"'; 498 499 if (NULL == (start = eqn_nexttok(ep, &sz))) 500 return EQN_TOK_EOF; 501 502 if (quoted) { 503 if (p != NULL) 504 *p = mandoc_strndup(start, sz); 505 return EQN_TOK__MAX; 506 } 507 508 for (i = 0; i < EQN_TOK__MAX; i++) { 509 if (NULL == eqn_toks[i]) 510 continue; 511 if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i]))) 512 break; 513 } 514 515 if (i == EQN_TOK__MAX && NULL != p) 516 *p = mandoc_strndup(start, sz); 517 518 return i; 519 } 520 521 static void 522 eqn_box_free(struct eqn_box *bp) 523 { 524 525 if (bp->first) 526 eqn_box_free(bp->first); 527 if (bp->next) 528 eqn_box_free(bp->next); 529 530 free(bp->text); 531 free(bp->left); 532 free(bp->right); 533 free(bp->top); 534 free(bp->bottom); 535 free(bp); 536 } 537 538 /* 539 * Allocate a box as the last child of the parent node. 540 */ 541 static struct eqn_box * 542 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 543 { 544 struct eqn_box *bp; 545 546 bp = mandoc_calloc(1, sizeof(struct eqn_box)); 547 bp->parent = parent; 548 bp->parent->args++; 549 bp->expectargs = UINT_MAX; 550 bp->size = ep->gsize; 551 552 if (NULL != parent->first) { 553 parent->last->next = bp; 554 bp->prev = parent->last; 555 } else 556 parent->first = bp; 557 558 parent->last = bp; 559 return bp; 560 } 561 562 /* 563 * Reparent the current last node (of the current parent) under a new 564 * EQN_SUBEXPR as the first element. 565 * Then return the new parent. 566 * The new EQN_SUBEXPR will have a two-child limit. 567 */ 568 static struct eqn_box * 569 eqn_box_makebinary(struct eqn_node *ep, 570 enum eqn_post pos, struct eqn_box *parent) 571 { 572 struct eqn_box *b, *newb; 573 574 assert(NULL != parent->last); 575 b = parent->last; 576 if (parent->last == parent->first) 577 parent->first = NULL; 578 parent->args--; 579 parent->last = b->prev; 580 b->prev = NULL; 581 newb = eqn_box_alloc(ep, parent); 582 newb->pos = pos; 583 newb->type = EQN_SUBEXPR; 584 newb->expectargs = 2; 585 newb->args = 1; 586 newb->first = newb->last = b; 587 newb->first->next = NULL; 588 b->parent = newb; 589 return newb; 590 } 591 592 /* 593 * Parse the "delim" control statement. 594 */ 595 static void 596 eqn_delim(struct eqn_node *ep) 597 { 598 const char *start; 599 size_t sz; 600 601 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) 602 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 603 ep->eqn.ln, ep->eqn.pos, "delim"); 604 else if (strncmp(start, "off", 3) == 0) 605 ep->delim = 0; 606 else if (strncmp(start, "on", 2) == 0) { 607 if (ep->odelim && ep->cdelim) 608 ep->delim = 1; 609 } else if (start[1] != '\0') { 610 ep->odelim = start[0]; 611 ep->cdelim = start[1]; 612 ep->delim = 1; 613 } 614 } 615 616 /* 617 * Undefine a previously-defined string. 618 */ 619 static void 620 eqn_undef(struct eqn_node *ep) 621 { 622 const char *start; 623 struct eqn_def *def; 624 size_t sz; 625 626 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) { 627 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 628 ep->eqn.ln, ep->eqn.pos, "undef"); 629 return; 630 } 631 if ((def = eqn_def_find(ep, start, sz)) == NULL) 632 return; 633 free(def->key); 634 free(def->val); 635 def->key = def->val = NULL; 636 def->keysz = def->valsz = 0; 637 } 638 639 static void 640 eqn_def(struct eqn_node *ep) 641 { 642 const char *start; 643 size_t sz; 644 struct eqn_def *def; 645 int i; 646 647 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) { 648 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 649 ep->eqn.ln, ep->eqn.pos, "define"); 650 return; 651 } 652 653 /* 654 * Search for a key that already exists. 655 * Create a new key if none is found. 656 */ 657 if (NULL == (def = eqn_def_find(ep, start, sz))) { 658 /* Find holes in string array. */ 659 for (i = 0; i < (int)ep->defsz; i++) 660 if (0 == ep->defs[i].keysz) 661 break; 662 663 if (i == (int)ep->defsz) { 664 ep->defsz++; 665 ep->defs = mandoc_reallocarray(ep->defs, 666 ep->defsz, sizeof(struct eqn_def)); 667 ep->defs[i].key = ep->defs[i].val = NULL; 668 } 669 670 def = ep->defs + i; 671 free(def->key); 672 def->key = mandoc_strndup(start, sz); 673 def->keysz = sz; 674 } 675 676 start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0); 677 if (start == NULL) { 678 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse, 679 ep->eqn.ln, ep->eqn.pos, "define %s", def->key); 680 free(def->key); 681 free(def->val); 682 def->key = def->val = NULL; 683 def->keysz = def->valsz = 0; 684 return; 685 } 686 free(def->val); 687 def->val = mandoc_strndup(start, sz); 688 def->valsz = sz; 689 } 690 691 /* 692 * Recursively parse an eqn(7) expression. 693 */ 694 static enum rofferr 695 eqn_parse(struct eqn_node *ep, struct eqn_box *parent) 696 { 697 char sym[64]; 698 struct eqn_box *cur; 699 const char *start; 700 char *p; 701 size_t i, sz; 702 enum eqn_tok tok, subtok; 703 enum eqn_post pos; 704 int size; 705 706 assert(parent != NULL); 707 708 /* 709 * Empty equation. 710 * Do not add it to the high-level syntax tree. 711 */ 712 713 if (ep->data == NULL) 714 return ROFF_IGN; 715 716 next_tok: 717 tok = eqn_tok_parse(ep, &p); 718 719 this_tok: 720 switch (tok) { 721 case (EQN_TOK_UNDEF): 722 eqn_undef(ep); 723 break; 724 case (EQN_TOK_NDEFINE): 725 case (EQN_TOK_DEFINE): 726 eqn_def(ep); 727 break; 728 case (EQN_TOK_TDEFINE): 729 if (eqn_nextrawtok(ep, NULL) == NULL || 730 eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL) 731 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 732 ep->eqn.ln, ep->eqn.pos, "tdefine"); 733 break; 734 case (EQN_TOK_DELIM): 735 eqn_delim(ep); 736 break; 737 case (EQN_TOK_GFONT): 738 if (eqn_nextrawtok(ep, NULL) == NULL) 739 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 740 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 741 break; 742 case (EQN_TOK_MARK): 743 case (EQN_TOK_LINEUP): 744 /* Ignore these. */ 745 break; 746 case (EQN_TOK_DYAD): 747 case (EQN_TOK_VEC): 748 case (EQN_TOK_UNDER): 749 case (EQN_TOK_BAR): 750 case (EQN_TOK_TILDE): 751 case (EQN_TOK_HAT): 752 case (EQN_TOK_DOT): 753 case (EQN_TOK_DOTDOT): 754 if (parent->last == NULL) { 755 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 756 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 757 cur = eqn_box_alloc(ep, parent); 758 cur->type = EQN_TEXT; 759 cur->text = mandoc_strdup(""); 760 } 761 parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent); 762 parent->type = EQN_LISTONE; 763 parent->expectargs = 1; 764 switch (tok) { 765 case (EQN_TOK_DOTDOT): 766 strlcpy(sym, "\\[ad]", sizeof(sym)); 767 break; 768 case (EQN_TOK_VEC): 769 strlcpy(sym, "\\[->]", sizeof(sym)); 770 break; 771 case (EQN_TOK_DYAD): 772 strlcpy(sym, "\\[<>]", sizeof(sym)); 773 break; 774 case (EQN_TOK_TILDE): 775 strlcpy(sym, "\\[a~]", sizeof(sym)); 776 break; 777 case (EQN_TOK_UNDER): 778 strlcpy(sym, "\\[ul]", sizeof(sym)); 779 break; 780 case (EQN_TOK_BAR): 781 strlcpy(sym, "\\[rl]", sizeof(sym)); 782 break; 783 case (EQN_TOK_DOT): 784 strlcpy(sym, "\\[a.]", sizeof(sym)); 785 break; 786 case (EQN_TOK_HAT): 787 strlcpy(sym, "\\[ha]", sizeof(sym)); 788 break; 789 default: 790 abort(); 791 } 792 793 switch (tok) { 794 case (EQN_TOK_DOTDOT): 795 case (EQN_TOK_VEC): 796 case (EQN_TOK_DYAD): 797 case (EQN_TOK_TILDE): 798 case (EQN_TOK_BAR): 799 case (EQN_TOK_DOT): 800 case (EQN_TOK_HAT): 801 parent->top = mandoc_strdup(sym); 802 break; 803 case (EQN_TOK_UNDER): 804 parent->bottom = mandoc_strdup(sym); 805 break; 806 default: 807 abort(); 808 } 809 parent = parent->parent; 810 break; 811 case (EQN_TOK_FWD): 812 case (EQN_TOK_BACK): 813 case (EQN_TOK_DOWN): 814 case (EQN_TOK_UP): 815 subtok = eqn_tok_parse(ep, NULL); 816 if (subtok != EQN_TOK__MAX) { 817 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 818 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 819 tok = subtok; 820 goto this_tok; 821 } 822 break; 823 case (EQN_TOK_FAT): 824 case (EQN_TOK_ROMAN): 825 case (EQN_TOK_ITALIC): 826 case (EQN_TOK_BOLD): 827 while (parent->args == parent->expectargs) 828 parent = parent->parent; 829 /* 830 * These values apply to the next word or sequence of 831 * words; thus, we mark that we'll have a child with 832 * exactly one of those. 833 */ 834 parent = eqn_box_alloc(ep, parent); 835 parent->type = EQN_LISTONE; 836 parent->expectargs = 1; 837 switch (tok) { 838 case (EQN_TOK_FAT): 839 parent->font = EQNFONT_FAT; 840 break; 841 case (EQN_TOK_ROMAN): 842 parent->font = EQNFONT_ROMAN; 843 break; 844 case (EQN_TOK_ITALIC): 845 parent->font = EQNFONT_ITALIC; 846 break; 847 case (EQN_TOK_BOLD): 848 parent->font = EQNFONT_BOLD; 849 break; 850 default: 851 abort(); 852 } 853 break; 854 case (EQN_TOK_SIZE): 855 case (EQN_TOK_GSIZE): 856 /* Accept two values: integral size and a single. */ 857 if (NULL == (start = eqn_nexttok(ep, &sz))) { 858 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 859 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 860 break; 861 } 862 size = mandoc_strntoi(start, sz, 10); 863 if (-1 == size) { 864 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse, 865 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 866 break; 867 } 868 if (EQN_TOK_GSIZE == tok) { 869 ep->gsize = size; 870 break; 871 } 872 parent = eqn_box_alloc(ep, parent); 873 parent->type = EQN_LISTONE; 874 parent->expectargs = 1; 875 parent->size = size; 876 break; 877 case (EQN_TOK_FROM): 878 case (EQN_TOK_TO): 879 case (EQN_TOK_SUB): 880 case (EQN_TOK_SUP): 881 /* 882 * We have a left-right-associative expression. 883 * Repivot under a positional node, open a child scope 884 * and keep on reading. 885 */ 886 if (parent->last == NULL) { 887 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 888 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 889 cur = eqn_box_alloc(ep, parent); 890 cur->type = EQN_TEXT; 891 cur->text = mandoc_strdup(""); 892 } 893 /* Handle the "subsup" and "fromto" positions. */ 894 if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) { 895 parent->expectargs = 3; 896 parent->pos = EQNPOS_SUBSUP; 897 break; 898 } 899 if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) { 900 parent->expectargs = 3; 901 parent->pos = EQNPOS_FROMTO; 902 break; 903 } 904 switch (tok) { 905 case (EQN_TOK_FROM): 906 pos = EQNPOS_FROM; 907 break; 908 case (EQN_TOK_TO): 909 pos = EQNPOS_TO; 910 break; 911 case (EQN_TOK_SUP): 912 pos = EQNPOS_SUP; 913 break; 914 case (EQN_TOK_SUB): 915 pos = EQNPOS_SUB; 916 break; 917 default: 918 abort(); 919 } 920 parent = eqn_box_makebinary(ep, pos, parent); 921 break; 922 case (EQN_TOK_SQRT): 923 while (parent->args == parent->expectargs) 924 parent = parent->parent; 925 /* 926 * Accept a left-right-associative set of arguments just 927 * like sub and sup and friends but without rebalancing 928 * under a pivot. 929 */ 930 parent = eqn_box_alloc(ep, parent); 931 parent->type = EQN_SUBEXPR; 932 parent->pos = EQNPOS_SQRT; 933 parent->expectargs = 1; 934 break; 935 case (EQN_TOK_OVER): 936 /* 937 * We have a right-left-associative fraction. 938 * Close out anything that's currently open, then 939 * rebalance and continue reading. 940 */ 941 if (parent->last == NULL) { 942 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 943 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 944 cur = eqn_box_alloc(ep, parent); 945 cur->type = EQN_TEXT; 946 cur->text = mandoc_strdup(""); 947 } 948 while (EQN_SUBEXPR == parent->type) 949 parent = parent->parent; 950 parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent); 951 break; 952 case (EQN_TOK_RIGHT): 953 case (EQN_TOK_BRACE_CLOSE): 954 /* 955 * Close out the existing brace. 956 * FIXME: this is a shitty sentinel: we should really 957 * have a native EQN_BRACE type or whatnot. 958 */ 959 for (cur = parent; cur != NULL; cur = cur->parent) 960 if (cur->type == EQN_LIST && 961 (tok == EQN_TOK_BRACE_CLOSE || 962 cur->left != NULL)) 963 break; 964 if (cur == NULL) { 965 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse, 966 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 967 break; 968 } 969 parent = cur; 970 if (EQN_TOK_RIGHT == tok) { 971 if (NULL == (start = eqn_nexttok(ep, &sz))) { 972 mandoc_msg(MANDOCERR_REQ_EMPTY, 973 ep->parse, ep->eqn.ln, 974 ep->eqn.pos, eqn_toks[tok]); 975 break; 976 } 977 /* Handling depends on right/left. */ 978 if (STRNEQ(start, sz, "ceiling", 7)) { 979 strlcpy(sym, "\\[rc]", sizeof(sym)); 980 parent->right = mandoc_strdup(sym); 981 } else if (STRNEQ(start, sz, "floor", 5)) { 982 strlcpy(sym, "\\[rf]", sizeof(sym)); 983 parent->right = mandoc_strdup(sym); 984 } else 985 parent->right = mandoc_strndup(start, sz); 986 } 987 parent = parent->parent; 988 if (tok == EQN_TOK_BRACE_CLOSE && 989 (parent->type == EQN_PILE || 990 parent->type == EQN_MATRIX)) 991 parent = parent->parent; 992 /* Close out any "singleton" lists. */ 993 while (parent->type == EQN_LISTONE && 994 parent->args == parent->expectargs) 995 parent = parent->parent; 996 break; 997 case (EQN_TOK_BRACE_OPEN): 998 case (EQN_TOK_LEFT): 999 /* 1000 * If we already have something in the stack and we're 1001 * in an expression, then rewind til we're not any more 1002 * (just like with the text node). 1003 */ 1004 while (parent->args == parent->expectargs) 1005 parent = parent->parent; 1006 if (EQN_TOK_LEFT == tok && 1007 (start = eqn_nexttok(ep, &sz)) == NULL) { 1008 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 1009 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 1010 break; 1011 } 1012 parent = eqn_box_alloc(ep, parent); 1013 parent->type = EQN_LIST; 1014 if (EQN_TOK_LEFT == tok) { 1015 if (STRNEQ(start, sz, "ceiling", 7)) { 1016 strlcpy(sym, "\\[lc]", sizeof(sym)); 1017 parent->left = mandoc_strdup(sym); 1018 } else if (STRNEQ(start, sz, "floor", 5)) { 1019 strlcpy(sym, "\\[lf]", sizeof(sym)); 1020 parent->left = mandoc_strdup(sym); 1021 } else 1022 parent->left = mandoc_strndup(start, sz); 1023 } 1024 break; 1025 case (EQN_TOK_PILE): 1026 case (EQN_TOK_LPILE): 1027 case (EQN_TOK_RPILE): 1028 case (EQN_TOK_CPILE): 1029 case (EQN_TOK_CCOL): 1030 case (EQN_TOK_LCOL): 1031 case (EQN_TOK_RCOL): 1032 while (parent->args == parent->expectargs) 1033 parent = parent->parent; 1034 parent = eqn_box_alloc(ep, parent); 1035 parent->type = EQN_PILE; 1036 parent->expectargs = 1; 1037 break; 1038 case (EQN_TOK_ABOVE): 1039 for (cur = parent; cur != NULL; cur = cur->parent) 1040 if (cur->type == EQN_PILE) 1041 break; 1042 if (cur == NULL) { 1043 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse, 1044 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 1045 break; 1046 } 1047 parent = eqn_box_alloc(ep, cur); 1048 parent->type = EQN_LIST; 1049 break; 1050 case (EQN_TOK_MATRIX): 1051 while (parent->args == parent->expectargs) 1052 parent = parent->parent; 1053 parent = eqn_box_alloc(ep, parent); 1054 parent->type = EQN_MATRIX; 1055 parent->expectargs = 1; 1056 break; 1057 case (EQN_TOK_EOF): 1058 /* 1059 * End of file! 1060 * TODO: make sure we're not in an open subexpression. 1061 */ 1062 return ROFF_EQN; 1063 default: 1064 assert(tok == EQN_TOK__MAX); 1065 assert(NULL != p); 1066 /* 1067 * If we already have something in the stack and we're 1068 * in an expression, then rewind til we're not any more. 1069 */ 1070 while (parent->args == parent->expectargs) 1071 parent = parent->parent; 1072 cur = eqn_box_alloc(ep, parent); 1073 cur->type = EQN_TEXT; 1074 for (i = 0; i < EQNSYM__MAX; i++) 1075 if (0 == strcmp(eqnsyms[i].str, p)) { 1076 (void)snprintf(sym, sizeof(sym), 1077 "\\[%s]", eqnsyms[i].sym); 1078 cur->text = mandoc_strdup(sym); 1079 free(p); 1080 break; 1081 } 1082 1083 if (i == EQNSYM__MAX) 1084 cur->text = p; 1085 /* 1086 * Post-process list status. 1087 */ 1088 while (parent->type == EQN_LISTONE && 1089 parent->args == parent->expectargs) 1090 parent = parent->parent; 1091 break; 1092 } 1093 goto next_tok; 1094 } 1095 1096 enum rofferr 1097 eqn_end(struct eqn_node **epp) 1098 { 1099 struct eqn_node *ep; 1100 1101 ep = *epp; 1102 *epp = NULL; 1103 1104 ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box)); 1105 ep->eqn.root->expectargs = UINT_MAX; 1106 return eqn_parse(ep, ep->eqn.root); 1107 } 1108 1109 void 1110 eqn_free(struct eqn_node *p) 1111 { 1112 int i; 1113 1114 eqn_box_free(p->eqn.root); 1115 1116 for (i = 0; i < (int)p->defsz; i++) { 1117 free(p->defs[i].key); 1118 free(p->defs[i].val); 1119 } 1120 1121 free(p->data); 1122 free(p->defs); 1123 free(p); 1124 } 1125