1 /* $OpenBSD: eqn.c,v 1.46 2018/12/14 06:33:03 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <limits.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "mandoc_aux.h" 29 #include "mandoc.h" 30 #include "roff.h" 31 #include "eqn.h" 32 #include "libmandoc.h" 33 #include "eqn_parse.h" 34 35 #define EQN_NEST_MAX 128 /* maximum nesting of defines */ 36 #define STRNEQ(p1, sz1, p2, sz2) \ 37 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 38 39 enum eqn_tok { 40 EQN_TOK_DYAD = 0, 41 EQN_TOK_VEC, 42 EQN_TOK_UNDER, 43 EQN_TOK_BAR, 44 EQN_TOK_TILDE, 45 EQN_TOK_HAT, 46 EQN_TOK_DOT, 47 EQN_TOK_DOTDOT, 48 EQN_TOK_FWD, 49 EQN_TOK_BACK, 50 EQN_TOK_DOWN, 51 EQN_TOK_UP, 52 EQN_TOK_FAT, 53 EQN_TOK_ROMAN, 54 EQN_TOK_ITALIC, 55 EQN_TOK_BOLD, 56 EQN_TOK_SIZE, 57 EQN_TOK_SUB, 58 EQN_TOK_SUP, 59 EQN_TOK_SQRT, 60 EQN_TOK_OVER, 61 EQN_TOK_FROM, 62 EQN_TOK_TO, 63 EQN_TOK_BRACE_OPEN, 64 EQN_TOK_BRACE_CLOSE, 65 EQN_TOK_GSIZE, 66 EQN_TOK_GFONT, 67 EQN_TOK_MARK, 68 EQN_TOK_LINEUP, 69 EQN_TOK_LEFT, 70 EQN_TOK_RIGHT, 71 EQN_TOK_PILE, 72 EQN_TOK_LPILE, 73 EQN_TOK_RPILE, 74 EQN_TOK_CPILE, 75 EQN_TOK_MATRIX, 76 EQN_TOK_CCOL, 77 EQN_TOK_LCOL, 78 EQN_TOK_RCOL, 79 EQN_TOK_DELIM, 80 EQN_TOK_DEFINE, 81 EQN_TOK_TDEFINE, 82 EQN_TOK_NDEFINE, 83 EQN_TOK_UNDEF, 84 EQN_TOK_ABOVE, 85 EQN_TOK__MAX, 86 EQN_TOK_FUNC, 87 EQN_TOK_QUOTED, 88 EQN_TOK_SYM, 89 EQN_TOK_EOF 90 }; 91 92 static const char *eqn_toks[EQN_TOK__MAX] = { 93 "dyad", /* EQN_TOK_DYAD */ 94 "vec", /* EQN_TOK_VEC */ 95 "under", /* EQN_TOK_UNDER */ 96 "bar", /* EQN_TOK_BAR */ 97 "tilde", /* EQN_TOK_TILDE */ 98 "hat", /* EQN_TOK_HAT */ 99 "dot", /* EQN_TOK_DOT */ 100 "dotdot", /* EQN_TOK_DOTDOT */ 101 "fwd", /* EQN_TOK_FWD * */ 102 "back", /* EQN_TOK_BACK */ 103 "down", /* EQN_TOK_DOWN */ 104 "up", /* EQN_TOK_UP */ 105 "fat", /* EQN_TOK_FAT */ 106 "roman", /* EQN_TOK_ROMAN */ 107 "italic", /* EQN_TOK_ITALIC */ 108 "bold", /* EQN_TOK_BOLD */ 109 "size", /* EQN_TOK_SIZE */ 110 "sub", /* EQN_TOK_SUB */ 111 "sup", /* EQN_TOK_SUP */ 112 "sqrt", /* EQN_TOK_SQRT */ 113 "over", /* EQN_TOK_OVER */ 114 "from", /* EQN_TOK_FROM */ 115 "to", /* EQN_TOK_TO */ 116 "{", /* EQN_TOK_BRACE_OPEN */ 117 "}", /* EQN_TOK_BRACE_CLOSE */ 118 "gsize", /* EQN_TOK_GSIZE */ 119 "gfont", /* EQN_TOK_GFONT */ 120 "mark", /* EQN_TOK_MARK */ 121 "lineup", /* EQN_TOK_LINEUP */ 122 "left", /* EQN_TOK_LEFT */ 123 "right", /* EQN_TOK_RIGHT */ 124 "pile", /* EQN_TOK_PILE */ 125 "lpile", /* EQN_TOK_LPILE */ 126 "rpile", /* EQN_TOK_RPILE */ 127 "cpile", /* EQN_TOK_CPILE */ 128 "matrix", /* EQN_TOK_MATRIX */ 129 "ccol", /* EQN_TOK_CCOL */ 130 "lcol", /* EQN_TOK_LCOL */ 131 "rcol", /* EQN_TOK_RCOL */ 132 "delim", /* EQN_TOK_DELIM */ 133 "define", /* EQN_TOK_DEFINE */ 134 "tdefine", /* EQN_TOK_TDEFINE */ 135 "ndefine", /* EQN_TOK_NDEFINE */ 136 "undef", /* EQN_TOK_UNDEF */ 137 "above", /* EQN_TOK_ABOVE */ 138 }; 139 140 static const char *const eqn_func[] = { 141 "acos", "acsc", "and", "arc", "asec", "asin", "atan", 142 "cos", "cosh", "coth", "csc", "det", "exp", "for", 143 "if", "lim", "ln", "log", "max", "min", 144 "sec", "sin", "sinh", "tan", "tanh", "Im", "Re", 145 }; 146 147 enum eqn_symt { 148 EQNSYM_alpha = 0, 149 EQNSYM_beta, 150 EQNSYM_chi, 151 EQNSYM_delta, 152 EQNSYM_epsilon, 153 EQNSYM_eta, 154 EQNSYM_gamma, 155 EQNSYM_iota, 156 EQNSYM_kappa, 157 EQNSYM_lambda, 158 EQNSYM_mu, 159 EQNSYM_nu, 160 EQNSYM_omega, 161 EQNSYM_omicron, 162 EQNSYM_phi, 163 EQNSYM_pi, 164 EQNSYM_ps, 165 EQNSYM_rho, 166 EQNSYM_sigma, 167 EQNSYM_tau, 168 EQNSYM_theta, 169 EQNSYM_upsilon, 170 EQNSYM_xi, 171 EQNSYM_zeta, 172 EQNSYM_DELTA, 173 EQNSYM_GAMMA, 174 EQNSYM_LAMBDA, 175 EQNSYM_OMEGA, 176 EQNSYM_PHI, 177 EQNSYM_PI, 178 EQNSYM_PSI, 179 EQNSYM_SIGMA, 180 EQNSYM_THETA, 181 EQNSYM_UPSILON, 182 EQNSYM_XI, 183 EQNSYM_inter, 184 EQNSYM_union, 185 EQNSYM_prod, 186 EQNSYM_int, 187 EQNSYM_sum, 188 EQNSYM_grad, 189 EQNSYM_del, 190 EQNSYM_times, 191 EQNSYM_cdot, 192 EQNSYM_nothing, 193 EQNSYM_approx, 194 EQNSYM_prime, 195 EQNSYM_half, 196 EQNSYM_partial, 197 EQNSYM_inf, 198 EQNSYM_muchgreat, 199 EQNSYM_muchless, 200 EQNSYM_larrow, 201 EQNSYM_rarrow, 202 EQNSYM_pm, 203 EQNSYM_nequal, 204 EQNSYM_equiv, 205 EQNSYM_lessequal, 206 EQNSYM_moreequal, 207 EQNSYM_minus, 208 EQNSYM__MAX 209 }; 210 211 struct eqnsym { 212 const char *str; 213 const char *sym; 214 }; 215 216 static const struct eqnsym eqnsyms[EQNSYM__MAX] = { 217 { "alpha", "*a" }, /* EQNSYM_alpha */ 218 { "beta", "*b" }, /* EQNSYM_beta */ 219 { "chi", "*x" }, /* EQNSYM_chi */ 220 { "delta", "*d" }, /* EQNSYM_delta */ 221 { "epsilon", "*e" }, /* EQNSYM_epsilon */ 222 { "eta", "*y" }, /* EQNSYM_eta */ 223 { "gamma", "*g" }, /* EQNSYM_gamma */ 224 { "iota", "*i" }, /* EQNSYM_iota */ 225 { "kappa", "*k" }, /* EQNSYM_kappa */ 226 { "lambda", "*l" }, /* EQNSYM_lambda */ 227 { "mu", "*m" }, /* EQNSYM_mu */ 228 { "nu", "*n" }, /* EQNSYM_nu */ 229 { "omega", "*w" }, /* EQNSYM_omega */ 230 { "omicron", "*o" }, /* EQNSYM_omicron */ 231 { "phi", "*f" }, /* EQNSYM_phi */ 232 { "pi", "*p" }, /* EQNSYM_pi */ 233 { "psi", "*q" }, /* EQNSYM_psi */ 234 { "rho", "*r" }, /* EQNSYM_rho */ 235 { "sigma", "*s" }, /* EQNSYM_sigma */ 236 { "tau", "*t" }, /* EQNSYM_tau */ 237 { "theta", "*h" }, /* EQNSYM_theta */ 238 { "upsilon", "*u" }, /* EQNSYM_upsilon */ 239 { "xi", "*c" }, /* EQNSYM_xi */ 240 { "zeta", "*z" }, /* EQNSYM_zeta */ 241 { "DELTA", "*D" }, /* EQNSYM_DELTA */ 242 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 243 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 244 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 245 { "PHI", "*F" }, /* EQNSYM_PHI */ 246 { "PI", "*P" }, /* EQNSYM_PI */ 247 { "PSI", "*Q" }, /* EQNSYM_PSI */ 248 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 249 { "THETA", "*H" }, /* EQNSYM_THETA */ 250 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 251 { "XI", "*C" }, /* EQNSYM_XI */ 252 { "inter", "ca" }, /* EQNSYM_inter */ 253 { "union", "cu" }, /* EQNSYM_union */ 254 { "prod", "product" }, /* EQNSYM_prod */ 255 { "int", "integral" }, /* EQNSYM_int */ 256 { "sum", "sum" }, /* EQNSYM_sum */ 257 { "grad", "gr" }, /* EQNSYM_grad */ 258 { "del", "gr" }, /* EQNSYM_del */ 259 { "times", "mu" }, /* EQNSYM_times */ 260 { "cdot", "pc" }, /* EQNSYM_cdot */ 261 { "nothing", "&" }, /* EQNSYM_nothing */ 262 { "approx", "~~" }, /* EQNSYM_approx */ 263 { "prime", "fm" }, /* EQNSYM_prime */ 264 { "half", "12" }, /* EQNSYM_half */ 265 { "partial", "pd" }, /* EQNSYM_partial */ 266 { "inf", "if" }, /* EQNSYM_inf */ 267 { ">>", ">>" }, /* EQNSYM_muchgreat */ 268 { "<<", "<<" }, /* EQNSYM_muchless */ 269 { "<-", "<-" }, /* EQNSYM_larrow */ 270 { "->", "->" }, /* EQNSYM_rarrow */ 271 { "+-", "+-" }, /* EQNSYM_pm */ 272 { "!=", "!=" }, /* EQNSYM_nequal */ 273 { "==", "==" }, /* EQNSYM_equiv */ 274 { "<=", "<=" }, /* EQNSYM_lessequal */ 275 { ">=", ">=" }, /* EQNSYM_moreequal */ 276 { "-", "mi" }, /* EQNSYM_minus */ 277 }; 278 279 enum parse_mode { 280 MODE_QUOTED, 281 MODE_NOSUB, 282 MODE_SUB, 283 MODE_TOK 284 }; 285 286 struct eqn_def { 287 char *key; 288 size_t keysz; 289 char *val; 290 size_t valsz; 291 }; 292 293 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 294 static struct eqn_box *eqn_box_makebinary(struct eqn_node *, 295 struct eqn_box *); 296 static void eqn_def(struct eqn_node *); 297 static struct eqn_def *eqn_def_find(struct eqn_node *); 298 static void eqn_delim(struct eqn_node *); 299 static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode); 300 static void eqn_undef(struct eqn_node *); 301 302 303 struct eqn_node * 304 eqn_alloc(void) 305 { 306 struct eqn_node *ep; 307 308 ep = mandoc_calloc(1, sizeof(*ep)); 309 ep->gsize = EQN_DEFSIZE; 310 return ep; 311 } 312 313 void 314 eqn_reset(struct eqn_node *ep) 315 { 316 free(ep->data); 317 ep->data = ep->start = ep->end = NULL; 318 ep->sz = ep->toksz = 0; 319 } 320 321 void 322 eqn_read(struct eqn_node *ep, const char *p) 323 { 324 char *cp; 325 326 if (ep->data == NULL) { 327 ep->sz = strlen(p); 328 ep->data = mandoc_strdup(p); 329 } else { 330 ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p); 331 free(ep->data); 332 ep->data = cp; 333 } 334 ep->sz += 1; 335 } 336 337 /* 338 * Find the key "key" of the give size within our eqn-defined values. 339 */ 340 static struct eqn_def * 341 eqn_def_find(struct eqn_node *ep) 342 { 343 int i; 344 345 for (i = 0; i < (int)ep->defsz; i++) 346 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 347 ep->defs[i].keysz, ep->start, ep->toksz)) 348 return &ep->defs[i]; 349 350 return NULL; 351 } 352 353 /* 354 * Parse a token from the input text. The modes are: 355 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends 356 * before its next occurence. Do not interpret the token in any 357 * way and return EQN_TOK_QUOTED. All other modes behave like 358 * MODE_QUOTED when *ep->start is '"'. 359 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it; 360 * otherwise, it ends before the next whitespace or brace. 361 * Do not interpret the token and return EQN_TOK__MAX. 362 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an 363 * alias created with define. If it is an alias, replace it with 364 * its string value and reparse. 365 * MODE_TOK: Like MODE_SUB, but also check the token against the list 366 * of tokens, and if there is a match, return that token. Otherwise, 367 * if the token matches a symbol, return EQN_TOK_SYM; if it matches 368 * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for 369 * a token match, *ep->start is set to an allocated string that the 370 * caller is expected to free. 371 * All modes skip whitespace following the end of the token. 372 */ 373 static enum eqn_tok 374 eqn_next(struct eqn_node *ep, enum parse_mode mode) 375 { 376 static int last_len, lim; 377 378 struct eqn_def *def; 379 size_t start; 380 int diff, i, quoted; 381 enum eqn_tok tok; 382 383 /* 384 * Reset the recursion counter after advancing 385 * beyond the end of the previous substitution. 386 */ 387 if (ep->end - ep->data >= last_len) 388 lim = 0; 389 390 ep->start = ep->end; 391 quoted = mode == MODE_QUOTED; 392 for (;;) { 393 switch (*ep->start) { 394 case '\0': 395 ep->toksz = 0; 396 return EQN_TOK_EOF; 397 case '"': 398 quoted = 1; 399 break; 400 default: 401 break; 402 } 403 if (quoted) { 404 ep->end = strchr(ep->start + 1, *ep->start); 405 ep->start++; /* Skip opening quote. */ 406 if (ep->end == NULL) { 407 mandoc_msg(MANDOCERR_ARG_QUOTE, 408 ep->node->line, ep->node->pos, NULL); 409 ep->end = strchr(ep->start, '\0'); 410 } 411 } else { 412 ep->end = ep->start + 1; 413 if (*ep->start != '{' && *ep->start != '}') 414 ep->end += strcspn(ep->end, " ^~\"{}\t"); 415 } 416 ep->toksz = ep->end - ep->start; 417 if (quoted && *ep->end != '\0') 418 ep->end++; /* Skip closing quote. */ 419 while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL) 420 ep->end++; 421 if (quoted) /* Cannot return, may have to strndup. */ 422 break; 423 if (mode == MODE_NOSUB) 424 return EQN_TOK__MAX; 425 if ((def = eqn_def_find(ep)) == NULL) 426 break; 427 if (++lim > EQN_NEST_MAX) { 428 mandoc_msg(MANDOCERR_ROFFLOOP, 429 ep->node->line, ep->node->pos, NULL); 430 return EQN_TOK_EOF; 431 } 432 433 /* Replace a defined name with its string value. */ 434 if ((diff = def->valsz - ep->toksz) > 0) { 435 start = ep->start - ep->data; 436 ep->sz += diff; 437 ep->data = mandoc_realloc(ep->data, ep->sz + 1); 438 ep->start = ep->data + start; 439 } 440 if (diff) 441 memmove(ep->start + def->valsz, ep->start + ep->toksz, 442 strlen(ep->start + ep->toksz) + 1); 443 memcpy(ep->start, def->val, def->valsz); 444 last_len = ep->start - ep->data + def->valsz; 445 } 446 if (mode != MODE_TOK) 447 return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX; 448 if (quoted) { 449 ep->start = mandoc_strndup(ep->start, ep->toksz); 450 return EQN_TOK_QUOTED; 451 } 452 for (tok = 0; tok < EQN_TOK__MAX; tok++) 453 if (STRNEQ(ep->start, ep->toksz, 454 eqn_toks[tok], strlen(eqn_toks[tok]))) 455 return tok; 456 457 for (i = 0; i < EQNSYM__MAX; i++) { 458 if (STRNEQ(ep->start, ep->toksz, 459 eqnsyms[i].str, strlen(eqnsyms[i].str))) { 460 mandoc_asprintf(&ep->start, 461 "\\[%s]", eqnsyms[i].sym); 462 return EQN_TOK_SYM; 463 } 464 } 465 ep->start = mandoc_strndup(ep->start, ep->toksz); 466 for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++) 467 if (STRNEQ(ep->start, ep->toksz, 468 eqn_func[i], strlen(eqn_func[i]))) 469 return EQN_TOK_FUNC; 470 return EQN_TOK__MAX; 471 } 472 473 void 474 eqn_box_free(struct eqn_box *bp) 475 { 476 if (bp == NULL) 477 return; 478 479 if (bp->first) 480 eqn_box_free(bp->first); 481 if (bp->next) 482 eqn_box_free(bp->next); 483 484 free(bp->text); 485 free(bp->left); 486 free(bp->right); 487 free(bp->top); 488 free(bp->bottom); 489 free(bp); 490 } 491 492 struct eqn_box * 493 eqn_box_new(void) 494 { 495 struct eqn_box *bp; 496 497 bp = mandoc_calloc(1, sizeof(*bp)); 498 bp->expectargs = UINT_MAX; 499 return bp; 500 } 501 502 /* 503 * Allocate a box as the last child of the parent node. 504 */ 505 static struct eqn_box * 506 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 507 { 508 struct eqn_box *bp; 509 510 bp = eqn_box_new(); 511 bp->parent = parent; 512 bp->parent->args++; 513 bp->font = bp->parent->font; 514 bp->size = ep->gsize; 515 516 if (NULL != parent->first) { 517 parent->last->next = bp; 518 bp->prev = parent->last; 519 } else 520 parent->first = bp; 521 522 parent->last = bp; 523 return bp; 524 } 525 526 /* 527 * Reparent the current last node (of the current parent) under a new 528 * EQN_SUBEXPR as the first element. 529 * Then return the new parent. 530 * The new EQN_SUBEXPR will have a two-child limit. 531 */ 532 static struct eqn_box * 533 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent) 534 { 535 struct eqn_box *b, *newb; 536 537 assert(NULL != parent->last); 538 b = parent->last; 539 if (parent->last == parent->first) 540 parent->first = NULL; 541 parent->args--; 542 parent->last = b->prev; 543 b->prev = NULL; 544 newb = eqn_box_alloc(ep, parent); 545 newb->type = EQN_SUBEXPR; 546 newb->expectargs = 2; 547 newb->args = 1; 548 newb->first = newb->last = b; 549 newb->first->next = NULL; 550 b->parent = newb; 551 return newb; 552 } 553 554 /* 555 * Parse the "delim" control statement. 556 */ 557 static void 558 eqn_delim(struct eqn_node *ep) 559 { 560 if (ep->end[0] == '\0' || ep->end[1] == '\0') { 561 mandoc_msg(MANDOCERR_REQ_EMPTY, 562 ep->node->line, ep->node->pos, "delim"); 563 if (ep->end[0] != '\0') 564 ep->end++; 565 } else if (strncmp(ep->end, "off", 3) == 0) { 566 ep->delim = 0; 567 ep->end += 3; 568 } else if (strncmp(ep->end, "on", 2) == 0) { 569 if (ep->odelim && ep->cdelim) 570 ep->delim = 1; 571 ep->end += 2; 572 } else { 573 ep->odelim = *ep->end++; 574 ep->cdelim = *ep->end++; 575 ep->delim = 1; 576 } 577 } 578 579 /* 580 * Undefine a previously-defined string. 581 */ 582 static void 583 eqn_undef(struct eqn_node *ep) 584 { 585 struct eqn_def *def; 586 587 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 588 mandoc_msg(MANDOCERR_REQ_EMPTY, 589 ep->node->line, ep->node->pos, "undef"); 590 return; 591 } 592 if ((def = eqn_def_find(ep)) == NULL) 593 return; 594 free(def->key); 595 free(def->val); 596 def->key = def->val = NULL; 597 def->keysz = def->valsz = 0; 598 } 599 600 static void 601 eqn_def(struct eqn_node *ep) 602 { 603 struct eqn_def *def; 604 int i; 605 606 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 607 mandoc_msg(MANDOCERR_REQ_EMPTY, 608 ep->node->line, ep->node->pos, "define"); 609 return; 610 } 611 612 /* 613 * Search for a key that already exists. 614 * Create a new key if none is found. 615 */ 616 if ((def = eqn_def_find(ep)) == NULL) { 617 /* Find holes in string array. */ 618 for (i = 0; i < (int)ep->defsz; i++) 619 if (0 == ep->defs[i].keysz) 620 break; 621 622 if (i == (int)ep->defsz) { 623 ep->defsz++; 624 ep->defs = mandoc_reallocarray(ep->defs, 625 ep->defsz, sizeof(struct eqn_def)); 626 ep->defs[i].key = ep->defs[i].val = NULL; 627 } 628 629 def = ep->defs + i; 630 free(def->key); 631 def->key = mandoc_strndup(ep->start, ep->toksz); 632 def->keysz = ep->toksz; 633 } 634 635 if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) { 636 mandoc_msg(MANDOCERR_REQ_EMPTY, 637 ep->node->line, ep->node->pos, "define %s", def->key); 638 free(def->key); 639 free(def->val); 640 def->key = def->val = NULL; 641 def->keysz = def->valsz = 0; 642 return; 643 } 644 free(def->val); 645 def->val = mandoc_strndup(ep->start, ep->toksz); 646 def->valsz = ep->toksz; 647 } 648 649 void 650 eqn_parse(struct eqn_node *ep) 651 { 652 struct eqn_box *cur, *nbox, *parent, *split; 653 const char *cp, *cpn; 654 char *p; 655 enum eqn_tok tok; 656 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln; 657 int size; 658 659 parent = ep->node->eqn; 660 assert(parent != NULL); 661 662 /* 663 * Empty equation. 664 * Do not add it to the high-level syntax tree. 665 */ 666 667 if (ep->data == NULL) 668 return; 669 670 ep->start = ep->end = ep->data + strspn(ep->data, " ^~"); 671 672 next_tok: 673 tok = eqn_next(ep, MODE_TOK); 674 switch (tok) { 675 case EQN_TOK_UNDEF: 676 eqn_undef(ep); 677 break; 678 case EQN_TOK_NDEFINE: 679 case EQN_TOK_DEFINE: 680 eqn_def(ep); 681 break; 682 case EQN_TOK_TDEFINE: 683 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF || 684 eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) 685 mandoc_msg(MANDOCERR_REQ_EMPTY, 686 ep->node->line, ep->node->pos, "tdefine"); 687 break; 688 case EQN_TOK_DELIM: 689 eqn_delim(ep); 690 break; 691 case EQN_TOK_GFONT: 692 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 693 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 694 ep->node->pos, "%s", eqn_toks[tok]); 695 break; 696 case EQN_TOK_MARK: 697 case EQN_TOK_LINEUP: 698 /* Ignore these. */ 699 break; 700 case EQN_TOK_DYAD: 701 case EQN_TOK_VEC: 702 case EQN_TOK_UNDER: 703 case EQN_TOK_BAR: 704 case EQN_TOK_TILDE: 705 case EQN_TOK_HAT: 706 case EQN_TOK_DOT: 707 case EQN_TOK_DOTDOT: 708 if (parent->last == NULL) { 709 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 710 ep->node->pos, "%s", eqn_toks[tok]); 711 cur = eqn_box_alloc(ep, parent); 712 cur->type = EQN_TEXT; 713 cur->text = mandoc_strdup(""); 714 } 715 parent = eqn_box_makebinary(ep, parent); 716 parent->type = EQN_LIST; 717 parent->expectargs = 1; 718 parent->font = EQNFONT_ROMAN; 719 switch (tok) { 720 case EQN_TOK_DOTDOT: 721 parent->top = mandoc_strdup("\\[ad]"); 722 break; 723 case EQN_TOK_VEC: 724 parent->top = mandoc_strdup("\\[->]"); 725 break; 726 case EQN_TOK_DYAD: 727 parent->top = mandoc_strdup("\\[<>]"); 728 break; 729 case EQN_TOK_TILDE: 730 parent->top = mandoc_strdup("\\[a~]"); 731 break; 732 case EQN_TOK_UNDER: 733 parent->bottom = mandoc_strdup("\\[ul]"); 734 break; 735 case EQN_TOK_BAR: 736 parent->top = mandoc_strdup("\\[rn]"); 737 break; 738 case EQN_TOK_DOT: 739 parent->top = mandoc_strdup("\\[a.]"); 740 break; 741 case EQN_TOK_HAT: 742 parent->top = mandoc_strdup("\\[ha]"); 743 break; 744 default: 745 abort(); 746 } 747 parent = parent->parent; 748 break; 749 case EQN_TOK_FWD: 750 case EQN_TOK_BACK: 751 case EQN_TOK_DOWN: 752 case EQN_TOK_UP: 753 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 754 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 755 ep->node->pos, "%s", eqn_toks[tok]); 756 break; 757 case EQN_TOK_FAT: 758 case EQN_TOK_ROMAN: 759 case EQN_TOK_ITALIC: 760 case EQN_TOK_BOLD: 761 while (parent->args == parent->expectargs) 762 parent = parent->parent; 763 /* 764 * These values apply to the next word or sequence of 765 * words; thus, we mark that we'll have a child with 766 * exactly one of those. 767 */ 768 parent = eqn_box_alloc(ep, parent); 769 parent->type = EQN_LIST; 770 parent->expectargs = 1; 771 switch (tok) { 772 case EQN_TOK_FAT: 773 parent->font = EQNFONT_FAT; 774 break; 775 case EQN_TOK_ROMAN: 776 parent->font = EQNFONT_ROMAN; 777 break; 778 case EQN_TOK_ITALIC: 779 parent->font = EQNFONT_ITALIC; 780 break; 781 case EQN_TOK_BOLD: 782 parent->font = EQNFONT_BOLD; 783 break; 784 default: 785 abort(); 786 } 787 break; 788 case EQN_TOK_SIZE: 789 case EQN_TOK_GSIZE: 790 /* Accept two values: integral size and a single. */ 791 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 792 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 793 ep->node->pos, "%s", eqn_toks[tok]); 794 break; 795 } 796 size = mandoc_strntoi(ep->start, ep->toksz, 10); 797 if (-1 == size) { 798 mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line, 799 ep->node->pos, "%s", eqn_toks[tok]); 800 break; 801 } 802 if (EQN_TOK_GSIZE == tok) { 803 ep->gsize = size; 804 break; 805 } 806 while (parent->args == parent->expectargs) 807 parent = parent->parent; 808 parent = eqn_box_alloc(ep, parent); 809 parent->type = EQN_LIST; 810 parent->expectargs = 1; 811 parent->size = size; 812 break; 813 case EQN_TOK_FROM: 814 case EQN_TOK_TO: 815 case EQN_TOK_SUB: 816 case EQN_TOK_SUP: 817 /* 818 * We have a left-right-associative expression. 819 * Repivot under a positional node, open a child scope 820 * and keep on reading. 821 */ 822 if (parent->last == NULL) { 823 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 824 ep->node->pos, "%s", eqn_toks[tok]); 825 cur = eqn_box_alloc(ep, parent); 826 cur->type = EQN_TEXT; 827 cur->text = mandoc_strdup(""); 828 } 829 while (parent->expectargs == 1 && parent->args == 1) 830 parent = parent->parent; 831 if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) { 832 for (cur = parent; cur != NULL; cur = cur->parent) 833 if (cur->pos == EQNPOS_SUB || 834 cur->pos == EQNPOS_SUP || 835 cur->pos == EQNPOS_SUBSUP || 836 cur->pos == EQNPOS_SQRT || 837 cur->pos == EQNPOS_OVER) 838 break; 839 if (cur != NULL) 840 parent = cur->parent; 841 } 842 if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) { 843 parent->expectargs = 3; 844 parent->pos = EQNPOS_SUBSUP; 845 break; 846 } 847 if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) { 848 parent->expectargs = 3; 849 parent->pos = EQNPOS_FROMTO; 850 break; 851 } 852 parent = eqn_box_makebinary(ep, parent); 853 switch (tok) { 854 case EQN_TOK_FROM: 855 parent->pos = EQNPOS_FROM; 856 break; 857 case EQN_TOK_TO: 858 parent->pos = EQNPOS_TO; 859 break; 860 case EQN_TOK_SUP: 861 parent->pos = EQNPOS_SUP; 862 break; 863 case EQN_TOK_SUB: 864 parent->pos = EQNPOS_SUB; 865 break; 866 default: 867 abort(); 868 } 869 break; 870 case EQN_TOK_SQRT: 871 while (parent->args == parent->expectargs) 872 parent = parent->parent; 873 /* 874 * Accept a left-right-associative set of arguments just 875 * like sub and sup and friends but without rebalancing 876 * under a pivot. 877 */ 878 parent = eqn_box_alloc(ep, parent); 879 parent->type = EQN_SUBEXPR; 880 parent->pos = EQNPOS_SQRT; 881 parent->expectargs = 1; 882 break; 883 case EQN_TOK_OVER: 884 /* 885 * We have a right-left-associative fraction. 886 * Close out anything that's currently open, then 887 * rebalance and continue reading. 888 */ 889 if (parent->last == NULL) { 890 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 891 ep->node->pos, "%s", eqn_toks[tok]); 892 cur = eqn_box_alloc(ep, parent); 893 cur->type = EQN_TEXT; 894 cur->text = mandoc_strdup(""); 895 } 896 while (parent->args == parent->expectargs) 897 parent = parent->parent; 898 while (EQN_SUBEXPR == parent->type) 899 parent = parent->parent; 900 parent = eqn_box_makebinary(ep, parent); 901 parent->pos = EQNPOS_OVER; 902 break; 903 case EQN_TOK_RIGHT: 904 case EQN_TOK_BRACE_CLOSE: 905 /* 906 * Close out the existing brace. 907 * FIXME: this is a shitty sentinel: we should really 908 * have a native EQN_BRACE type or whatnot. 909 */ 910 for (cur = parent; cur != NULL; cur = cur->parent) 911 if (cur->type == EQN_LIST && 912 cur->expectargs > 1 && 913 (tok == EQN_TOK_BRACE_CLOSE || 914 cur->left != NULL)) 915 break; 916 if (cur == NULL) { 917 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line, 918 ep->node->pos, "%s", eqn_toks[tok]); 919 break; 920 } 921 parent = cur; 922 if (EQN_TOK_RIGHT == tok) { 923 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 924 mandoc_msg(MANDOCERR_REQ_EMPTY, 925 ep->node->line, ep->node->pos, 926 "%s", eqn_toks[tok]); 927 break; 928 } 929 /* Handling depends on right/left. */ 930 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 931 parent->right = mandoc_strdup("\\[rc]"); 932 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 933 parent->right = mandoc_strdup("\\[rf]"); 934 else 935 parent->right = 936 mandoc_strndup(ep->start, ep->toksz); 937 } 938 parent = parent->parent; 939 if (tok == EQN_TOK_BRACE_CLOSE && 940 (parent->type == EQN_PILE || 941 parent->type == EQN_MATRIX)) 942 parent = parent->parent; 943 /* Close out any "singleton" lists. */ 944 while (parent->type == EQN_LIST && 945 parent->expectargs == 1 && 946 parent->args == 1) 947 parent = parent->parent; 948 break; 949 case EQN_TOK_BRACE_OPEN: 950 case EQN_TOK_LEFT: 951 /* 952 * If we already have something in the stack and we're 953 * in an expression, then rewind til we're not any more 954 * (just like with the text node). 955 */ 956 while (parent->args == parent->expectargs) 957 parent = parent->parent; 958 if (EQN_TOK_LEFT == tok && 959 eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 960 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 961 ep->node->pos, "%s", eqn_toks[tok]); 962 break; 963 } 964 parent = eqn_box_alloc(ep, parent); 965 parent->type = EQN_LIST; 966 if (EQN_TOK_LEFT == tok) { 967 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 968 parent->left = mandoc_strdup("\\[lc]"); 969 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 970 parent->left = mandoc_strdup("\\[lf]"); 971 else 972 parent->left = 973 mandoc_strndup(ep->start, ep->toksz); 974 } 975 break; 976 case EQN_TOK_PILE: 977 case EQN_TOK_LPILE: 978 case EQN_TOK_RPILE: 979 case EQN_TOK_CPILE: 980 case EQN_TOK_CCOL: 981 case EQN_TOK_LCOL: 982 case EQN_TOK_RCOL: 983 while (parent->args == parent->expectargs) 984 parent = parent->parent; 985 parent = eqn_box_alloc(ep, parent); 986 parent->type = EQN_PILE; 987 parent->expectargs = 1; 988 break; 989 case EQN_TOK_ABOVE: 990 for (cur = parent; cur != NULL; cur = cur->parent) 991 if (cur->type == EQN_PILE) 992 break; 993 if (cur == NULL) { 994 mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line, 995 ep->node->pos, "%s", eqn_toks[tok]); 996 break; 997 } 998 parent = eqn_box_alloc(ep, cur); 999 parent->type = EQN_LIST; 1000 break; 1001 case EQN_TOK_MATRIX: 1002 while (parent->args == parent->expectargs) 1003 parent = parent->parent; 1004 parent = eqn_box_alloc(ep, parent); 1005 parent->type = EQN_MATRIX; 1006 parent->expectargs = 1; 1007 break; 1008 case EQN_TOK_EOF: 1009 return; 1010 case EQN_TOK__MAX: 1011 case EQN_TOK_FUNC: 1012 case EQN_TOK_QUOTED: 1013 case EQN_TOK_SYM: 1014 p = ep->start; 1015 assert(p != NULL); 1016 /* 1017 * If we already have something in the stack and we're 1018 * in an expression, then rewind til we're not any more. 1019 */ 1020 while (parent->args == parent->expectargs) 1021 parent = parent->parent; 1022 cur = eqn_box_alloc(ep, parent); 1023 cur->type = EQN_TEXT; 1024 cur->text = p; 1025 switch (tok) { 1026 case EQN_TOK_FUNC: 1027 cur->font = EQNFONT_ROMAN; 1028 break; 1029 case EQN_TOK_QUOTED: 1030 if (cur->font == EQNFONT_NONE) 1031 cur->font = EQNFONT_ITALIC; 1032 break; 1033 case EQN_TOK_SYM: 1034 break; 1035 default: 1036 if (cur->font != EQNFONT_NONE || *p == '\0') 1037 break; 1038 cpn = p - 1; 1039 ccln = CCL_LET; 1040 split = NULL; 1041 for (;;) { 1042 /* Advance to next character. */ 1043 cp = cpn++; 1044 ccl = ccln; 1045 ccln = isalpha((unsigned char)*cpn) ? CCL_LET : 1046 isdigit((unsigned char)*cpn) || 1047 (*cpn == '.' && (ccl == CCL_DIG || 1048 isdigit((unsigned char)cpn[1]))) ? 1049 CCL_DIG : CCL_PUN; 1050 /* No boundary before first character. */ 1051 if (cp < p) 1052 continue; 1053 cur->font = ccl == CCL_LET ? 1054 EQNFONT_ITALIC : EQNFONT_ROMAN; 1055 if (*cp == '\\') 1056 mandoc_escape(&cpn, NULL, NULL); 1057 /* No boundary after last character. */ 1058 if (*cpn == '\0') 1059 break; 1060 if (ccln == ccl && *cp != ',' && *cpn != ',') 1061 continue; 1062 /* Boundary found, split the text. */ 1063 if (parent->args == parent->expectargs) { 1064 /* Remove the text from the tree. */ 1065 if (cur->prev == NULL) 1066 parent->first = cur->next; 1067 else 1068 cur->prev->next = NULL; 1069 parent->last = cur->prev; 1070 parent->args--; 1071 /* Set up a list instead. */ 1072 split = eqn_box_alloc(ep, parent); 1073 split->type = EQN_LIST; 1074 /* Insert the word into the list. */ 1075 split->first = split->last = cur; 1076 cur->parent = split; 1077 cur->prev = NULL; 1078 parent = split; 1079 } 1080 /* Append a new text box. */ 1081 nbox = eqn_box_alloc(ep, parent); 1082 nbox->type = EQN_TEXT; 1083 nbox->text = mandoc_strdup(cpn); 1084 /* Truncate the old box. */ 1085 p = mandoc_strndup(cur->text, 1086 cpn - cur->text); 1087 free(cur->text); 1088 cur->text = p; 1089 /* Setup to process the new box. */ 1090 cur = nbox; 1091 p = nbox->text; 1092 cpn = p - 1; 1093 ccln = CCL_LET; 1094 } 1095 if (split != NULL) 1096 parent = split->parent; 1097 break; 1098 } 1099 break; 1100 default: 1101 abort(); 1102 } 1103 goto next_tok; 1104 } 1105 1106 void 1107 eqn_free(struct eqn_node *p) 1108 { 1109 int i; 1110 1111 if (p == NULL) 1112 return; 1113 1114 for (i = 0; i < (int)p->defsz; i++) { 1115 free(p->defs[i].key); 1116 free(p->defs[i].val); 1117 } 1118 1119 free(p->data); 1120 free(p->defs); 1121 free(p); 1122 } 1123