1 /* $OpenBSD: eqn.c,v 1.41 2017/07/15 15:06:31 bentley Exp $ */ 2 /* 3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <limits.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "mandoc_aux.h" 29 #include "mandoc.h" 30 #include "roff.h" 31 #include "libmandoc.h" 32 #include "libroff.h" 33 34 #define EQN_NEST_MAX 128 /* maximum nesting of defines */ 35 #define STRNEQ(p1, sz1, p2, sz2) \ 36 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 37 38 enum eqn_tok { 39 EQN_TOK_DYAD = 0, 40 EQN_TOK_VEC, 41 EQN_TOK_UNDER, 42 EQN_TOK_BAR, 43 EQN_TOK_TILDE, 44 EQN_TOK_HAT, 45 EQN_TOK_DOT, 46 EQN_TOK_DOTDOT, 47 EQN_TOK_FWD, 48 EQN_TOK_BACK, 49 EQN_TOK_DOWN, 50 EQN_TOK_UP, 51 EQN_TOK_FAT, 52 EQN_TOK_ROMAN, 53 EQN_TOK_ITALIC, 54 EQN_TOK_BOLD, 55 EQN_TOK_SIZE, 56 EQN_TOK_SUB, 57 EQN_TOK_SUP, 58 EQN_TOK_SQRT, 59 EQN_TOK_OVER, 60 EQN_TOK_FROM, 61 EQN_TOK_TO, 62 EQN_TOK_BRACE_OPEN, 63 EQN_TOK_BRACE_CLOSE, 64 EQN_TOK_GSIZE, 65 EQN_TOK_GFONT, 66 EQN_TOK_MARK, 67 EQN_TOK_LINEUP, 68 EQN_TOK_LEFT, 69 EQN_TOK_RIGHT, 70 EQN_TOK_PILE, 71 EQN_TOK_LPILE, 72 EQN_TOK_RPILE, 73 EQN_TOK_CPILE, 74 EQN_TOK_MATRIX, 75 EQN_TOK_CCOL, 76 EQN_TOK_LCOL, 77 EQN_TOK_RCOL, 78 EQN_TOK_DELIM, 79 EQN_TOK_DEFINE, 80 EQN_TOK_TDEFINE, 81 EQN_TOK_NDEFINE, 82 EQN_TOK_UNDEF, 83 EQN_TOK_ABOVE, 84 EQN_TOK__MAX, 85 EQN_TOK_FUNC, 86 EQN_TOK_QUOTED, 87 EQN_TOK_SYM, 88 EQN_TOK_EOF 89 }; 90 91 static const char *eqn_toks[EQN_TOK__MAX] = { 92 "dyad", /* EQN_TOK_DYAD */ 93 "vec", /* EQN_TOK_VEC */ 94 "under", /* EQN_TOK_UNDER */ 95 "bar", /* EQN_TOK_BAR */ 96 "tilde", /* EQN_TOK_TILDE */ 97 "hat", /* EQN_TOK_HAT */ 98 "dot", /* EQN_TOK_DOT */ 99 "dotdot", /* EQN_TOK_DOTDOT */ 100 "fwd", /* EQN_TOK_FWD * */ 101 "back", /* EQN_TOK_BACK */ 102 "down", /* EQN_TOK_DOWN */ 103 "up", /* EQN_TOK_UP */ 104 "fat", /* EQN_TOK_FAT */ 105 "roman", /* EQN_TOK_ROMAN */ 106 "italic", /* EQN_TOK_ITALIC */ 107 "bold", /* EQN_TOK_BOLD */ 108 "size", /* EQN_TOK_SIZE */ 109 "sub", /* EQN_TOK_SUB */ 110 "sup", /* EQN_TOK_SUP */ 111 "sqrt", /* EQN_TOK_SQRT */ 112 "over", /* EQN_TOK_OVER */ 113 "from", /* EQN_TOK_FROM */ 114 "to", /* EQN_TOK_TO */ 115 "{", /* EQN_TOK_BRACE_OPEN */ 116 "}", /* EQN_TOK_BRACE_CLOSE */ 117 "gsize", /* EQN_TOK_GSIZE */ 118 "gfont", /* EQN_TOK_GFONT */ 119 "mark", /* EQN_TOK_MARK */ 120 "lineup", /* EQN_TOK_LINEUP */ 121 "left", /* EQN_TOK_LEFT */ 122 "right", /* EQN_TOK_RIGHT */ 123 "pile", /* EQN_TOK_PILE */ 124 "lpile", /* EQN_TOK_LPILE */ 125 "rpile", /* EQN_TOK_RPILE */ 126 "cpile", /* EQN_TOK_CPILE */ 127 "matrix", /* EQN_TOK_MATRIX */ 128 "ccol", /* EQN_TOK_CCOL */ 129 "lcol", /* EQN_TOK_LCOL */ 130 "rcol", /* EQN_TOK_RCOL */ 131 "delim", /* EQN_TOK_DELIM */ 132 "define", /* EQN_TOK_DEFINE */ 133 "tdefine", /* EQN_TOK_TDEFINE */ 134 "ndefine", /* EQN_TOK_NDEFINE */ 135 "undef", /* EQN_TOK_UNDEF */ 136 "above", /* EQN_TOK_ABOVE */ 137 }; 138 139 static const char *const eqn_func[] = { 140 "acos", "acsc", "and", "arc", "asec", "asin", "atan", 141 "cos", "cosh", "coth", "csc", "det", "exp", "for", 142 "if", "lim", "ln", "log", "max", "min", 143 "sec", "sin", "sinh", "tan", "tanh", "Im", "Re", 144 }; 145 146 enum eqn_symt { 147 EQNSYM_alpha = 0, 148 EQNSYM_beta, 149 EQNSYM_chi, 150 EQNSYM_delta, 151 EQNSYM_epsilon, 152 EQNSYM_eta, 153 EQNSYM_gamma, 154 EQNSYM_iota, 155 EQNSYM_kappa, 156 EQNSYM_lambda, 157 EQNSYM_mu, 158 EQNSYM_nu, 159 EQNSYM_omega, 160 EQNSYM_omicron, 161 EQNSYM_phi, 162 EQNSYM_pi, 163 EQNSYM_ps, 164 EQNSYM_rho, 165 EQNSYM_sigma, 166 EQNSYM_tau, 167 EQNSYM_theta, 168 EQNSYM_upsilon, 169 EQNSYM_xi, 170 EQNSYM_zeta, 171 EQNSYM_DELTA, 172 EQNSYM_GAMMA, 173 EQNSYM_LAMBDA, 174 EQNSYM_OMEGA, 175 EQNSYM_PHI, 176 EQNSYM_PI, 177 EQNSYM_PSI, 178 EQNSYM_SIGMA, 179 EQNSYM_THETA, 180 EQNSYM_UPSILON, 181 EQNSYM_XI, 182 EQNSYM_inter, 183 EQNSYM_union, 184 EQNSYM_prod, 185 EQNSYM_int, 186 EQNSYM_sum, 187 EQNSYM_grad, 188 EQNSYM_del, 189 EQNSYM_times, 190 EQNSYM_cdot, 191 EQNSYM_nothing, 192 EQNSYM_approx, 193 EQNSYM_prime, 194 EQNSYM_half, 195 EQNSYM_partial, 196 EQNSYM_inf, 197 EQNSYM_muchgreat, 198 EQNSYM_muchless, 199 EQNSYM_larrow, 200 EQNSYM_rarrow, 201 EQNSYM_pm, 202 EQNSYM_nequal, 203 EQNSYM_equiv, 204 EQNSYM_lessequal, 205 EQNSYM_moreequal, 206 EQNSYM_minus, 207 EQNSYM__MAX 208 }; 209 210 struct eqnsym { 211 const char *str; 212 const char *sym; 213 }; 214 215 static const struct eqnsym eqnsyms[EQNSYM__MAX] = { 216 { "alpha", "*a" }, /* EQNSYM_alpha */ 217 { "beta", "*b" }, /* EQNSYM_beta */ 218 { "chi", "*x" }, /* EQNSYM_chi */ 219 { "delta", "*d" }, /* EQNSYM_delta */ 220 { "epsilon", "*e" }, /* EQNSYM_epsilon */ 221 { "eta", "*y" }, /* EQNSYM_eta */ 222 { "gamma", "*g" }, /* EQNSYM_gamma */ 223 { "iota", "*i" }, /* EQNSYM_iota */ 224 { "kappa", "*k" }, /* EQNSYM_kappa */ 225 { "lambda", "*l" }, /* EQNSYM_lambda */ 226 { "mu", "*m" }, /* EQNSYM_mu */ 227 { "nu", "*n" }, /* EQNSYM_nu */ 228 { "omega", "*w" }, /* EQNSYM_omega */ 229 { "omicron", "*o" }, /* EQNSYM_omicron */ 230 { "phi", "*f" }, /* EQNSYM_phi */ 231 { "pi", "*p" }, /* EQNSYM_pi */ 232 { "psi", "*q" }, /* EQNSYM_psi */ 233 { "rho", "*r" }, /* EQNSYM_rho */ 234 { "sigma", "*s" }, /* EQNSYM_sigma */ 235 { "tau", "*t" }, /* EQNSYM_tau */ 236 { "theta", "*h" }, /* EQNSYM_theta */ 237 { "upsilon", "*u" }, /* EQNSYM_upsilon */ 238 { "xi", "*c" }, /* EQNSYM_xi */ 239 { "zeta", "*z" }, /* EQNSYM_zeta */ 240 { "DELTA", "*D" }, /* EQNSYM_DELTA */ 241 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 242 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 243 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 244 { "PHI", "*F" }, /* EQNSYM_PHI */ 245 { "PI", "*P" }, /* EQNSYM_PI */ 246 { "PSI", "*Q" }, /* EQNSYM_PSI */ 247 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 248 { "THETA", "*H" }, /* EQNSYM_THETA */ 249 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 250 { "XI", "*C" }, /* EQNSYM_XI */ 251 { "inter", "ca" }, /* EQNSYM_inter */ 252 { "union", "cu" }, /* EQNSYM_union */ 253 { "prod", "product" }, /* EQNSYM_prod */ 254 { "int", "integral" }, /* EQNSYM_int */ 255 { "sum", "sum" }, /* EQNSYM_sum */ 256 { "grad", "gr" }, /* EQNSYM_grad */ 257 { "del", "gr" }, /* EQNSYM_del */ 258 { "times", "mu" }, /* EQNSYM_times */ 259 { "cdot", "pc" }, /* EQNSYM_cdot */ 260 { "nothing", "&" }, /* EQNSYM_nothing */ 261 { "approx", "~~" }, /* EQNSYM_approx */ 262 { "prime", "fm" }, /* EQNSYM_prime */ 263 { "half", "12" }, /* EQNSYM_half */ 264 { "partial", "pd" }, /* EQNSYM_partial */ 265 { "inf", "if" }, /* EQNSYM_inf */ 266 { ">>", ">>" }, /* EQNSYM_muchgreat */ 267 { "<<", "<<" }, /* EQNSYM_muchless */ 268 { "<-", "<-" }, /* EQNSYM_larrow */ 269 { "->", "->" }, /* EQNSYM_rarrow */ 270 { "+-", "+-" }, /* EQNSYM_pm */ 271 { "!=", "!=" }, /* EQNSYM_nequal */ 272 { "==", "==" }, /* EQNSYM_equiv */ 273 { "<=", "<=" }, /* EQNSYM_lessequal */ 274 { ">=", ">=" }, /* EQNSYM_moreequal */ 275 { "-", "mi" }, /* EQNSYM_minus */ 276 }; 277 278 enum parse_mode { 279 MODE_QUOTED, 280 MODE_NOSUB, 281 MODE_SUB, 282 MODE_TOK 283 }; 284 285 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 286 static struct eqn_box *eqn_box_makebinary(struct eqn_node *, 287 struct eqn_box *); 288 static void eqn_def(struct eqn_node *); 289 static struct eqn_def *eqn_def_find(struct eqn_node *); 290 static void eqn_delim(struct eqn_node *); 291 static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode); 292 static void eqn_undef(struct eqn_node *); 293 294 295 struct eqn_node * 296 eqn_alloc(struct mparse *parse) 297 { 298 struct eqn_node *ep; 299 300 ep = mandoc_calloc(1, sizeof(*ep)); 301 ep->parse = parse; 302 ep->gsize = EQN_DEFSIZE; 303 return ep; 304 } 305 306 void 307 eqn_reset(struct eqn_node *ep) 308 { 309 free(ep->data); 310 ep->data = ep->start = ep->end = NULL; 311 ep->sz = ep->toksz = 0; 312 } 313 314 void 315 eqn_read(struct eqn_node *ep, const char *p) 316 { 317 char *cp; 318 319 if (ep->data == NULL) { 320 ep->sz = strlen(p); 321 ep->data = mandoc_strdup(p); 322 } else { 323 ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p); 324 free(ep->data); 325 ep->data = cp; 326 } 327 ep->sz += 1; 328 } 329 330 /* 331 * Find the key "key" of the give size within our eqn-defined values. 332 */ 333 static struct eqn_def * 334 eqn_def_find(struct eqn_node *ep) 335 { 336 int i; 337 338 for (i = 0; i < (int)ep->defsz; i++) 339 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 340 ep->defs[i].keysz, ep->start, ep->toksz)) 341 return &ep->defs[i]; 342 343 return NULL; 344 } 345 346 /* 347 * Parse a token from the input text. The modes are: 348 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends 349 * before its next occurence. Do not interpret the token in any 350 * way and return EQN_TOK_QUOTED. All other modes behave like 351 * MODE_QUOTED when *ep->start is '"'. 352 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it; 353 * otherwise, it ends before the next whitespace or brace. 354 * Do not interpret the token and return EQN_TOK__MAX. 355 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an 356 * alias created with define. If it is an alias, replace it with 357 * its string value and reparse. 358 * MODE_TOK: Like MODE_SUB, but also check the token against the list 359 * of tokens, and if there is a match, return that token. Otherwise, 360 * if the token matches a symbol, return EQN_TOK_SYM; if it matches 361 * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for 362 * a token match, *ep->start is set to an allocated string that the 363 * caller is expected to free. 364 * All modes skip whitespace following the end of the token. 365 */ 366 static enum eqn_tok 367 eqn_next(struct eqn_node *ep, enum parse_mode mode) 368 { 369 static int last_len, lim; 370 371 struct eqn_def *def; 372 size_t start; 373 int diff, i, quoted; 374 enum eqn_tok tok; 375 376 /* 377 * Reset the recursion counter after advancing 378 * beyond the end of the previous substitution. 379 */ 380 if (ep->end - ep->data >= last_len) 381 lim = 0; 382 383 ep->start = ep->end; 384 quoted = mode == MODE_QUOTED; 385 for (;;) { 386 switch (*ep->start) { 387 case '\0': 388 ep->toksz = 0; 389 return EQN_TOK_EOF; 390 case '"': 391 quoted = 1; 392 break; 393 default: 394 break; 395 } 396 if (quoted) { 397 ep->end = strchr(ep->start + 1, *ep->start); 398 ep->start++; /* Skip opening quote. */ 399 if (ep->end == NULL) { 400 mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse, 401 ep->node->line, ep->node->pos, NULL); 402 ep->end = strchr(ep->start, '\0'); 403 } 404 } else { 405 ep->end = ep->start + 1; 406 if (*ep->start != '{' && *ep->start != '}') 407 ep->end += strcspn(ep->end, " ^~\"{}\t"); 408 } 409 ep->toksz = ep->end - ep->start; 410 if (quoted && *ep->end != '\0') 411 ep->end++; /* Skip closing quote. */ 412 while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL) 413 ep->end++; 414 if (quoted) /* Cannot return, may have to strndup. */ 415 break; 416 if (mode == MODE_NOSUB) 417 return EQN_TOK__MAX; 418 if ((def = eqn_def_find(ep)) == NULL) 419 break; 420 if (++lim > EQN_NEST_MAX) { 421 mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse, 422 ep->node->line, ep->node->pos, NULL); 423 return EQN_TOK_EOF; 424 } 425 426 /* Replace a defined name with its string value. */ 427 if ((diff = def->valsz - ep->toksz) > 0) { 428 start = ep->start - ep->data; 429 ep->sz += diff; 430 ep->data = mandoc_realloc(ep->data, ep->sz + 1); 431 ep->start = ep->data + start; 432 } 433 if (diff) 434 memmove(ep->start + def->valsz, ep->start + ep->toksz, 435 strlen(ep->start + ep->toksz) + 1); 436 memcpy(ep->start, def->val, def->valsz); 437 last_len = ep->start - ep->data + def->valsz; 438 } 439 if (mode != MODE_TOK) 440 return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX; 441 if (quoted) { 442 ep->start = mandoc_strndup(ep->start, ep->toksz); 443 return EQN_TOK_QUOTED; 444 } 445 for (tok = 0; tok < EQN_TOK__MAX; tok++) 446 if (STRNEQ(ep->start, ep->toksz, 447 eqn_toks[tok], strlen(eqn_toks[tok]))) 448 return tok; 449 450 for (i = 0; i < EQNSYM__MAX; i++) { 451 if (STRNEQ(ep->start, ep->toksz, 452 eqnsyms[i].str, strlen(eqnsyms[i].str))) { 453 mandoc_asprintf(&ep->start, 454 "\\[%s]", eqnsyms[i].sym); 455 return EQN_TOK_SYM; 456 } 457 } 458 ep->start = mandoc_strndup(ep->start, ep->toksz); 459 for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++) 460 if (STRNEQ(ep->start, ep->toksz, 461 eqn_func[i], strlen(eqn_func[i]))) 462 return EQN_TOK_FUNC; 463 return EQN_TOK__MAX; 464 } 465 466 void 467 eqn_box_free(struct eqn_box *bp) 468 { 469 470 if (bp->first) 471 eqn_box_free(bp->first); 472 if (bp->next) 473 eqn_box_free(bp->next); 474 475 free(bp->text); 476 free(bp->left); 477 free(bp->right); 478 free(bp->top); 479 free(bp->bottom); 480 free(bp); 481 } 482 483 /* 484 * Allocate a box as the last child of the parent node. 485 */ 486 static struct eqn_box * 487 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 488 { 489 struct eqn_box *bp; 490 491 bp = mandoc_calloc(1, sizeof(struct eqn_box)); 492 bp->parent = parent; 493 bp->parent->args++; 494 bp->expectargs = UINT_MAX; 495 bp->font = bp->parent->font; 496 bp->size = ep->gsize; 497 498 if (NULL != parent->first) { 499 parent->last->next = bp; 500 bp->prev = parent->last; 501 } else 502 parent->first = bp; 503 504 parent->last = bp; 505 return bp; 506 } 507 508 /* 509 * Reparent the current last node (of the current parent) under a new 510 * EQN_SUBEXPR as the first element. 511 * Then return the new parent. 512 * The new EQN_SUBEXPR will have a two-child limit. 513 */ 514 static struct eqn_box * 515 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent) 516 { 517 struct eqn_box *b, *newb; 518 519 assert(NULL != parent->last); 520 b = parent->last; 521 if (parent->last == parent->first) 522 parent->first = NULL; 523 parent->args--; 524 parent->last = b->prev; 525 b->prev = NULL; 526 newb = eqn_box_alloc(ep, parent); 527 newb->type = EQN_SUBEXPR; 528 newb->expectargs = 2; 529 newb->args = 1; 530 newb->first = newb->last = b; 531 newb->first->next = NULL; 532 b->parent = newb; 533 return newb; 534 } 535 536 /* 537 * Parse the "delim" control statement. 538 */ 539 static void 540 eqn_delim(struct eqn_node *ep) 541 { 542 if (ep->end[0] == '\0' || ep->end[1] == '\0') { 543 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 544 ep->node->line, ep->node->pos, "delim"); 545 if (ep->end[0] != '\0') 546 ep->end++; 547 } else if (strncmp(ep->end, "off", 3) == 0) { 548 ep->delim = 0; 549 ep->end += 3; 550 } else if (strncmp(ep->end, "on", 2) == 0) { 551 if (ep->odelim && ep->cdelim) 552 ep->delim = 1; 553 ep->end += 2; 554 } else { 555 ep->odelim = *ep->end++; 556 ep->cdelim = *ep->end++; 557 ep->delim = 1; 558 } 559 } 560 561 /* 562 * Undefine a previously-defined string. 563 */ 564 static void 565 eqn_undef(struct eqn_node *ep) 566 { 567 struct eqn_def *def; 568 569 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 570 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 571 ep->node->line, ep->node->pos, "undef"); 572 return; 573 } 574 if ((def = eqn_def_find(ep)) == NULL) 575 return; 576 free(def->key); 577 free(def->val); 578 def->key = def->val = NULL; 579 def->keysz = def->valsz = 0; 580 } 581 582 static void 583 eqn_def(struct eqn_node *ep) 584 { 585 struct eqn_def *def; 586 int i; 587 588 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 589 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 590 ep->node->line, ep->node->pos, "define"); 591 return; 592 } 593 594 /* 595 * Search for a key that already exists. 596 * Create a new key if none is found. 597 */ 598 if ((def = eqn_def_find(ep)) == NULL) { 599 /* Find holes in string array. */ 600 for (i = 0; i < (int)ep->defsz; i++) 601 if (0 == ep->defs[i].keysz) 602 break; 603 604 if (i == (int)ep->defsz) { 605 ep->defsz++; 606 ep->defs = mandoc_reallocarray(ep->defs, 607 ep->defsz, sizeof(struct eqn_def)); 608 ep->defs[i].key = ep->defs[i].val = NULL; 609 } 610 611 def = ep->defs + i; 612 free(def->key); 613 def->key = mandoc_strndup(ep->start, ep->toksz); 614 def->keysz = ep->toksz; 615 } 616 617 if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) { 618 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse, 619 ep->node->line, ep->node->pos, "define %s", def->key); 620 free(def->key); 621 free(def->val); 622 def->key = def->val = NULL; 623 def->keysz = def->valsz = 0; 624 return; 625 } 626 free(def->val); 627 def->val = mandoc_strndup(ep->start, ep->toksz); 628 def->valsz = ep->toksz; 629 } 630 631 void 632 eqn_parse(struct eqn_node *ep) 633 { 634 struct eqn_box *cur, *nbox, *parent, *split; 635 const char *cp, *cpn; 636 char *p; 637 enum eqn_tok tok; 638 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln; 639 int size; 640 641 parent = ep->node->eqn; 642 assert(parent != NULL); 643 644 /* 645 * Empty equation. 646 * Do not add it to the high-level syntax tree. 647 */ 648 649 if (ep->data == NULL) 650 return; 651 652 ep->start = ep->end = ep->data + strspn(ep->data, " ^~"); 653 654 next_tok: 655 tok = eqn_next(ep, MODE_TOK); 656 switch (tok) { 657 case EQN_TOK_UNDEF: 658 eqn_undef(ep); 659 break; 660 case EQN_TOK_NDEFINE: 661 case EQN_TOK_DEFINE: 662 eqn_def(ep); 663 break; 664 case EQN_TOK_TDEFINE: 665 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF || 666 eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) 667 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 668 ep->node->line, ep->node->pos, "tdefine"); 669 break; 670 case EQN_TOK_DELIM: 671 eqn_delim(ep); 672 break; 673 case EQN_TOK_GFONT: 674 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 675 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 676 ep->node->line, ep->node->pos, eqn_toks[tok]); 677 break; 678 case EQN_TOK_MARK: 679 case EQN_TOK_LINEUP: 680 /* Ignore these. */ 681 break; 682 case EQN_TOK_DYAD: 683 case EQN_TOK_VEC: 684 case EQN_TOK_UNDER: 685 case EQN_TOK_BAR: 686 case EQN_TOK_TILDE: 687 case EQN_TOK_HAT: 688 case EQN_TOK_DOT: 689 case EQN_TOK_DOTDOT: 690 if (parent->last == NULL) { 691 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 692 ep->node->line, ep->node->pos, eqn_toks[tok]); 693 cur = eqn_box_alloc(ep, parent); 694 cur->type = EQN_TEXT; 695 cur->text = mandoc_strdup(""); 696 } 697 parent = eqn_box_makebinary(ep, parent); 698 parent->type = EQN_LIST; 699 parent->expectargs = 1; 700 parent->font = EQNFONT_ROMAN; 701 switch (tok) { 702 case EQN_TOK_DOTDOT: 703 parent->top = mandoc_strdup("\\[ad]"); 704 break; 705 case EQN_TOK_VEC: 706 parent->top = mandoc_strdup("\\[->]"); 707 break; 708 case EQN_TOK_DYAD: 709 parent->top = mandoc_strdup("\\[<>]"); 710 break; 711 case EQN_TOK_TILDE: 712 parent->top = mandoc_strdup("\\[a~]"); 713 break; 714 case EQN_TOK_UNDER: 715 parent->bottom = mandoc_strdup("\\[ul]"); 716 break; 717 case EQN_TOK_BAR: 718 parent->top = mandoc_strdup("\\[rn]"); 719 break; 720 case EQN_TOK_DOT: 721 parent->top = mandoc_strdup("\\[a.]"); 722 break; 723 case EQN_TOK_HAT: 724 parent->top = mandoc_strdup("\\[ha]"); 725 break; 726 default: 727 abort(); 728 } 729 parent = parent->parent; 730 break; 731 case EQN_TOK_FWD: 732 case EQN_TOK_BACK: 733 case EQN_TOK_DOWN: 734 case EQN_TOK_UP: 735 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 736 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 737 ep->node->line, ep->node->pos, eqn_toks[tok]); 738 break; 739 case EQN_TOK_FAT: 740 case EQN_TOK_ROMAN: 741 case EQN_TOK_ITALIC: 742 case EQN_TOK_BOLD: 743 while (parent->args == parent->expectargs) 744 parent = parent->parent; 745 /* 746 * These values apply to the next word or sequence of 747 * words; thus, we mark that we'll have a child with 748 * exactly one of those. 749 */ 750 parent = eqn_box_alloc(ep, parent); 751 parent->type = EQN_LIST; 752 parent->expectargs = 1; 753 switch (tok) { 754 case EQN_TOK_FAT: 755 parent->font = EQNFONT_FAT; 756 break; 757 case EQN_TOK_ROMAN: 758 parent->font = EQNFONT_ROMAN; 759 break; 760 case EQN_TOK_ITALIC: 761 parent->font = EQNFONT_ITALIC; 762 break; 763 case EQN_TOK_BOLD: 764 parent->font = EQNFONT_BOLD; 765 break; 766 default: 767 abort(); 768 } 769 break; 770 case EQN_TOK_SIZE: 771 case EQN_TOK_GSIZE: 772 /* Accept two values: integral size and a single. */ 773 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 774 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 775 ep->node->line, ep->node->pos, eqn_toks[tok]); 776 break; 777 } 778 size = mandoc_strntoi(ep->start, ep->toksz, 10); 779 if (-1 == size) { 780 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse, 781 ep->node->line, ep->node->pos, eqn_toks[tok]); 782 break; 783 } 784 if (EQN_TOK_GSIZE == tok) { 785 ep->gsize = size; 786 break; 787 } 788 while (parent->args == parent->expectargs) 789 parent = parent->parent; 790 parent = eqn_box_alloc(ep, parent); 791 parent->type = EQN_LIST; 792 parent->expectargs = 1; 793 parent->size = size; 794 break; 795 case EQN_TOK_FROM: 796 case EQN_TOK_TO: 797 case EQN_TOK_SUB: 798 case EQN_TOK_SUP: 799 /* 800 * We have a left-right-associative expression. 801 * Repivot under a positional node, open a child scope 802 * and keep on reading. 803 */ 804 if (parent->last == NULL) { 805 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 806 ep->node->line, ep->node->pos, eqn_toks[tok]); 807 cur = eqn_box_alloc(ep, parent); 808 cur->type = EQN_TEXT; 809 cur->text = mandoc_strdup(""); 810 } 811 while (parent->expectargs == 1 && parent->args == 1) 812 parent = parent->parent; 813 if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) { 814 for (cur = parent; cur != NULL; cur = cur->parent) 815 if (cur->pos == EQNPOS_SUB || 816 cur->pos == EQNPOS_SUP || 817 cur->pos == EQNPOS_SUBSUP || 818 cur->pos == EQNPOS_SQRT || 819 cur->pos == EQNPOS_OVER) 820 break; 821 if (cur != NULL) 822 parent = cur->parent; 823 } 824 if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) { 825 parent->expectargs = 3; 826 parent->pos = EQNPOS_SUBSUP; 827 break; 828 } 829 if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) { 830 parent->expectargs = 3; 831 parent->pos = EQNPOS_FROMTO; 832 break; 833 } 834 parent = eqn_box_makebinary(ep, parent); 835 switch (tok) { 836 case EQN_TOK_FROM: 837 parent->pos = EQNPOS_FROM; 838 break; 839 case EQN_TOK_TO: 840 parent->pos = EQNPOS_TO; 841 break; 842 case EQN_TOK_SUP: 843 parent->pos = EQNPOS_SUP; 844 break; 845 case EQN_TOK_SUB: 846 parent->pos = EQNPOS_SUB; 847 break; 848 default: 849 abort(); 850 } 851 break; 852 case EQN_TOK_SQRT: 853 while (parent->args == parent->expectargs) 854 parent = parent->parent; 855 /* 856 * Accept a left-right-associative set of arguments just 857 * like sub and sup and friends but without rebalancing 858 * under a pivot. 859 */ 860 parent = eqn_box_alloc(ep, parent); 861 parent->type = EQN_SUBEXPR; 862 parent->pos = EQNPOS_SQRT; 863 parent->expectargs = 1; 864 break; 865 case EQN_TOK_OVER: 866 /* 867 * We have a right-left-associative fraction. 868 * Close out anything that's currently open, then 869 * rebalance and continue reading. 870 */ 871 if (parent->last == NULL) { 872 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 873 ep->node->line, ep->node->pos, eqn_toks[tok]); 874 cur = eqn_box_alloc(ep, parent); 875 cur->type = EQN_TEXT; 876 cur->text = mandoc_strdup(""); 877 } 878 while (parent->args == parent->expectargs) 879 parent = parent->parent; 880 while (EQN_SUBEXPR == parent->type) 881 parent = parent->parent; 882 parent = eqn_box_makebinary(ep, parent); 883 parent->pos = EQNPOS_OVER; 884 break; 885 case EQN_TOK_RIGHT: 886 case EQN_TOK_BRACE_CLOSE: 887 /* 888 * Close out the existing brace. 889 * FIXME: this is a shitty sentinel: we should really 890 * have a native EQN_BRACE type or whatnot. 891 */ 892 for (cur = parent; cur != NULL; cur = cur->parent) 893 if (cur->type == EQN_LIST && 894 cur->expectargs > 1 && 895 (tok == EQN_TOK_BRACE_CLOSE || 896 cur->left != NULL)) 897 break; 898 if (cur == NULL) { 899 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse, 900 ep->node->line, ep->node->pos, eqn_toks[tok]); 901 break; 902 } 903 parent = cur; 904 if (EQN_TOK_RIGHT == tok) { 905 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 906 mandoc_msg(MANDOCERR_REQ_EMPTY, 907 ep->parse, ep->node->line, 908 ep->node->pos, eqn_toks[tok]); 909 break; 910 } 911 /* Handling depends on right/left. */ 912 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 913 parent->right = mandoc_strdup("\\[rc]"); 914 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 915 parent->right = mandoc_strdup("\\[rf]"); 916 else 917 parent->right = 918 mandoc_strndup(ep->start, ep->toksz); 919 } 920 parent = parent->parent; 921 if (tok == EQN_TOK_BRACE_CLOSE && 922 (parent->type == EQN_PILE || 923 parent->type == EQN_MATRIX)) 924 parent = parent->parent; 925 /* Close out any "singleton" lists. */ 926 while (parent->type == EQN_LIST && 927 parent->expectargs == 1 && 928 parent->args == 1) 929 parent = parent->parent; 930 break; 931 case EQN_TOK_BRACE_OPEN: 932 case EQN_TOK_LEFT: 933 /* 934 * If we already have something in the stack and we're 935 * in an expression, then rewind til we're not any more 936 * (just like with the text node). 937 */ 938 while (parent->args == parent->expectargs) 939 parent = parent->parent; 940 if (EQN_TOK_LEFT == tok && 941 eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 942 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 943 ep->node->line, ep->node->pos, eqn_toks[tok]); 944 break; 945 } 946 parent = eqn_box_alloc(ep, parent); 947 parent->type = EQN_LIST; 948 if (EQN_TOK_LEFT == tok) { 949 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 950 parent->left = mandoc_strdup("\\[lc]"); 951 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 952 parent->left = mandoc_strdup("\\[lf]"); 953 else 954 parent->left = 955 mandoc_strndup(ep->start, ep->toksz); 956 } 957 break; 958 case EQN_TOK_PILE: 959 case EQN_TOK_LPILE: 960 case EQN_TOK_RPILE: 961 case EQN_TOK_CPILE: 962 case EQN_TOK_CCOL: 963 case EQN_TOK_LCOL: 964 case EQN_TOK_RCOL: 965 while (parent->args == parent->expectargs) 966 parent = parent->parent; 967 parent = eqn_box_alloc(ep, parent); 968 parent->type = EQN_PILE; 969 parent->expectargs = 1; 970 break; 971 case EQN_TOK_ABOVE: 972 for (cur = parent; cur != NULL; cur = cur->parent) 973 if (cur->type == EQN_PILE) 974 break; 975 if (cur == NULL) { 976 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse, 977 ep->node->line, ep->node->pos, eqn_toks[tok]); 978 break; 979 } 980 parent = eqn_box_alloc(ep, cur); 981 parent->type = EQN_LIST; 982 break; 983 case EQN_TOK_MATRIX: 984 while (parent->args == parent->expectargs) 985 parent = parent->parent; 986 parent = eqn_box_alloc(ep, parent); 987 parent->type = EQN_MATRIX; 988 parent->expectargs = 1; 989 break; 990 case EQN_TOK_EOF: 991 return; 992 case EQN_TOK__MAX: 993 case EQN_TOK_FUNC: 994 case EQN_TOK_QUOTED: 995 case EQN_TOK_SYM: 996 p = ep->start; 997 assert(p != NULL); 998 /* 999 * If we already have something in the stack and we're 1000 * in an expression, then rewind til we're not any more. 1001 */ 1002 while (parent->args == parent->expectargs) 1003 parent = parent->parent; 1004 cur = eqn_box_alloc(ep, parent); 1005 cur->type = EQN_TEXT; 1006 cur->text = p; 1007 switch (tok) { 1008 case EQN_TOK_FUNC: 1009 cur->font = EQNFONT_ROMAN; 1010 break; 1011 case EQN_TOK_QUOTED: 1012 if (cur->font == EQNFONT_NONE) 1013 cur->font = EQNFONT_ITALIC; 1014 break; 1015 case EQN_TOK_SYM: 1016 break; 1017 default: 1018 if (cur->font != EQNFONT_NONE || *p == '\0') 1019 break; 1020 cpn = p - 1; 1021 ccln = CCL_LET; 1022 split = NULL; 1023 for (;;) { 1024 /* Advance to next character. */ 1025 cp = cpn++; 1026 ccl = ccln; 1027 ccln = isalpha((unsigned char)*cpn) ? CCL_LET : 1028 isdigit((unsigned char)*cpn) || 1029 (*cpn == '.' && (ccl == CCL_DIG || 1030 isdigit((unsigned char)cpn[1]))) ? 1031 CCL_DIG : CCL_PUN; 1032 /* No boundary before first character. */ 1033 if (cp < p) 1034 continue; 1035 cur->font = ccl == CCL_LET ? 1036 EQNFONT_ITALIC : EQNFONT_ROMAN; 1037 if (*cp == '\\') 1038 mandoc_escape(&cpn, NULL, NULL); 1039 /* No boundary after last character. */ 1040 if (*cpn == '\0') 1041 break; 1042 if (ccln == ccl && *cp != ',' && *cpn != ',') 1043 continue; 1044 /* Boundary found, split the text. */ 1045 if (parent->args == parent->expectargs) { 1046 /* Remove the text from the tree. */ 1047 if (cur->prev == NULL) 1048 parent->first = cur->next; 1049 else 1050 cur->prev->next = NULL; 1051 parent->last = cur->prev; 1052 parent->args--; 1053 /* Set up a list instead. */ 1054 split = eqn_box_alloc(ep, parent); 1055 split->type = EQN_LIST; 1056 /* Insert the word into the list. */ 1057 split->first = split->last = cur; 1058 cur->parent = split; 1059 cur->prev = NULL; 1060 parent = split; 1061 } 1062 /* Append a new text box. */ 1063 nbox = eqn_box_alloc(ep, parent); 1064 nbox->type = EQN_TEXT; 1065 nbox->text = mandoc_strdup(cpn); 1066 /* Truncate the old box. */ 1067 p = mandoc_strndup(cur->text, 1068 cpn - cur->text); 1069 free(cur->text); 1070 cur->text = p; 1071 /* Setup to process the new box. */ 1072 cur = nbox; 1073 p = nbox->text; 1074 cpn = p - 1; 1075 ccln = CCL_LET; 1076 } 1077 if (split != NULL) 1078 parent = split->parent; 1079 break; 1080 } 1081 break; 1082 default: 1083 abort(); 1084 } 1085 goto next_tok; 1086 } 1087 1088 void 1089 eqn_free(struct eqn_node *p) 1090 { 1091 int i; 1092 1093 for (i = 0; i < (int)p->defsz; i++) { 1094 free(p->defs[i].key); 1095 free(p->defs[i].val); 1096 } 1097 1098 free(p->data); 1099 free(p->defs); 1100 free(p); 1101 } 1102