xref: /openbsd-src/usr.bin/mandoc/eqn.c (revision 9f11ffb7133c203312a01e4b986886bc88c7d74b)
1 /*	$OpenBSD: eqn.c,v 1.46 2018/12/14 06:33:03 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <time.h>
27 
28 #include "mandoc_aux.h"
29 #include "mandoc.h"
30 #include "roff.h"
31 #include "eqn.h"
32 #include "libmandoc.h"
33 #include "eqn_parse.h"
34 
35 #define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
36 #define	STRNEQ(p1, sz1, p2, sz2) \
37 	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
38 
39 enum	eqn_tok {
40 	EQN_TOK_DYAD = 0,
41 	EQN_TOK_VEC,
42 	EQN_TOK_UNDER,
43 	EQN_TOK_BAR,
44 	EQN_TOK_TILDE,
45 	EQN_TOK_HAT,
46 	EQN_TOK_DOT,
47 	EQN_TOK_DOTDOT,
48 	EQN_TOK_FWD,
49 	EQN_TOK_BACK,
50 	EQN_TOK_DOWN,
51 	EQN_TOK_UP,
52 	EQN_TOK_FAT,
53 	EQN_TOK_ROMAN,
54 	EQN_TOK_ITALIC,
55 	EQN_TOK_BOLD,
56 	EQN_TOK_SIZE,
57 	EQN_TOK_SUB,
58 	EQN_TOK_SUP,
59 	EQN_TOK_SQRT,
60 	EQN_TOK_OVER,
61 	EQN_TOK_FROM,
62 	EQN_TOK_TO,
63 	EQN_TOK_BRACE_OPEN,
64 	EQN_TOK_BRACE_CLOSE,
65 	EQN_TOK_GSIZE,
66 	EQN_TOK_GFONT,
67 	EQN_TOK_MARK,
68 	EQN_TOK_LINEUP,
69 	EQN_TOK_LEFT,
70 	EQN_TOK_RIGHT,
71 	EQN_TOK_PILE,
72 	EQN_TOK_LPILE,
73 	EQN_TOK_RPILE,
74 	EQN_TOK_CPILE,
75 	EQN_TOK_MATRIX,
76 	EQN_TOK_CCOL,
77 	EQN_TOK_LCOL,
78 	EQN_TOK_RCOL,
79 	EQN_TOK_DELIM,
80 	EQN_TOK_DEFINE,
81 	EQN_TOK_TDEFINE,
82 	EQN_TOK_NDEFINE,
83 	EQN_TOK_UNDEF,
84 	EQN_TOK_ABOVE,
85 	EQN_TOK__MAX,
86 	EQN_TOK_FUNC,
87 	EQN_TOK_QUOTED,
88 	EQN_TOK_SYM,
89 	EQN_TOK_EOF
90 };
91 
92 static	const char *eqn_toks[EQN_TOK__MAX] = {
93 	"dyad", /* EQN_TOK_DYAD */
94 	"vec", /* EQN_TOK_VEC */
95 	"under", /* EQN_TOK_UNDER */
96 	"bar", /* EQN_TOK_BAR */
97 	"tilde", /* EQN_TOK_TILDE */
98 	"hat", /* EQN_TOK_HAT */
99 	"dot", /* EQN_TOK_DOT */
100 	"dotdot", /* EQN_TOK_DOTDOT */
101 	"fwd", /* EQN_TOK_FWD * */
102 	"back", /* EQN_TOK_BACK */
103 	"down", /* EQN_TOK_DOWN */
104 	"up", /* EQN_TOK_UP */
105 	"fat", /* EQN_TOK_FAT */
106 	"roman", /* EQN_TOK_ROMAN */
107 	"italic", /* EQN_TOK_ITALIC */
108 	"bold", /* EQN_TOK_BOLD */
109 	"size", /* EQN_TOK_SIZE */
110 	"sub", /* EQN_TOK_SUB */
111 	"sup", /* EQN_TOK_SUP */
112 	"sqrt", /* EQN_TOK_SQRT */
113 	"over", /* EQN_TOK_OVER */
114 	"from", /* EQN_TOK_FROM */
115 	"to", /* EQN_TOK_TO */
116 	"{", /* EQN_TOK_BRACE_OPEN */
117 	"}", /* EQN_TOK_BRACE_CLOSE */
118 	"gsize", /* EQN_TOK_GSIZE */
119 	"gfont", /* EQN_TOK_GFONT */
120 	"mark", /* EQN_TOK_MARK */
121 	"lineup", /* EQN_TOK_LINEUP */
122 	"left", /* EQN_TOK_LEFT */
123 	"right", /* EQN_TOK_RIGHT */
124 	"pile", /* EQN_TOK_PILE */
125 	"lpile", /* EQN_TOK_LPILE */
126 	"rpile", /* EQN_TOK_RPILE */
127 	"cpile", /* EQN_TOK_CPILE */
128 	"matrix", /* EQN_TOK_MATRIX */
129 	"ccol", /* EQN_TOK_CCOL */
130 	"lcol", /* EQN_TOK_LCOL */
131 	"rcol", /* EQN_TOK_RCOL */
132 	"delim", /* EQN_TOK_DELIM */
133 	"define", /* EQN_TOK_DEFINE */
134 	"tdefine", /* EQN_TOK_TDEFINE */
135 	"ndefine", /* EQN_TOK_NDEFINE */
136 	"undef", /* EQN_TOK_UNDEF */
137 	"above", /* EQN_TOK_ABOVE */
138 };
139 
140 static	const char *const eqn_func[] = {
141 	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
142 	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
143 	"if",	"lim",	"ln",	"log",	"max",	"min",
144 	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
145 };
146 
147 enum	eqn_symt {
148 	EQNSYM_alpha = 0,
149 	EQNSYM_beta,
150 	EQNSYM_chi,
151 	EQNSYM_delta,
152 	EQNSYM_epsilon,
153 	EQNSYM_eta,
154 	EQNSYM_gamma,
155 	EQNSYM_iota,
156 	EQNSYM_kappa,
157 	EQNSYM_lambda,
158 	EQNSYM_mu,
159 	EQNSYM_nu,
160 	EQNSYM_omega,
161 	EQNSYM_omicron,
162 	EQNSYM_phi,
163 	EQNSYM_pi,
164 	EQNSYM_ps,
165 	EQNSYM_rho,
166 	EQNSYM_sigma,
167 	EQNSYM_tau,
168 	EQNSYM_theta,
169 	EQNSYM_upsilon,
170 	EQNSYM_xi,
171 	EQNSYM_zeta,
172 	EQNSYM_DELTA,
173 	EQNSYM_GAMMA,
174 	EQNSYM_LAMBDA,
175 	EQNSYM_OMEGA,
176 	EQNSYM_PHI,
177 	EQNSYM_PI,
178 	EQNSYM_PSI,
179 	EQNSYM_SIGMA,
180 	EQNSYM_THETA,
181 	EQNSYM_UPSILON,
182 	EQNSYM_XI,
183 	EQNSYM_inter,
184 	EQNSYM_union,
185 	EQNSYM_prod,
186 	EQNSYM_int,
187 	EQNSYM_sum,
188 	EQNSYM_grad,
189 	EQNSYM_del,
190 	EQNSYM_times,
191 	EQNSYM_cdot,
192 	EQNSYM_nothing,
193 	EQNSYM_approx,
194 	EQNSYM_prime,
195 	EQNSYM_half,
196 	EQNSYM_partial,
197 	EQNSYM_inf,
198 	EQNSYM_muchgreat,
199 	EQNSYM_muchless,
200 	EQNSYM_larrow,
201 	EQNSYM_rarrow,
202 	EQNSYM_pm,
203 	EQNSYM_nequal,
204 	EQNSYM_equiv,
205 	EQNSYM_lessequal,
206 	EQNSYM_moreequal,
207 	EQNSYM_minus,
208 	EQNSYM__MAX
209 };
210 
211 struct	eqnsym {
212 	const char	*str;
213 	const char	*sym;
214 };
215 
216 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
217 	{ "alpha", "*a" }, /* EQNSYM_alpha */
218 	{ "beta", "*b" }, /* EQNSYM_beta */
219 	{ "chi", "*x" }, /* EQNSYM_chi */
220 	{ "delta", "*d" }, /* EQNSYM_delta */
221 	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
222 	{ "eta", "*y" }, /* EQNSYM_eta */
223 	{ "gamma", "*g" }, /* EQNSYM_gamma */
224 	{ "iota", "*i" }, /* EQNSYM_iota */
225 	{ "kappa", "*k" }, /* EQNSYM_kappa */
226 	{ "lambda", "*l" }, /* EQNSYM_lambda */
227 	{ "mu", "*m" }, /* EQNSYM_mu */
228 	{ "nu", "*n" }, /* EQNSYM_nu */
229 	{ "omega", "*w" }, /* EQNSYM_omega */
230 	{ "omicron", "*o" }, /* EQNSYM_omicron */
231 	{ "phi", "*f" }, /* EQNSYM_phi */
232 	{ "pi", "*p" }, /* EQNSYM_pi */
233 	{ "psi", "*q" }, /* EQNSYM_psi */
234 	{ "rho", "*r" }, /* EQNSYM_rho */
235 	{ "sigma", "*s" }, /* EQNSYM_sigma */
236 	{ "tau", "*t" }, /* EQNSYM_tau */
237 	{ "theta", "*h" }, /* EQNSYM_theta */
238 	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
239 	{ "xi", "*c" }, /* EQNSYM_xi */
240 	{ "zeta", "*z" }, /* EQNSYM_zeta */
241 	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
242 	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
243 	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
244 	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
245 	{ "PHI", "*F" }, /* EQNSYM_PHI */
246 	{ "PI", "*P" }, /* EQNSYM_PI */
247 	{ "PSI", "*Q" }, /* EQNSYM_PSI */
248 	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
249 	{ "THETA", "*H" }, /* EQNSYM_THETA */
250 	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
251 	{ "XI", "*C" }, /* EQNSYM_XI */
252 	{ "inter", "ca" }, /* EQNSYM_inter */
253 	{ "union", "cu" }, /* EQNSYM_union */
254 	{ "prod", "product" }, /* EQNSYM_prod */
255 	{ "int", "integral" }, /* EQNSYM_int */
256 	{ "sum", "sum" }, /* EQNSYM_sum */
257 	{ "grad", "gr" }, /* EQNSYM_grad */
258 	{ "del", "gr" }, /* EQNSYM_del */
259 	{ "times", "mu" }, /* EQNSYM_times */
260 	{ "cdot", "pc" }, /* EQNSYM_cdot */
261 	{ "nothing", "&" }, /* EQNSYM_nothing */
262 	{ "approx", "~~" }, /* EQNSYM_approx */
263 	{ "prime", "fm" }, /* EQNSYM_prime */
264 	{ "half", "12" }, /* EQNSYM_half */
265 	{ "partial", "pd" }, /* EQNSYM_partial */
266 	{ "inf", "if" }, /* EQNSYM_inf */
267 	{ ">>", ">>" }, /* EQNSYM_muchgreat */
268 	{ "<<", "<<" }, /* EQNSYM_muchless */
269 	{ "<-", "<-" }, /* EQNSYM_larrow */
270 	{ "->", "->" }, /* EQNSYM_rarrow */
271 	{ "+-", "+-" }, /* EQNSYM_pm */
272 	{ "!=", "!=" }, /* EQNSYM_nequal */
273 	{ "==", "==" }, /* EQNSYM_equiv */
274 	{ "<=", "<=" }, /* EQNSYM_lessequal */
275 	{ ">=", ">=" }, /* EQNSYM_moreequal */
276 	{ "-", "mi" }, /* EQNSYM_minus */
277 };
278 
279 enum	parse_mode {
280 	MODE_QUOTED,
281 	MODE_NOSUB,
282 	MODE_SUB,
283 	MODE_TOK
284 };
285 
286 struct	eqn_def {
287 	char		 *key;
288 	size_t		  keysz;
289 	char		 *val;
290 	size_t		  valsz;
291 };
292 
293 static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
294 static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
295 				struct eqn_box *);
296 static	void		 eqn_def(struct eqn_node *);
297 static	struct eqn_def	*eqn_def_find(struct eqn_node *);
298 static	void		 eqn_delim(struct eqn_node *);
299 static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
300 static	void		 eqn_undef(struct eqn_node *);
301 
302 
303 struct eqn_node *
304 eqn_alloc(void)
305 {
306 	struct eqn_node *ep;
307 
308 	ep = mandoc_calloc(1, sizeof(*ep));
309 	ep->gsize = EQN_DEFSIZE;
310 	return ep;
311 }
312 
313 void
314 eqn_reset(struct eqn_node *ep)
315 {
316 	free(ep->data);
317 	ep->data = ep->start = ep->end = NULL;
318 	ep->sz = ep->toksz = 0;
319 }
320 
321 void
322 eqn_read(struct eqn_node *ep, const char *p)
323 {
324 	char		*cp;
325 
326 	if (ep->data == NULL) {
327 		ep->sz = strlen(p);
328 		ep->data = mandoc_strdup(p);
329 	} else {
330 		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
331 		free(ep->data);
332 		ep->data = cp;
333 	}
334 	ep->sz += 1;
335 }
336 
337 /*
338  * Find the key "key" of the give size within our eqn-defined values.
339  */
340 static struct eqn_def *
341 eqn_def_find(struct eqn_node *ep)
342 {
343 	int		 i;
344 
345 	for (i = 0; i < (int)ep->defsz; i++)
346 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
347 		    ep->defs[i].keysz, ep->start, ep->toksz))
348 			return &ep->defs[i];
349 
350 	return NULL;
351 }
352 
353 /*
354  * Parse a token from the input text.  The modes are:
355  * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
356  *   before its next occurence.  Do not interpret the token in any
357  *   way and return EQN_TOK_QUOTED.  All other modes behave like
358  *   MODE_QUOTED when *ep->start is '"'.
359  * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
360  *   otherwise, it ends before the next whitespace or brace.
361  *   Do not interpret the token and return EQN_TOK__MAX.
362  * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
363  *   alias created with define.  If it is an alias, replace it with
364  *   its string value and reparse.
365  * MODE_TOK: Like MODE_SUB, but also check the token against the list
366  *   of tokens, and if there is a match, return that token.  Otherwise,
367  *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
368  *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
369  *   a token match, *ep->start is set to an allocated string that the
370  *   caller is expected to free.
371  * All modes skip whitespace following the end of the token.
372  */
373 static enum eqn_tok
374 eqn_next(struct eqn_node *ep, enum parse_mode mode)
375 {
376 	static int	 last_len, lim;
377 
378 	struct eqn_def	*def;
379 	size_t		 start;
380 	int		 diff, i, quoted;
381 	enum eqn_tok	 tok;
382 
383 	/*
384 	 * Reset the recursion counter after advancing
385 	 * beyond the end of the previous substitution.
386 	 */
387 	if (ep->end - ep->data >= last_len)
388 		lim = 0;
389 
390 	ep->start = ep->end;
391 	quoted = mode == MODE_QUOTED;
392 	for (;;) {
393 		switch (*ep->start) {
394 		case '\0':
395 			ep->toksz = 0;
396 			return EQN_TOK_EOF;
397 		case '"':
398 			quoted = 1;
399 			break;
400 		default:
401 			break;
402 		}
403 		if (quoted) {
404 			ep->end = strchr(ep->start + 1, *ep->start);
405 			ep->start++;  /* Skip opening quote. */
406 			if (ep->end == NULL) {
407 				mandoc_msg(MANDOCERR_ARG_QUOTE,
408 				    ep->node->line, ep->node->pos, NULL);
409 				ep->end = strchr(ep->start, '\0');
410 			}
411 		} else {
412 			ep->end = ep->start + 1;
413 			if (*ep->start != '{' && *ep->start != '}')
414 				ep->end += strcspn(ep->end, " ^~\"{}\t");
415 		}
416 		ep->toksz = ep->end - ep->start;
417 		if (quoted && *ep->end != '\0')
418 			ep->end++;  /* Skip closing quote. */
419 		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
420 			ep->end++;
421 		if (quoted)  /* Cannot return, may have to strndup. */
422 			break;
423 		if (mode == MODE_NOSUB)
424 			return EQN_TOK__MAX;
425 		if ((def = eqn_def_find(ep)) == NULL)
426 			break;
427 		if (++lim > EQN_NEST_MAX) {
428 			mandoc_msg(MANDOCERR_ROFFLOOP,
429 			    ep->node->line, ep->node->pos, NULL);
430 			return EQN_TOK_EOF;
431 		}
432 
433 		/* Replace a defined name with its string value. */
434 		if ((diff = def->valsz - ep->toksz) > 0) {
435 			start = ep->start - ep->data;
436 			ep->sz += diff;
437 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
438 			ep->start = ep->data + start;
439 		}
440 		if (diff)
441 			memmove(ep->start + def->valsz, ep->start + ep->toksz,
442 			    strlen(ep->start + ep->toksz) + 1);
443 		memcpy(ep->start, def->val, def->valsz);
444 		last_len = ep->start - ep->data + def->valsz;
445 	}
446 	if (mode != MODE_TOK)
447 		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
448 	if (quoted) {
449 		ep->start = mandoc_strndup(ep->start, ep->toksz);
450 		return EQN_TOK_QUOTED;
451 	}
452 	for (tok = 0; tok < EQN_TOK__MAX; tok++)
453 		if (STRNEQ(ep->start, ep->toksz,
454 		    eqn_toks[tok], strlen(eqn_toks[tok])))
455 			return tok;
456 
457 	for (i = 0; i < EQNSYM__MAX; i++) {
458 		if (STRNEQ(ep->start, ep->toksz,
459 		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
460 			mandoc_asprintf(&ep->start,
461 			    "\\[%s]", eqnsyms[i].sym);
462 			return EQN_TOK_SYM;
463 		}
464 	}
465 	ep->start = mandoc_strndup(ep->start, ep->toksz);
466 	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
467 		if (STRNEQ(ep->start, ep->toksz,
468 		    eqn_func[i], strlen(eqn_func[i])))
469 			return EQN_TOK_FUNC;
470 	return EQN_TOK__MAX;
471 }
472 
473 void
474 eqn_box_free(struct eqn_box *bp)
475 {
476 	if (bp == NULL)
477 		return;
478 
479 	if (bp->first)
480 		eqn_box_free(bp->first);
481 	if (bp->next)
482 		eqn_box_free(bp->next);
483 
484 	free(bp->text);
485 	free(bp->left);
486 	free(bp->right);
487 	free(bp->top);
488 	free(bp->bottom);
489 	free(bp);
490 }
491 
492 struct eqn_box *
493 eqn_box_new(void)
494 {
495 	struct eqn_box	*bp;
496 
497 	bp = mandoc_calloc(1, sizeof(*bp));
498 	bp->expectargs = UINT_MAX;
499 	return bp;
500 }
501 
502 /*
503  * Allocate a box as the last child of the parent node.
504  */
505 static struct eqn_box *
506 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
507 {
508 	struct eqn_box	*bp;
509 
510 	bp = eqn_box_new();
511 	bp->parent = parent;
512 	bp->parent->args++;
513 	bp->font = bp->parent->font;
514 	bp->size = ep->gsize;
515 
516 	if (NULL != parent->first) {
517 		parent->last->next = bp;
518 		bp->prev = parent->last;
519 	} else
520 		parent->first = bp;
521 
522 	parent->last = bp;
523 	return bp;
524 }
525 
526 /*
527  * Reparent the current last node (of the current parent) under a new
528  * EQN_SUBEXPR as the first element.
529  * Then return the new parent.
530  * The new EQN_SUBEXPR will have a two-child limit.
531  */
532 static struct eqn_box *
533 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
534 {
535 	struct eqn_box	*b, *newb;
536 
537 	assert(NULL != parent->last);
538 	b = parent->last;
539 	if (parent->last == parent->first)
540 		parent->first = NULL;
541 	parent->args--;
542 	parent->last = b->prev;
543 	b->prev = NULL;
544 	newb = eqn_box_alloc(ep, parent);
545 	newb->type = EQN_SUBEXPR;
546 	newb->expectargs = 2;
547 	newb->args = 1;
548 	newb->first = newb->last = b;
549 	newb->first->next = NULL;
550 	b->parent = newb;
551 	return newb;
552 }
553 
554 /*
555  * Parse the "delim" control statement.
556  */
557 static void
558 eqn_delim(struct eqn_node *ep)
559 {
560 	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
561 		mandoc_msg(MANDOCERR_REQ_EMPTY,
562 		    ep->node->line, ep->node->pos, "delim");
563 		if (ep->end[0] != '\0')
564 			ep->end++;
565 	} else if (strncmp(ep->end, "off", 3) == 0) {
566 		ep->delim = 0;
567 		ep->end += 3;
568 	} else if (strncmp(ep->end, "on", 2) == 0) {
569 		if (ep->odelim && ep->cdelim)
570 			ep->delim = 1;
571 		ep->end += 2;
572 	} else {
573 		ep->odelim = *ep->end++;
574 		ep->cdelim = *ep->end++;
575 		ep->delim = 1;
576 	}
577 }
578 
579 /*
580  * Undefine a previously-defined string.
581  */
582 static void
583 eqn_undef(struct eqn_node *ep)
584 {
585 	struct eqn_def	*def;
586 
587 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
588 		mandoc_msg(MANDOCERR_REQ_EMPTY,
589 		    ep->node->line, ep->node->pos, "undef");
590 		return;
591 	}
592 	if ((def = eqn_def_find(ep)) == NULL)
593 		return;
594 	free(def->key);
595 	free(def->val);
596 	def->key = def->val = NULL;
597 	def->keysz = def->valsz = 0;
598 }
599 
600 static void
601 eqn_def(struct eqn_node *ep)
602 {
603 	struct eqn_def	*def;
604 	int		 i;
605 
606 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
607 		mandoc_msg(MANDOCERR_REQ_EMPTY,
608 		    ep->node->line, ep->node->pos, "define");
609 		return;
610 	}
611 
612 	/*
613 	 * Search for a key that already exists.
614 	 * Create a new key if none is found.
615 	 */
616 	if ((def = eqn_def_find(ep)) == NULL) {
617 		/* Find holes in string array. */
618 		for (i = 0; i < (int)ep->defsz; i++)
619 			if (0 == ep->defs[i].keysz)
620 				break;
621 
622 		if (i == (int)ep->defsz) {
623 			ep->defsz++;
624 			ep->defs = mandoc_reallocarray(ep->defs,
625 			    ep->defsz, sizeof(struct eqn_def));
626 			ep->defs[i].key = ep->defs[i].val = NULL;
627 		}
628 
629 		def = ep->defs + i;
630 		free(def->key);
631 		def->key = mandoc_strndup(ep->start, ep->toksz);
632 		def->keysz = ep->toksz;
633 	}
634 
635 	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
636 		mandoc_msg(MANDOCERR_REQ_EMPTY,
637 		    ep->node->line, ep->node->pos, "define %s", def->key);
638 		free(def->key);
639 		free(def->val);
640 		def->key = def->val = NULL;
641 		def->keysz = def->valsz = 0;
642 		return;
643 	}
644 	free(def->val);
645 	def->val = mandoc_strndup(ep->start, ep->toksz);
646 	def->valsz = ep->toksz;
647 }
648 
649 void
650 eqn_parse(struct eqn_node *ep)
651 {
652 	struct eqn_box	*cur, *nbox, *parent, *split;
653 	const char	*cp, *cpn;
654 	char		*p;
655 	enum eqn_tok	 tok;
656 	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
657 	int		 size;
658 
659 	parent = ep->node->eqn;
660 	assert(parent != NULL);
661 
662 	/*
663 	 * Empty equation.
664 	 * Do not add it to the high-level syntax tree.
665 	 */
666 
667 	if (ep->data == NULL)
668 		return;
669 
670 	ep->start = ep->end = ep->data + strspn(ep->data, " ^~");
671 
672 next_tok:
673 	tok = eqn_next(ep, MODE_TOK);
674 	switch (tok) {
675 	case EQN_TOK_UNDEF:
676 		eqn_undef(ep);
677 		break;
678 	case EQN_TOK_NDEFINE:
679 	case EQN_TOK_DEFINE:
680 		eqn_def(ep);
681 		break;
682 	case EQN_TOK_TDEFINE:
683 		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
684 		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
685 			mandoc_msg(MANDOCERR_REQ_EMPTY,
686 			    ep->node->line, ep->node->pos, "tdefine");
687 		break;
688 	case EQN_TOK_DELIM:
689 		eqn_delim(ep);
690 		break;
691 	case EQN_TOK_GFONT:
692 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
693 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
694 			    ep->node->pos, "%s", eqn_toks[tok]);
695 		break;
696 	case EQN_TOK_MARK:
697 	case EQN_TOK_LINEUP:
698 		/* Ignore these. */
699 		break;
700 	case EQN_TOK_DYAD:
701 	case EQN_TOK_VEC:
702 	case EQN_TOK_UNDER:
703 	case EQN_TOK_BAR:
704 	case EQN_TOK_TILDE:
705 	case EQN_TOK_HAT:
706 	case EQN_TOK_DOT:
707 	case EQN_TOK_DOTDOT:
708 		if (parent->last == NULL) {
709 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
710 			    ep->node->pos, "%s", eqn_toks[tok]);
711 			cur = eqn_box_alloc(ep, parent);
712 			cur->type = EQN_TEXT;
713 			cur->text = mandoc_strdup("");
714 		}
715 		parent = eqn_box_makebinary(ep, parent);
716 		parent->type = EQN_LIST;
717 		parent->expectargs = 1;
718 		parent->font = EQNFONT_ROMAN;
719 		switch (tok) {
720 		case EQN_TOK_DOTDOT:
721 			parent->top = mandoc_strdup("\\[ad]");
722 			break;
723 		case EQN_TOK_VEC:
724 			parent->top = mandoc_strdup("\\[->]");
725 			break;
726 		case EQN_TOK_DYAD:
727 			parent->top = mandoc_strdup("\\[<>]");
728 			break;
729 		case EQN_TOK_TILDE:
730 			parent->top = mandoc_strdup("\\[a~]");
731 			break;
732 		case EQN_TOK_UNDER:
733 			parent->bottom = mandoc_strdup("\\[ul]");
734 			break;
735 		case EQN_TOK_BAR:
736 			parent->top = mandoc_strdup("\\[rn]");
737 			break;
738 		case EQN_TOK_DOT:
739 			parent->top = mandoc_strdup("\\[a.]");
740 			break;
741 		case EQN_TOK_HAT:
742 			parent->top = mandoc_strdup("\\[ha]");
743 			break;
744 		default:
745 			abort();
746 		}
747 		parent = parent->parent;
748 		break;
749 	case EQN_TOK_FWD:
750 	case EQN_TOK_BACK:
751 	case EQN_TOK_DOWN:
752 	case EQN_TOK_UP:
753 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
754 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
755 			    ep->node->pos, "%s", eqn_toks[tok]);
756 		break;
757 	case EQN_TOK_FAT:
758 	case EQN_TOK_ROMAN:
759 	case EQN_TOK_ITALIC:
760 	case EQN_TOK_BOLD:
761 		while (parent->args == parent->expectargs)
762 			parent = parent->parent;
763 		/*
764 		 * These values apply to the next word or sequence of
765 		 * words; thus, we mark that we'll have a child with
766 		 * exactly one of those.
767 		 */
768 		parent = eqn_box_alloc(ep, parent);
769 		parent->type = EQN_LIST;
770 		parent->expectargs = 1;
771 		switch (tok) {
772 		case EQN_TOK_FAT:
773 			parent->font = EQNFONT_FAT;
774 			break;
775 		case EQN_TOK_ROMAN:
776 			parent->font = EQNFONT_ROMAN;
777 			break;
778 		case EQN_TOK_ITALIC:
779 			parent->font = EQNFONT_ITALIC;
780 			break;
781 		case EQN_TOK_BOLD:
782 			parent->font = EQNFONT_BOLD;
783 			break;
784 		default:
785 			abort();
786 		}
787 		break;
788 	case EQN_TOK_SIZE:
789 	case EQN_TOK_GSIZE:
790 		/* Accept two values: integral size and a single. */
791 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
792 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
793 			    ep->node->pos, "%s", eqn_toks[tok]);
794 			break;
795 		}
796 		size = mandoc_strntoi(ep->start, ep->toksz, 10);
797 		if (-1 == size) {
798 			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
799 			    ep->node->pos, "%s", eqn_toks[tok]);
800 			break;
801 		}
802 		if (EQN_TOK_GSIZE == tok) {
803 			ep->gsize = size;
804 			break;
805 		}
806 		while (parent->args == parent->expectargs)
807 			parent = parent->parent;
808 		parent = eqn_box_alloc(ep, parent);
809 		parent->type = EQN_LIST;
810 		parent->expectargs = 1;
811 		parent->size = size;
812 		break;
813 	case EQN_TOK_FROM:
814 	case EQN_TOK_TO:
815 	case EQN_TOK_SUB:
816 	case EQN_TOK_SUP:
817 		/*
818 		 * We have a left-right-associative expression.
819 		 * Repivot under a positional node, open a child scope
820 		 * and keep on reading.
821 		 */
822 		if (parent->last == NULL) {
823 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
824 			    ep->node->pos, "%s", eqn_toks[tok]);
825 			cur = eqn_box_alloc(ep, parent);
826 			cur->type = EQN_TEXT;
827 			cur->text = mandoc_strdup("");
828 		}
829 		while (parent->expectargs == 1 && parent->args == 1)
830 			parent = parent->parent;
831 		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
832 			for (cur = parent; cur != NULL; cur = cur->parent)
833 				if (cur->pos == EQNPOS_SUB ||
834 				    cur->pos == EQNPOS_SUP ||
835 				    cur->pos == EQNPOS_SUBSUP ||
836 				    cur->pos == EQNPOS_SQRT ||
837 				    cur->pos == EQNPOS_OVER)
838 					break;
839 			if (cur != NULL)
840 				parent = cur->parent;
841 		}
842 		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
843 			parent->expectargs = 3;
844 			parent->pos = EQNPOS_SUBSUP;
845 			break;
846 		}
847 		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
848 			parent->expectargs = 3;
849 			parent->pos = EQNPOS_FROMTO;
850 			break;
851 		}
852 		parent = eqn_box_makebinary(ep, parent);
853 		switch (tok) {
854 		case EQN_TOK_FROM:
855 			parent->pos = EQNPOS_FROM;
856 			break;
857 		case EQN_TOK_TO:
858 			parent->pos = EQNPOS_TO;
859 			break;
860 		case EQN_TOK_SUP:
861 			parent->pos = EQNPOS_SUP;
862 			break;
863 		case EQN_TOK_SUB:
864 			parent->pos = EQNPOS_SUB;
865 			break;
866 		default:
867 			abort();
868 		}
869 		break;
870 	case EQN_TOK_SQRT:
871 		while (parent->args == parent->expectargs)
872 			parent = parent->parent;
873 		/*
874 		 * Accept a left-right-associative set of arguments just
875 		 * like sub and sup and friends but without rebalancing
876 		 * under a pivot.
877 		 */
878 		parent = eqn_box_alloc(ep, parent);
879 		parent->type = EQN_SUBEXPR;
880 		parent->pos = EQNPOS_SQRT;
881 		parent->expectargs = 1;
882 		break;
883 	case EQN_TOK_OVER:
884 		/*
885 		 * We have a right-left-associative fraction.
886 		 * Close out anything that's currently open, then
887 		 * rebalance and continue reading.
888 		 */
889 		if (parent->last == NULL) {
890 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
891 			    ep->node->pos, "%s", eqn_toks[tok]);
892 			cur = eqn_box_alloc(ep, parent);
893 			cur->type = EQN_TEXT;
894 			cur->text = mandoc_strdup("");
895 		}
896 		while (parent->args == parent->expectargs)
897 			parent = parent->parent;
898 		while (EQN_SUBEXPR == parent->type)
899 			parent = parent->parent;
900 		parent = eqn_box_makebinary(ep, parent);
901 		parent->pos = EQNPOS_OVER;
902 		break;
903 	case EQN_TOK_RIGHT:
904 	case EQN_TOK_BRACE_CLOSE:
905 		/*
906 		 * Close out the existing brace.
907 		 * FIXME: this is a shitty sentinel: we should really
908 		 * have a native EQN_BRACE type or whatnot.
909 		 */
910 		for (cur = parent; cur != NULL; cur = cur->parent)
911 			if (cur->type == EQN_LIST &&
912 			    cur->expectargs > 1 &&
913 			    (tok == EQN_TOK_BRACE_CLOSE ||
914 			     cur->left != NULL))
915 				break;
916 		if (cur == NULL) {
917 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
918 			    ep->node->pos, "%s", eqn_toks[tok]);
919 			break;
920 		}
921 		parent = cur;
922 		if (EQN_TOK_RIGHT == tok) {
923 			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
924 				mandoc_msg(MANDOCERR_REQ_EMPTY,
925 				    ep->node->line, ep->node->pos,
926 				    "%s", eqn_toks[tok]);
927 				break;
928 			}
929 			/* Handling depends on right/left. */
930 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
931 				parent->right = mandoc_strdup("\\[rc]");
932 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
933 				parent->right = mandoc_strdup("\\[rf]");
934 			else
935 				parent->right =
936 				    mandoc_strndup(ep->start, ep->toksz);
937 		}
938 		parent = parent->parent;
939 		if (tok == EQN_TOK_BRACE_CLOSE &&
940 		    (parent->type == EQN_PILE ||
941 		     parent->type == EQN_MATRIX))
942 			parent = parent->parent;
943 		/* Close out any "singleton" lists. */
944 		while (parent->type == EQN_LIST &&
945 		    parent->expectargs == 1 &&
946 		    parent->args == 1)
947 			parent = parent->parent;
948 		break;
949 	case EQN_TOK_BRACE_OPEN:
950 	case EQN_TOK_LEFT:
951 		/*
952 		 * If we already have something in the stack and we're
953 		 * in an expression, then rewind til we're not any more
954 		 * (just like with the text node).
955 		 */
956 		while (parent->args == parent->expectargs)
957 			parent = parent->parent;
958 		if (EQN_TOK_LEFT == tok &&
959 		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
960 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
961 			    ep->node->pos, "%s", eqn_toks[tok]);
962 			break;
963 		}
964 		parent = eqn_box_alloc(ep, parent);
965 		parent->type = EQN_LIST;
966 		if (EQN_TOK_LEFT == tok) {
967 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
968 				parent->left = mandoc_strdup("\\[lc]");
969 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
970 				parent->left = mandoc_strdup("\\[lf]");
971 			else
972 				parent->left =
973 				    mandoc_strndup(ep->start, ep->toksz);
974 		}
975 		break;
976 	case EQN_TOK_PILE:
977 	case EQN_TOK_LPILE:
978 	case EQN_TOK_RPILE:
979 	case EQN_TOK_CPILE:
980 	case EQN_TOK_CCOL:
981 	case EQN_TOK_LCOL:
982 	case EQN_TOK_RCOL:
983 		while (parent->args == parent->expectargs)
984 			parent = parent->parent;
985 		parent = eqn_box_alloc(ep, parent);
986 		parent->type = EQN_PILE;
987 		parent->expectargs = 1;
988 		break;
989 	case EQN_TOK_ABOVE:
990 		for (cur = parent; cur != NULL; cur = cur->parent)
991 			if (cur->type == EQN_PILE)
992 				break;
993 		if (cur == NULL) {
994 			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
995 			    ep->node->pos, "%s", eqn_toks[tok]);
996 			break;
997 		}
998 		parent = eqn_box_alloc(ep, cur);
999 		parent->type = EQN_LIST;
1000 		break;
1001 	case EQN_TOK_MATRIX:
1002 		while (parent->args == parent->expectargs)
1003 			parent = parent->parent;
1004 		parent = eqn_box_alloc(ep, parent);
1005 		parent->type = EQN_MATRIX;
1006 		parent->expectargs = 1;
1007 		break;
1008 	case EQN_TOK_EOF:
1009 		return;
1010 	case EQN_TOK__MAX:
1011 	case EQN_TOK_FUNC:
1012 	case EQN_TOK_QUOTED:
1013 	case EQN_TOK_SYM:
1014 		p = ep->start;
1015 		assert(p != NULL);
1016 		/*
1017 		 * If we already have something in the stack and we're
1018 		 * in an expression, then rewind til we're not any more.
1019 		 */
1020 		while (parent->args == parent->expectargs)
1021 			parent = parent->parent;
1022 		cur = eqn_box_alloc(ep, parent);
1023 		cur->type = EQN_TEXT;
1024 		cur->text = p;
1025 		switch (tok) {
1026 		case EQN_TOK_FUNC:
1027 			cur->font = EQNFONT_ROMAN;
1028 			break;
1029 		case EQN_TOK_QUOTED:
1030 			if (cur->font == EQNFONT_NONE)
1031 				cur->font = EQNFONT_ITALIC;
1032 			break;
1033 		case EQN_TOK_SYM:
1034 			break;
1035 		default:
1036 			if (cur->font != EQNFONT_NONE || *p == '\0')
1037 				break;
1038 			cpn = p - 1;
1039 			ccln = CCL_LET;
1040 			split = NULL;
1041 			for (;;) {
1042 				/* Advance to next character. */
1043 				cp = cpn++;
1044 				ccl = ccln;
1045 				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1046 				    isdigit((unsigned char)*cpn) ||
1047 				    (*cpn == '.' && (ccl == CCL_DIG ||
1048 				     isdigit((unsigned char)cpn[1]))) ?
1049 				    CCL_DIG : CCL_PUN;
1050 				/* No boundary before first character. */
1051 				if (cp < p)
1052 					continue;
1053 				cur->font = ccl == CCL_LET ?
1054 				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1055 				if (*cp == '\\')
1056 					mandoc_escape(&cpn, NULL, NULL);
1057 				/* No boundary after last character. */
1058 				if (*cpn == '\0')
1059 					break;
1060 				if (ccln == ccl && *cp != ',' && *cpn != ',')
1061 					continue;
1062 				/* Boundary found, split the text. */
1063 				if (parent->args == parent->expectargs) {
1064 					/* Remove the text from the tree. */
1065 					if (cur->prev == NULL)
1066 						parent->first = cur->next;
1067 					else
1068 						cur->prev->next = NULL;
1069 					parent->last = cur->prev;
1070 					parent->args--;
1071 					/* Set up a list instead. */
1072 					split = eqn_box_alloc(ep, parent);
1073 					split->type = EQN_LIST;
1074 					/* Insert the word into the list. */
1075 					split->first = split->last = cur;
1076 					cur->parent = split;
1077 					cur->prev = NULL;
1078 					parent = split;
1079 				}
1080 				/* Append a new text box. */
1081 				nbox = eqn_box_alloc(ep, parent);
1082 				nbox->type = EQN_TEXT;
1083 				nbox->text = mandoc_strdup(cpn);
1084 				/* Truncate the old box. */
1085 				p = mandoc_strndup(cur->text,
1086 				    cpn - cur->text);
1087 				free(cur->text);
1088 				cur->text = p;
1089 				/* Setup to process the new box. */
1090 				cur = nbox;
1091 				p = nbox->text;
1092 				cpn = p - 1;
1093 				ccln = CCL_LET;
1094 			}
1095 			if (split != NULL)
1096 				parent = split->parent;
1097 			break;
1098 		}
1099 		break;
1100 	default:
1101 		abort();
1102 	}
1103 	goto next_tok;
1104 }
1105 
1106 void
1107 eqn_free(struct eqn_node *p)
1108 {
1109 	int		 i;
1110 
1111 	if (p == NULL)
1112 		return;
1113 
1114 	for (i = 0; i < (int)p->defsz; i++) {
1115 		free(p->defs[i].key);
1116 		free(p->defs[i].val);
1117 	}
1118 
1119 	free(p->data);
1120 	free(p->defs);
1121 	free(p);
1122 }
1123