xref: /openbsd-src/usr.bin/mandoc/eqn.c (revision 897fc685943471cf985a0fe38ba076ea6fe74fa5)
1 /*	$OpenBSD: eqn.c,v 1.41 2017/07/15 15:06:31 bentley Exp $ */
2 /*
3  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <time.h>
27 
28 #include "mandoc_aux.h"
29 #include "mandoc.h"
30 #include "roff.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33 
34 #define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
35 #define	STRNEQ(p1, sz1, p2, sz2) \
36 	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
37 
38 enum	eqn_tok {
39 	EQN_TOK_DYAD = 0,
40 	EQN_TOK_VEC,
41 	EQN_TOK_UNDER,
42 	EQN_TOK_BAR,
43 	EQN_TOK_TILDE,
44 	EQN_TOK_HAT,
45 	EQN_TOK_DOT,
46 	EQN_TOK_DOTDOT,
47 	EQN_TOK_FWD,
48 	EQN_TOK_BACK,
49 	EQN_TOK_DOWN,
50 	EQN_TOK_UP,
51 	EQN_TOK_FAT,
52 	EQN_TOK_ROMAN,
53 	EQN_TOK_ITALIC,
54 	EQN_TOK_BOLD,
55 	EQN_TOK_SIZE,
56 	EQN_TOK_SUB,
57 	EQN_TOK_SUP,
58 	EQN_TOK_SQRT,
59 	EQN_TOK_OVER,
60 	EQN_TOK_FROM,
61 	EQN_TOK_TO,
62 	EQN_TOK_BRACE_OPEN,
63 	EQN_TOK_BRACE_CLOSE,
64 	EQN_TOK_GSIZE,
65 	EQN_TOK_GFONT,
66 	EQN_TOK_MARK,
67 	EQN_TOK_LINEUP,
68 	EQN_TOK_LEFT,
69 	EQN_TOK_RIGHT,
70 	EQN_TOK_PILE,
71 	EQN_TOK_LPILE,
72 	EQN_TOK_RPILE,
73 	EQN_TOK_CPILE,
74 	EQN_TOK_MATRIX,
75 	EQN_TOK_CCOL,
76 	EQN_TOK_LCOL,
77 	EQN_TOK_RCOL,
78 	EQN_TOK_DELIM,
79 	EQN_TOK_DEFINE,
80 	EQN_TOK_TDEFINE,
81 	EQN_TOK_NDEFINE,
82 	EQN_TOK_UNDEF,
83 	EQN_TOK_ABOVE,
84 	EQN_TOK__MAX,
85 	EQN_TOK_FUNC,
86 	EQN_TOK_QUOTED,
87 	EQN_TOK_SYM,
88 	EQN_TOK_EOF
89 };
90 
91 static	const char *eqn_toks[EQN_TOK__MAX] = {
92 	"dyad", /* EQN_TOK_DYAD */
93 	"vec", /* EQN_TOK_VEC */
94 	"under", /* EQN_TOK_UNDER */
95 	"bar", /* EQN_TOK_BAR */
96 	"tilde", /* EQN_TOK_TILDE */
97 	"hat", /* EQN_TOK_HAT */
98 	"dot", /* EQN_TOK_DOT */
99 	"dotdot", /* EQN_TOK_DOTDOT */
100 	"fwd", /* EQN_TOK_FWD * */
101 	"back", /* EQN_TOK_BACK */
102 	"down", /* EQN_TOK_DOWN */
103 	"up", /* EQN_TOK_UP */
104 	"fat", /* EQN_TOK_FAT */
105 	"roman", /* EQN_TOK_ROMAN */
106 	"italic", /* EQN_TOK_ITALIC */
107 	"bold", /* EQN_TOK_BOLD */
108 	"size", /* EQN_TOK_SIZE */
109 	"sub", /* EQN_TOK_SUB */
110 	"sup", /* EQN_TOK_SUP */
111 	"sqrt", /* EQN_TOK_SQRT */
112 	"over", /* EQN_TOK_OVER */
113 	"from", /* EQN_TOK_FROM */
114 	"to", /* EQN_TOK_TO */
115 	"{", /* EQN_TOK_BRACE_OPEN */
116 	"}", /* EQN_TOK_BRACE_CLOSE */
117 	"gsize", /* EQN_TOK_GSIZE */
118 	"gfont", /* EQN_TOK_GFONT */
119 	"mark", /* EQN_TOK_MARK */
120 	"lineup", /* EQN_TOK_LINEUP */
121 	"left", /* EQN_TOK_LEFT */
122 	"right", /* EQN_TOK_RIGHT */
123 	"pile", /* EQN_TOK_PILE */
124 	"lpile", /* EQN_TOK_LPILE */
125 	"rpile", /* EQN_TOK_RPILE */
126 	"cpile", /* EQN_TOK_CPILE */
127 	"matrix", /* EQN_TOK_MATRIX */
128 	"ccol", /* EQN_TOK_CCOL */
129 	"lcol", /* EQN_TOK_LCOL */
130 	"rcol", /* EQN_TOK_RCOL */
131 	"delim", /* EQN_TOK_DELIM */
132 	"define", /* EQN_TOK_DEFINE */
133 	"tdefine", /* EQN_TOK_TDEFINE */
134 	"ndefine", /* EQN_TOK_NDEFINE */
135 	"undef", /* EQN_TOK_UNDEF */
136 	"above", /* EQN_TOK_ABOVE */
137 };
138 
139 static	const char *const eqn_func[] = {
140 	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
141 	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
142 	"if",	"lim",	"ln",	"log",	"max",	"min",
143 	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
144 };
145 
146 enum	eqn_symt {
147 	EQNSYM_alpha = 0,
148 	EQNSYM_beta,
149 	EQNSYM_chi,
150 	EQNSYM_delta,
151 	EQNSYM_epsilon,
152 	EQNSYM_eta,
153 	EQNSYM_gamma,
154 	EQNSYM_iota,
155 	EQNSYM_kappa,
156 	EQNSYM_lambda,
157 	EQNSYM_mu,
158 	EQNSYM_nu,
159 	EQNSYM_omega,
160 	EQNSYM_omicron,
161 	EQNSYM_phi,
162 	EQNSYM_pi,
163 	EQNSYM_ps,
164 	EQNSYM_rho,
165 	EQNSYM_sigma,
166 	EQNSYM_tau,
167 	EQNSYM_theta,
168 	EQNSYM_upsilon,
169 	EQNSYM_xi,
170 	EQNSYM_zeta,
171 	EQNSYM_DELTA,
172 	EQNSYM_GAMMA,
173 	EQNSYM_LAMBDA,
174 	EQNSYM_OMEGA,
175 	EQNSYM_PHI,
176 	EQNSYM_PI,
177 	EQNSYM_PSI,
178 	EQNSYM_SIGMA,
179 	EQNSYM_THETA,
180 	EQNSYM_UPSILON,
181 	EQNSYM_XI,
182 	EQNSYM_inter,
183 	EQNSYM_union,
184 	EQNSYM_prod,
185 	EQNSYM_int,
186 	EQNSYM_sum,
187 	EQNSYM_grad,
188 	EQNSYM_del,
189 	EQNSYM_times,
190 	EQNSYM_cdot,
191 	EQNSYM_nothing,
192 	EQNSYM_approx,
193 	EQNSYM_prime,
194 	EQNSYM_half,
195 	EQNSYM_partial,
196 	EQNSYM_inf,
197 	EQNSYM_muchgreat,
198 	EQNSYM_muchless,
199 	EQNSYM_larrow,
200 	EQNSYM_rarrow,
201 	EQNSYM_pm,
202 	EQNSYM_nequal,
203 	EQNSYM_equiv,
204 	EQNSYM_lessequal,
205 	EQNSYM_moreequal,
206 	EQNSYM_minus,
207 	EQNSYM__MAX
208 };
209 
210 struct	eqnsym {
211 	const char	*str;
212 	const char	*sym;
213 };
214 
215 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
216 	{ "alpha", "*a" }, /* EQNSYM_alpha */
217 	{ "beta", "*b" }, /* EQNSYM_beta */
218 	{ "chi", "*x" }, /* EQNSYM_chi */
219 	{ "delta", "*d" }, /* EQNSYM_delta */
220 	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
221 	{ "eta", "*y" }, /* EQNSYM_eta */
222 	{ "gamma", "*g" }, /* EQNSYM_gamma */
223 	{ "iota", "*i" }, /* EQNSYM_iota */
224 	{ "kappa", "*k" }, /* EQNSYM_kappa */
225 	{ "lambda", "*l" }, /* EQNSYM_lambda */
226 	{ "mu", "*m" }, /* EQNSYM_mu */
227 	{ "nu", "*n" }, /* EQNSYM_nu */
228 	{ "omega", "*w" }, /* EQNSYM_omega */
229 	{ "omicron", "*o" }, /* EQNSYM_omicron */
230 	{ "phi", "*f" }, /* EQNSYM_phi */
231 	{ "pi", "*p" }, /* EQNSYM_pi */
232 	{ "psi", "*q" }, /* EQNSYM_psi */
233 	{ "rho", "*r" }, /* EQNSYM_rho */
234 	{ "sigma", "*s" }, /* EQNSYM_sigma */
235 	{ "tau", "*t" }, /* EQNSYM_tau */
236 	{ "theta", "*h" }, /* EQNSYM_theta */
237 	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
238 	{ "xi", "*c" }, /* EQNSYM_xi */
239 	{ "zeta", "*z" }, /* EQNSYM_zeta */
240 	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
241 	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
242 	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
243 	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
244 	{ "PHI", "*F" }, /* EQNSYM_PHI */
245 	{ "PI", "*P" }, /* EQNSYM_PI */
246 	{ "PSI", "*Q" }, /* EQNSYM_PSI */
247 	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
248 	{ "THETA", "*H" }, /* EQNSYM_THETA */
249 	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
250 	{ "XI", "*C" }, /* EQNSYM_XI */
251 	{ "inter", "ca" }, /* EQNSYM_inter */
252 	{ "union", "cu" }, /* EQNSYM_union */
253 	{ "prod", "product" }, /* EQNSYM_prod */
254 	{ "int", "integral" }, /* EQNSYM_int */
255 	{ "sum", "sum" }, /* EQNSYM_sum */
256 	{ "grad", "gr" }, /* EQNSYM_grad */
257 	{ "del", "gr" }, /* EQNSYM_del */
258 	{ "times", "mu" }, /* EQNSYM_times */
259 	{ "cdot", "pc" }, /* EQNSYM_cdot */
260 	{ "nothing", "&" }, /* EQNSYM_nothing */
261 	{ "approx", "~~" }, /* EQNSYM_approx */
262 	{ "prime", "fm" }, /* EQNSYM_prime */
263 	{ "half", "12" }, /* EQNSYM_half */
264 	{ "partial", "pd" }, /* EQNSYM_partial */
265 	{ "inf", "if" }, /* EQNSYM_inf */
266 	{ ">>", ">>" }, /* EQNSYM_muchgreat */
267 	{ "<<", "<<" }, /* EQNSYM_muchless */
268 	{ "<-", "<-" }, /* EQNSYM_larrow */
269 	{ "->", "->" }, /* EQNSYM_rarrow */
270 	{ "+-", "+-" }, /* EQNSYM_pm */
271 	{ "!=", "!=" }, /* EQNSYM_nequal */
272 	{ "==", "==" }, /* EQNSYM_equiv */
273 	{ "<=", "<=" }, /* EQNSYM_lessequal */
274 	{ ">=", ">=" }, /* EQNSYM_moreequal */
275 	{ "-", "mi" }, /* EQNSYM_minus */
276 };
277 
278 enum	parse_mode {
279 	MODE_QUOTED,
280 	MODE_NOSUB,
281 	MODE_SUB,
282 	MODE_TOK
283 };
284 
285 static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
286 static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
287 				struct eqn_box *);
288 static	void		 eqn_def(struct eqn_node *);
289 static	struct eqn_def	*eqn_def_find(struct eqn_node *);
290 static	void		 eqn_delim(struct eqn_node *);
291 static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
292 static	void		 eqn_undef(struct eqn_node *);
293 
294 
295 struct eqn_node *
296 eqn_alloc(struct mparse *parse)
297 {
298 	struct eqn_node *ep;
299 
300 	ep = mandoc_calloc(1, sizeof(*ep));
301 	ep->parse = parse;
302 	ep->gsize = EQN_DEFSIZE;
303 	return ep;
304 }
305 
306 void
307 eqn_reset(struct eqn_node *ep)
308 {
309 	free(ep->data);
310 	ep->data = ep->start = ep->end = NULL;
311 	ep->sz = ep->toksz = 0;
312 }
313 
314 void
315 eqn_read(struct eqn_node *ep, const char *p)
316 {
317 	char		*cp;
318 
319 	if (ep->data == NULL) {
320 		ep->sz = strlen(p);
321 		ep->data = mandoc_strdup(p);
322 	} else {
323 		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
324 		free(ep->data);
325 		ep->data = cp;
326 	}
327 	ep->sz += 1;
328 }
329 
330 /*
331  * Find the key "key" of the give size within our eqn-defined values.
332  */
333 static struct eqn_def *
334 eqn_def_find(struct eqn_node *ep)
335 {
336 	int		 i;
337 
338 	for (i = 0; i < (int)ep->defsz; i++)
339 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
340 		    ep->defs[i].keysz, ep->start, ep->toksz))
341 			return &ep->defs[i];
342 
343 	return NULL;
344 }
345 
346 /*
347  * Parse a token from the input text.  The modes are:
348  * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
349  *   before its next occurence.  Do not interpret the token in any
350  *   way and return EQN_TOK_QUOTED.  All other modes behave like
351  *   MODE_QUOTED when *ep->start is '"'.
352  * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
353  *   otherwise, it ends before the next whitespace or brace.
354  *   Do not interpret the token and return EQN_TOK__MAX.
355  * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
356  *   alias created with define.  If it is an alias, replace it with
357  *   its string value and reparse.
358  * MODE_TOK: Like MODE_SUB, but also check the token against the list
359  *   of tokens, and if there is a match, return that token.  Otherwise,
360  *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
361  *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
362  *   a token match, *ep->start is set to an allocated string that the
363  *   caller is expected to free.
364  * All modes skip whitespace following the end of the token.
365  */
366 static enum eqn_tok
367 eqn_next(struct eqn_node *ep, enum parse_mode mode)
368 {
369 	static int	 last_len, lim;
370 
371 	struct eqn_def	*def;
372 	size_t		 start;
373 	int		 diff, i, quoted;
374 	enum eqn_tok	 tok;
375 
376 	/*
377 	 * Reset the recursion counter after advancing
378 	 * beyond the end of the previous substitution.
379 	 */
380 	if (ep->end - ep->data >= last_len)
381 		lim = 0;
382 
383 	ep->start = ep->end;
384 	quoted = mode == MODE_QUOTED;
385 	for (;;) {
386 		switch (*ep->start) {
387 		case '\0':
388 			ep->toksz = 0;
389 			return EQN_TOK_EOF;
390 		case '"':
391 			quoted = 1;
392 			break;
393 		default:
394 			break;
395 		}
396 		if (quoted) {
397 			ep->end = strchr(ep->start + 1, *ep->start);
398 			ep->start++;  /* Skip opening quote. */
399 			if (ep->end == NULL) {
400 				mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
401 				    ep->node->line, ep->node->pos, NULL);
402 				ep->end = strchr(ep->start, '\0');
403 			}
404 		} else {
405 			ep->end = ep->start + 1;
406 			if (*ep->start != '{' && *ep->start != '}')
407 				ep->end += strcspn(ep->end, " ^~\"{}\t");
408 		}
409 		ep->toksz = ep->end - ep->start;
410 		if (quoted && *ep->end != '\0')
411 			ep->end++;  /* Skip closing quote. */
412 		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
413 			ep->end++;
414 		if (quoted)  /* Cannot return, may have to strndup. */
415 			break;
416 		if (mode == MODE_NOSUB)
417 			return EQN_TOK__MAX;
418 		if ((def = eqn_def_find(ep)) == NULL)
419 			break;
420 		if (++lim > EQN_NEST_MAX) {
421 			mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
422 			    ep->node->line, ep->node->pos, NULL);
423 			return EQN_TOK_EOF;
424 		}
425 
426 		/* Replace a defined name with its string value. */
427 		if ((diff = def->valsz - ep->toksz) > 0) {
428 			start = ep->start - ep->data;
429 			ep->sz += diff;
430 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
431 			ep->start = ep->data + start;
432 		}
433 		if (diff)
434 			memmove(ep->start + def->valsz, ep->start + ep->toksz,
435 			    strlen(ep->start + ep->toksz) + 1);
436 		memcpy(ep->start, def->val, def->valsz);
437 		last_len = ep->start - ep->data + def->valsz;
438 	}
439 	if (mode != MODE_TOK)
440 		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
441 	if (quoted) {
442 		ep->start = mandoc_strndup(ep->start, ep->toksz);
443 		return EQN_TOK_QUOTED;
444 	}
445 	for (tok = 0; tok < EQN_TOK__MAX; tok++)
446 		if (STRNEQ(ep->start, ep->toksz,
447 		    eqn_toks[tok], strlen(eqn_toks[tok])))
448 			return tok;
449 
450 	for (i = 0; i < EQNSYM__MAX; i++) {
451 		if (STRNEQ(ep->start, ep->toksz,
452 		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
453 			mandoc_asprintf(&ep->start,
454 			    "\\[%s]", eqnsyms[i].sym);
455 			return EQN_TOK_SYM;
456 		}
457 	}
458 	ep->start = mandoc_strndup(ep->start, ep->toksz);
459 	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
460 		if (STRNEQ(ep->start, ep->toksz,
461 		    eqn_func[i], strlen(eqn_func[i])))
462 			return EQN_TOK_FUNC;
463 	return EQN_TOK__MAX;
464 }
465 
466 void
467 eqn_box_free(struct eqn_box *bp)
468 {
469 
470 	if (bp->first)
471 		eqn_box_free(bp->first);
472 	if (bp->next)
473 		eqn_box_free(bp->next);
474 
475 	free(bp->text);
476 	free(bp->left);
477 	free(bp->right);
478 	free(bp->top);
479 	free(bp->bottom);
480 	free(bp);
481 }
482 
483 /*
484  * Allocate a box as the last child of the parent node.
485  */
486 static struct eqn_box *
487 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
488 {
489 	struct eqn_box	*bp;
490 
491 	bp = mandoc_calloc(1, sizeof(struct eqn_box));
492 	bp->parent = parent;
493 	bp->parent->args++;
494 	bp->expectargs = UINT_MAX;
495 	bp->font = bp->parent->font;
496 	bp->size = ep->gsize;
497 
498 	if (NULL != parent->first) {
499 		parent->last->next = bp;
500 		bp->prev = parent->last;
501 	} else
502 		parent->first = bp;
503 
504 	parent->last = bp;
505 	return bp;
506 }
507 
508 /*
509  * Reparent the current last node (of the current parent) under a new
510  * EQN_SUBEXPR as the first element.
511  * Then return the new parent.
512  * The new EQN_SUBEXPR will have a two-child limit.
513  */
514 static struct eqn_box *
515 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
516 {
517 	struct eqn_box	*b, *newb;
518 
519 	assert(NULL != parent->last);
520 	b = parent->last;
521 	if (parent->last == parent->first)
522 		parent->first = NULL;
523 	parent->args--;
524 	parent->last = b->prev;
525 	b->prev = NULL;
526 	newb = eqn_box_alloc(ep, parent);
527 	newb->type = EQN_SUBEXPR;
528 	newb->expectargs = 2;
529 	newb->args = 1;
530 	newb->first = newb->last = b;
531 	newb->first->next = NULL;
532 	b->parent = newb;
533 	return newb;
534 }
535 
536 /*
537  * Parse the "delim" control statement.
538  */
539 static void
540 eqn_delim(struct eqn_node *ep)
541 {
542 	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
543 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
544 		    ep->node->line, ep->node->pos, "delim");
545 		if (ep->end[0] != '\0')
546 			ep->end++;
547 	} else if (strncmp(ep->end, "off", 3) == 0) {
548 		ep->delim = 0;
549 		ep->end += 3;
550 	} else if (strncmp(ep->end, "on", 2) == 0) {
551 		if (ep->odelim && ep->cdelim)
552 			ep->delim = 1;
553 		ep->end += 2;
554 	} else {
555 		ep->odelim = *ep->end++;
556 		ep->cdelim = *ep->end++;
557 		ep->delim = 1;
558 	}
559 }
560 
561 /*
562  * Undefine a previously-defined string.
563  */
564 static void
565 eqn_undef(struct eqn_node *ep)
566 {
567 	struct eqn_def	*def;
568 
569 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
570 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
571 		    ep->node->line, ep->node->pos, "undef");
572 		return;
573 	}
574 	if ((def = eqn_def_find(ep)) == NULL)
575 		return;
576 	free(def->key);
577 	free(def->val);
578 	def->key = def->val = NULL;
579 	def->keysz = def->valsz = 0;
580 }
581 
582 static void
583 eqn_def(struct eqn_node *ep)
584 {
585 	struct eqn_def	*def;
586 	int		 i;
587 
588 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
589 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
590 		    ep->node->line, ep->node->pos, "define");
591 		return;
592 	}
593 
594 	/*
595 	 * Search for a key that already exists.
596 	 * Create a new key if none is found.
597 	 */
598 	if ((def = eqn_def_find(ep)) == NULL) {
599 		/* Find holes in string array. */
600 		for (i = 0; i < (int)ep->defsz; i++)
601 			if (0 == ep->defs[i].keysz)
602 				break;
603 
604 		if (i == (int)ep->defsz) {
605 			ep->defsz++;
606 			ep->defs = mandoc_reallocarray(ep->defs,
607 			    ep->defsz, sizeof(struct eqn_def));
608 			ep->defs[i].key = ep->defs[i].val = NULL;
609 		}
610 
611 		def = ep->defs + i;
612 		free(def->key);
613 		def->key = mandoc_strndup(ep->start, ep->toksz);
614 		def->keysz = ep->toksz;
615 	}
616 
617 	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
618 		mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
619 		    ep->node->line, ep->node->pos, "define %s", def->key);
620 		free(def->key);
621 		free(def->val);
622 		def->key = def->val = NULL;
623 		def->keysz = def->valsz = 0;
624 		return;
625 	}
626 	free(def->val);
627 	def->val = mandoc_strndup(ep->start, ep->toksz);
628 	def->valsz = ep->toksz;
629 }
630 
631 void
632 eqn_parse(struct eqn_node *ep)
633 {
634 	struct eqn_box	*cur, *nbox, *parent, *split;
635 	const char	*cp, *cpn;
636 	char		*p;
637 	enum eqn_tok	 tok;
638 	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
639 	int		 size;
640 
641 	parent = ep->node->eqn;
642 	assert(parent != NULL);
643 
644 	/*
645 	 * Empty equation.
646 	 * Do not add it to the high-level syntax tree.
647 	 */
648 
649 	if (ep->data == NULL)
650 		return;
651 
652 	ep->start = ep->end = ep->data + strspn(ep->data, " ^~");
653 
654 next_tok:
655 	tok = eqn_next(ep, MODE_TOK);
656 	switch (tok) {
657 	case EQN_TOK_UNDEF:
658 		eqn_undef(ep);
659 		break;
660 	case EQN_TOK_NDEFINE:
661 	case EQN_TOK_DEFINE:
662 		eqn_def(ep);
663 		break;
664 	case EQN_TOK_TDEFINE:
665 		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
666 		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
667 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
668 			    ep->node->line, ep->node->pos, "tdefine");
669 		break;
670 	case EQN_TOK_DELIM:
671 		eqn_delim(ep);
672 		break;
673 	case EQN_TOK_GFONT:
674 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
675 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
676 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
677 		break;
678 	case EQN_TOK_MARK:
679 	case EQN_TOK_LINEUP:
680 		/* Ignore these. */
681 		break;
682 	case EQN_TOK_DYAD:
683 	case EQN_TOK_VEC:
684 	case EQN_TOK_UNDER:
685 	case EQN_TOK_BAR:
686 	case EQN_TOK_TILDE:
687 	case EQN_TOK_HAT:
688 	case EQN_TOK_DOT:
689 	case EQN_TOK_DOTDOT:
690 		if (parent->last == NULL) {
691 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
692 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
693 			cur = eqn_box_alloc(ep, parent);
694 			cur->type = EQN_TEXT;
695 			cur->text = mandoc_strdup("");
696 		}
697 		parent = eqn_box_makebinary(ep, parent);
698 		parent->type = EQN_LIST;
699 		parent->expectargs = 1;
700 		parent->font = EQNFONT_ROMAN;
701 		switch (tok) {
702 		case EQN_TOK_DOTDOT:
703 			parent->top = mandoc_strdup("\\[ad]");
704 			break;
705 		case EQN_TOK_VEC:
706 			parent->top = mandoc_strdup("\\[->]");
707 			break;
708 		case EQN_TOK_DYAD:
709 			parent->top = mandoc_strdup("\\[<>]");
710 			break;
711 		case EQN_TOK_TILDE:
712 			parent->top = mandoc_strdup("\\[a~]");
713 			break;
714 		case EQN_TOK_UNDER:
715 			parent->bottom = mandoc_strdup("\\[ul]");
716 			break;
717 		case EQN_TOK_BAR:
718 			parent->top = mandoc_strdup("\\[rn]");
719 			break;
720 		case EQN_TOK_DOT:
721 			parent->top = mandoc_strdup("\\[a.]");
722 			break;
723 		case EQN_TOK_HAT:
724 			parent->top = mandoc_strdup("\\[ha]");
725 			break;
726 		default:
727 			abort();
728 		}
729 		parent = parent->parent;
730 		break;
731 	case EQN_TOK_FWD:
732 	case EQN_TOK_BACK:
733 	case EQN_TOK_DOWN:
734 	case EQN_TOK_UP:
735 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
736 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
737 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
738 		break;
739 	case EQN_TOK_FAT:
740 	case EQN_TOK_ROMAN:
741 	case EQN_TOK_ITALIC:
742 	case EQN_TOK_BOLD:
743 		while (parent->args == parent->expectargs)
744 			parent = parent->parent;
745 		/*
746 		 * These values apply to the next word or sequence of
747 		 * words; thus, we mark that we'll have a child with
748 		 * exactly one of those.
749 		 */
750 		parent = eqn_box_alloc(ep, parent);
751 		parent->type = EQN_LIST;
752 		parent->expectargs = 1;
753 		switch (tok) {
754 		case EQN_TOK_FAT:
755 			parent->font = EQNFONT_FAT;
756 			break;
757 		case EQN_TOK_ROMAN:
758 			parent->font = EQNFONT_ROMAN;
759 			break;
760 		case EQN_TOK_ITALIC:
761 			parent->font = EQNFONT_ITALIC;
762 			break;
763 		case EQN_TOK_BOLD:
764 			parent->font = EQNFONT_BOLD;
765 			break;
766 		default:
767 			abort();
768 		}
769 		break;
770 	case EQN_TOK_SIZE:
771 	case EQN_TOK_GSIZE:
772 		/* Accept two values: integral size and a single. */
773 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
774 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
775 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
776 			break;
777 		}
778 		size = mandoc_strntoi(ep->start, ep->toksz, 10);
779 		if (-1 == size) {
780 			mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
781 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
782 			break;
783 		}
784 		if (EQN_TOK_GSIZE == tok) {
785 			ep->gsize = size;
786 			break;
787 		}
788 		while (parent->args == parent->expectargs)
789 			parent = parent->parent;
790 		parent = eqn_box_alloc(ep, parent);
791 		parent->type = EQN_LIST;
792 		parent->expectargs = 1;
793 		parent->size = size;
794 		break;
795 	case EQN_TOK_FROM:
796 	case EQN_TOK_TO:
797 	case EQN_TOK_SUB:
798 	case EQN_TOK_SUP:
799 		/*
800 		 * We have a left-right-associative expression.
801 		 * Repivot under a positional node, open a child scope
802 		 * and keep on reading.
803 		 */
804 		if (parent->last == NULL) {
805 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
806 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
807 			cur = eqn_box_alloc(ep, parent);
808 			cur->type = EQN_TEXT;
809 			cur->text = mandoc_strdup("");
810 		}
811 		while (parent->expectargs == 1 && parent->args == 1)
812 			parent = parent->parent;
813 		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
814 			for (cur = parent; cur != NULL; cur = cur->parent)
815 				if (cur->pos == EQNPOS_SUB ||
816 				    cur->pos == EQNPOS_SUP ||
817 				    cur->pos == EQNPOS_SUBSUP ||
818 				    cur->pos == EQNPOS_SQRT ||
819 				    cur->pos == EQNPOS_OVER)
820 					break;
821 			if (cur != NULL)
822 				parent = cur->parent;
823 		}
824 		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
825 			parent->expectargs = 3;
826 			parent->pos = EQNPOS_SUBSUP;
827 			break;
828 		}
829 		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
830 			parent->expectargs = 3;
831 			parent->pos = EQNPOS_FROMTO;
832 			break;
833 		}
834 		parent = eqn_box_makebinary(ep, parent);
835 		switch (tok) {
836 		case EQN_TOK_FROM:
837 			parent->pos = EQNPOS_FROM;
838 			break;
839 		case EQN_TOK_TO:
840 			parent->pos = EQNPOS_TO;
841 			break;
842 		case EQN_TOK_SUP:
843 			parent->pos = EQNPOS_SUP;
844 			break;
845 		case EQN_TOK_SUB:
846 			parent->pos = EQNPOS_SUB;
847 			break;
848 		default:
849 			abort();
850 		}
851 		break;
852 	case EQN_TOK_SQRT:
853 		while (parent->args == parent->expectargs)
854 			parent = parent->parent;
855 		/*
856 		 * Accept a left-right-associative set of arguments just
857 		 * like sub and sup and friends but without rebalancing
858 		 * under a pivot.
859 		 */
860 		parent = eqn_box_alloc(ep, parent);
861 		parent->type = EQN_SUBEXPR;
862 		parent->pos = EQNPOS_SQRT;
863 		parent->expectargs = 1;
864 		break;
865 	case EQN_TOK_OVER:
866 		/*
867 		 * We have a right-left-associative fraction.
868 		 * Close out anything that's currently open, then
869 		 * rebalance and continue reading.
870 		 */
871 		if (parent->last == NULL) {
872 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
873 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
874 			cur = eqn_box_alloc(ep, parent);
875 			cur->type = EQN_TEXT;
876 			cur->text = mandoc_strdup("");
877 		}
878 		while (parent->args == parent->expectargs)
879 			parent = parent->parent;
880 		while (EQN_SUBEXPR == parent->type)
881 			parent = parent->parent;
882 		parent = eqn_box_makebinary(ep, parent);
883 		parent->pos = EQNPOS_OVER;
884 		break;
885 	case EQN_TOK_RIGHT:
886 	case EQN_TOK_BRACE_CLOSE:
887 		/*
888 		 * Close out the existing brace.
889 		 * FIXME: this is a shitty sentinel: we should really
890 		 * have a native EQN_BRACE type or whatnot.
891 		 */
892 		for (cur = parent; cur != NULL; cur = cur->parent)
893 			if (cur->type == EQN_LIST &&
894 			    cur->expectargs > 1 &&
895 			    (tok == EQN_TOK_BRACE_CLOSE ||
896 			     cur->left != NULL))
897 				break;
898 		if (cur == NULL) {
899 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
900 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
901 			break;
902 		}
903 		parent = cur;
904 		if (EQN_TOK_RIGHT == tok) {
905 			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
906 				mandoc_msg(MANDOCERR_REQ_EMPTY,
907 				    ep->parse, ep->node->line,
908 				    ep->node->pos, eqn_toks[tok]);
909 				break;
910 			}
911 			/* Handling depends on right/left. */
912 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
913 				parent->right = mandoc_strdup("\\[rc]");
914 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
915 				parent->right = mandoc_strdup("\\[rf]");
916 			else
917 				parent->right =
918 				    mandoc_strndup(ep->start, ep->toksz);
919 		}
920 		parent = parent->parent;
921 		if (tok == EQN_TOK_BRACE_CLOSE &&
922 		    (parent->type == EQN_PILE ||
923 		     parent->type == EQN_MATRIX))
924 			parent = parent->parent;
925 		/* Close out any "singleton" lists. */
926 		while (parent->type == EQN_LIST &&
927 		    parent->expectargs == 1 &&
928 		    parent->args == 1)
929 			parent = parent->parent;
930 		break;
931 	case EQN_TOK_BRACE_OPEN:
932 	case EQN_TOK_LEFT:
933 		/*
934 		 * If we already have something in the stack and we're
935 		 * in an expression, then rewind til we're not any more
936 		 * (just like with the text node).
937 		 */
938 		while (parent->args == parent->expectargs)
939 			parent = parent->parent;
940 		if (EQN_TOK_LEFT == tok &&
941 		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
942 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
943 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
944 			break;
945 		}
946 		parent = eqn_box_alloc(ep, parent);
947 		parent->type = EQN_LIST;
948 		if (EQN_TOK_LEFT == tok) {
949 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
950 				parent->left = mandoc_strdup("\\[lc]");
951 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
952 				parent->left = mandoc_strdup("\\[lf]");
953 			else
954 				parent->left =
955 				    mandoc_strndup(ep->start, ep->toksz);
956 		}
957 		break;
958 	case EQN_TOK_PILE:
959 	case EQN_TOK_LPILE:
960 	case EQN_TOK_RPILE:
961 	case EQN_TOK_CPILE:
962 	case EQN_TOK_CCOL:
963 	case EQN_TOK_LCOL:
964 	case EQN_TOK_RCOL:
965 		while (parent->args == parent->expectargs)
966 			parent = parent->parent;
967 		parent = eqn_box_alloc(ep, parent);
968 		parent->type = EQN_PILE;
969 		parent->expectargs = 1;
970 		break;
971 	case EQN_TOK_ABOVE:
972 		for (cur = parent; cur != NULL; cur = cur->parent)
973 			if (cur->type == EQN_PILE)
974 				break;
975 		if (cur == NULL) {
976 			mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
977 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
978 			break;
979 		}
980 		parent = eqn_box_alloc(ep, cur);
981 		parent->type = EQN_LIST;
982 		break;
983 	case EQN_TOK_MATRIX:
984 		while (parent->args == parent->expectargs)
985 			parent = parent->parent;
986 		parent = eqn_box_alloc(ep, parent);
987 		parent->type = EQN_MATRIX;
988 		parent->expectargs = 1;
989 		break;
990 	case EQN_TOK_EOF:
991 		return;
992 	case EQN_TOK__MAX:
993 	case EQN_TOK_FUNC:
994 	case EQN_TOK_QUOTED:
995 	case EQN_TOK_SYM:
996 		p = ep->start;
997 		assert(p != NULL);
998 		/*
999 		 * If we already have something in the stack and we're
1000 		 * in an expression, then rewind til we're not any more.
1001 		 */
1002 		while (parent->args == parent->expectargs)
1003 			parent = parent->parent;
1004 		cur = eqn_box_alloc(ep, parent);
1005 		cur->type = EQN_TEXT;
1006 		cur->text = p;
1007 		switch (tok) {
1008 		case EQN_TOK_FUNC:
1009 			cur->font = EQNFONT_ROMAN;
1010 			break;
1011 		case EQN_TOK_QUOTED:
1012 			if (cur->font == EQNFONT_NONE)
1013 				cur->font = EQNFONT_ITALIC;
1014 			break;
1015 		case EQN_TOK_SYM:
1016 			break;
1017 		default:
1018 			if (cur->font != EQNFONT_NONE || *p == '\0')
1019 				break;
1020 			cpn = p - 1;
1021 			ccln = CCL_LET;
1022 			split = NULL;
1023 			for (;;) {
1024 				/* Advance to next character. */
1025 				cp = cpn++;
1026 				ccl = ccln;
1027 				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1028 				    isdigit((unsigned char)*cpn) ||
1029 				    (*cpn == '.' && (ccl == CCL_DIG ||
1030 				     isdigit((unsigned char)cpn[1]))) ?
1031 				    CCL_DIG : CCL_PUN;
1032 				/* No boundary before first character. */
1033 				if (cp < p)
1034 					continue;
1035 				cur->font = ccl == CCL_LET ?
1036 				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1037 				if (*cp == '\\')
1038 					mandoc_escape(&cpn, NULL, NULL);
1039 				/* No boundary after last character. */
1040 				if (*cpn == '\0')
1041 					break;
1042 				if (ccln == ccl && *cp != ',' && *cpn != ',')
1043 					continue;
1044 				/* Boundary found, split the text. */
1045 				if (parent->args == parent->expectargs) {
1046 					/* Remove the text from the tree. */
1047 					if (cur->prev == NULL)
1048 						parent->first = cur->next;
1049 					else
1050 						cur->prev->next = NULL;
1051 					parent->last = cur->prev;
1052 					parent->args--;
1053 					/* Set up a list instead. */
1054 					split = eqn_box_alloc(ep, parent);
1055 					split->type = EQN_LIST;
1056 					/* Insert the word into the list. */
1057 					split->first = split->last = cur;
1058 					cur->parent = split;
1059 					cur->prev = NULL;
1060 					parent = split;
1061 				}
1062 				/* Append a new text box. */
1063 				nbox = eqn_box_alloc(ep, parent);
1064 				nbox->type = EQN_TEXT;
1065 				nbox->text = mandoc_strdup(cpn);
1066 				/* Truncate the old box. */
1067 				p = mandoc_strndup(cur->text,
1068 				    cpn - cur->text);
1069 				free(cur->text);
1070 				cur->text = p;
1071 				/* Setup to process the new box. */
1072 				cur = nbox;
1073 				p = nbox->text;
1074 				cpn = p - 1;
1075 				ccln = CCL_LET;
1076 			}
1077 			if (split != NULL)
1078 				parent = split->parent;
1079 			break;
1080 		}
1081 		break;
1082 	default:
1083 		abort();
1084 	}
1085 	goto next_tok;
1086 }
1087 
1088 void
1089 eqn_free(struct eqn_node *p)
1090 {
1091 	int		 i;
1092 
1093 	for (i = 0; i < (int)p->defsz; i++) {
1094 		free(p->defs[i].key);
1095 		free(p->defs[i].val);
1096 	}
1097 
1098 	free(p->data);
1099 	free(p->defs);
1100 	free(p);
1101 }
1102