xref: /openbsd-src/usr.bin/mandoc/eqn.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: eqn.c,v 1.24 2016/01/08 00:50:20 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <limits.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <time.h>
26 
27 #include "mandoc.h"
28 #include "mandoc_aux.h"
29 #include "libmandoc.h"
30 #include "libroff.h"
31 
/* Upper bound on successive define expansions while reading one token. */
#define	EQN_NEST_MAX	 128

/*
 * Compare two counted strings: true only if both lengths agree and
 * the first sz1 bytes are identical.  Note that sz1 is evaluated twice.
 */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && strncmp((p1), (p2), (sz1)) == 0)
35 
/*
 * Keywords recognised by the tokeniser.
 * The values double as indices into eqn_toks[], so the order here and
 * the entries of that table must stay in step.
 * EQN_TOK__MAX is both the number of entries and the value used for
 * "not a keyword".
 */
enum	eqn_tok {
	/* Marks placed above or below the preceding box. */
	EQN_TOK_DYAD = 0,
	EQN_TOK_VEC,
	EQN_TOK_UNDER,
	EQN_TOK_BAR,
	EQN_TOK_TILDE,
	EQN_TOK_HAT,
	EQN_TOK_DOT,
	EQN_TOK_DOTDOT,
	/* Local motions. */
	EQN_TOK_FWD,
	EQN_TOK_BACK,
	EQN_TOK_DOWN,
	EQN_TOK_UP,
	/* Font and size modifiers. */
	EQN_TOK_FAT,
	EQN_TOK_ROMAN,
	EQN_TOK_ITALIC,
	EQN_TOK_BOLD,
	EQN_TOK_SIZE,
	/* Positional operators. */
	EQN_TOK_SUB,
	EQN_TOK_SUP,
	EQN_TOK_SQRT,
	EQN_TOK_OVER,
	EQN_TOK_FROM,
	EQN_TOK_TO,
	/* Grouping. */
	EQN_TOK_BRACE_OPEN,
	EQN_TOK_BRACE_CLOSE,
	/* Global settings and alignment marks. */
	EQN_TOK_GSIZE,
	EQN_TOK_GFONT,
	EQN_TOK_MARK,
	EQN_TOK_LINEUP,
	/* Delimited lists. */
	EQN_TOK_LEFT,
	EQN_TOK_RIGHT,
	/* Piles and matrices. */
	EQN_TOK_PILE,
	EQN_TOK_LPILE,
	EQN_TOK_RPILE,
	EQN_TOK_CPILE,
	EQN_TOK_MATRIX,
	EQN_TOK_CCOL,
	EQN_TOK_LCOL,
	EQN_TOK_RCOL,
	/* Control statements. */
	EQN_TOK_DELIM,
	EQN_TOK_DEFINE,
	EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,
	/* End of input; has no spelling. */
	EQN_TOK_EOF,
	EQN_TOK_ABOVE,
	EQN_TOK__MAX
};

/*
 * Spelling of each keyword, indexed by enum eqn_tok.
 * A NULL entry means the token never appears literally in the input.
 */
static	const char *eqn_toks[EQN_TOK__MAX] = {
	[EQN_TOK_DYAD]		= "dyad",
	[EQN_TOK_VEC]		= "vec",
	[EQN_TOK_UNDER]		= "under",
	[EQN_TOK_BAR]		= "bar",
	[EQN_TOK_TILDE]		= "tilde",
	[EQN_TOK_HAT]		= "hat",
	[EQN_TOK_DOT]		= "dot",
	[EQN_TOK_DOTDOT]	= "dotdot",
	[EQN_TOK_FWD]		= "fwd",
	[EQN_TOK_BACK]		= "back",
	[EQN_TOK_DOWN]		= "down",
	[EQN_TOK_UP]		= "up",
	[EQN_TOK_FAT]		= "fat",
	[EQN_TOK_ROMAN]		= "roman",
	[EQN_TOK_ITALIC]	= "italic",
	[EQN_TOK_BOLD]		= "bold",
	[EQN_TOK_SIZE]		= "size",
	[EQN_TOK_SUB]		= "sub",
	[EQN_TOK_SUP]		= "sup",
	[EQN_TOK_SQRT]		= "sqrt",
	[EQN_TOK_OVER]		= "over",
	[EQN_TOK_FROM]		= "from",
	[EQN_TOK_TO]		= "to",
	[EQN_TOK_BRACE_OPEN]	= "{",
	[EQN_TOK_BRACE_CLOSE]	= "}",
	[EQN_TOK_GSIZE]		= "gsize",
	[EQN_TOK_GFONT]		= "gfont",
	[EQN_TOK_MARK]		= "mark",
	[EQN_TOK_LINEUP]	= "lineup",
	[EQN_TOK_LEFT]		= "left",
	[EQN_TOK_RIGHT]		= "right",
	[EQN_TOK_PILE]		= "pile",
	[EQN_TOK_LPILE]		= "lpile",
	[EQN_TOK_RPILE]		= "rpile",
	[EQN_TOK_CPILE]		= "cpile",
	[EQN_TOK_MATRIX]	= "matrix",
	[EQN_TOK_CCOL]		= "ccol",
	[EQN_TOK_LCOL]		= "lcol",
	[EQN_TOK_RCOL]		= "rcol",
	[EQN_TOK_DELIM]		= "delim",
	[EQN_TOK_DEFINE]	= "define",
	[EQN_TOK_TDEFINE]	= "tdefine",
	[EQN_TOK_NDEFINE]	= "ndefine",
	[EQN_TOK_UNDEF]		= "undef",
	[EQN_TOK_EOF]		= NULL,
	[EQN_TOK_ABOVE]		= "above",
};
134 
/*
 * Names that are replaced by a special character when they occur as
 * plain text in an equation.
 * The values index eqnsyms[]; EQNSYM__MAX is the number of entries.
 */
enum	eqn_symt {
	/* Lower-case Greek letters. */
	EQNSYM_alpha,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_psi,
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	/* Upper-case Greek letters. */
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	/* Operators and miscellaneous symbols. */
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};

/*
 * One symbol replacement: "str" is the text as written in the
 * equation, "sym" is the character name that the parser wraps
 * as "\[sym]" when it stores the text box.
 */
struct	eqnsym {
	const char	*str;
	const char	*sym;
};

/*
 * Replacement table, indexed by enum eqn_symt.
 * Designated initializers tie each entry to its enumerator, so a
 * reordering of the enum cannot silently shift the table.
 */
static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
	[EQNSYM_alpha]		= { "alpha", "*a" },
	[EQNSYM_beta]		= { "beta", "*b" },
	[EQNSYM_chi]		= { "chi", "*x" },
	[EQNSYM_delta]		= { "delta", "*d" },
	[EQNSYM_epsilon]	= { "epsilon", "*e" },
	[EQNSYM_eta]		= { "eta", "*y" },
	[EQNSYM_gamma]		= { "gamma", "*g" },
	[EQNSYM_iota]		= { "iota", "*i" },
	[EQNSYM_kappa]		= { "kappa", "*k" },
	[EQNSYM_lambda]		= { "lambda", "*l" },
	[EQNSYM_mu]		= { "mu", "*m" },
	[EQNSYM_nu]		= { "nu", "*n" },
	[EQNSYM_omega]		= { "omega", "*w" },
	[EQNSYM_omicron]	= { "omicron", "*o" },
	[EQNSYM_phi]		= { "phi", "*f" },
	[EQNSYM_pi]		= { "pi", "*p" },
	[EQNSYM_psi]		= { "psi", "*q" },
	[EQNSYM_rho]		= { "rho", "*r" },
	[EQNSYM_sigma]		= { "sigma", "*s" },
	[EQNSYM_tau]		= { "tau", "*t" },
	[EQNSYM_theta]		= { "theta", "*h" },
	[EQNSYM_upsilon]	= { "upsilon", "*u" },
	[EQNSYM_xi]		= { "xi", "*c" },
	[EQNSYM_zeta]		= { "zeta", "*z" },
	[EQNSYM_DELTA]		= { "DELTA", "*D" },
	[EQNSYM_GAMMA]		= { "GAMMA", "*G" },
	[EQNSYM_LAMBDA]		= { "LAMBDA", "*L" },
	[EQNSYM_OMEGA]		= { "OMEGA", "*W" },
	[EQNSYM_PHI]		= { "PHI", "*F" },
	[EQNSYM_PI]		= { "PI", "*P" },
	[EQNSYM_PSI]		= { "PSI", "*Q" },
	[EQNSYM_SIGMA]		= { "SIGMA", "*S" },
	[EQNSYM_THETA]		= { "THETA", "*H" },
	[EQNSYM_UPSILON]	= { "UPSILON", "*U" },
	[EQNSYM_XI]		= { "XI", "*C" },
	[EQNSYM_inter]		= { "inter", "ca" },
	[EQNSYM_union]		= { "union", "cu" },
	[EQNSYM_prod]		= { "prod", "product" },
	[EQNSYM_int]		= { "int", "integral" },
	[EQNSYM_sum]		= { "sum", "sum" },
	[EQNSYM_grad]		= { "grad", "gr" },
	[EQNSYM_del]		= { "del", "gr" },
	[EQNSYM_times]		= { "times", "mu" },
	[EQNSYM_cdot]		= { "cdot", "pc" },
	[EQNSYM_nothing]	= { "nothing", "&" },
	[EQNSYM_approx]		= { "approx", "~~" },
	[EQNSYM_prime]		= { "prime", "fm" },
	[EQNSYM_half]		= { "half", "12" },
	[EQNSYM_partial]	= { "partial", "pd" },
	[EQNSYM_inf]		= { "inf", "if" },
	[EQNSYM_muchgreat]	= { ">>", ">>" },
	[EQNSYM_muchless]	= { "<<", "<<" },
	[EQNSYM_larrow]		= { "<-", "<-" },
	[EQNSYM_rarrow]		= { "->", "->" },
	[EQNSYM_pm]		= { "+-", "+-" },
	[EQNSYM_nequal]		= { "!=", "!=" },
	[EQNSYM_equiv]		= { "==", "==" },
	[EQNSYM_lessequal]	= { "<=", "<=" },
	[EQNSYM_moreequal]	= { ">=", ">=" },
	[EQNSYM_minus]		= { "-", "mi" },
};
266 
/*
 * Forward declarations of the file-local helpers, in alphabetical
 * order; the definitions follow below.
 */
static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
static	void		 eqn_box_free(struct eqn_box *);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
				enum eqn_post, struct eqn_box *);
static	void		 eqn_def(struct eqn_node *);
static	struct eqn_def	*eqn_def_find(struct eqn_node *, const char *, size_t);
static	void		 eqn_delim(struct eqn_node *);
static	const char	*eqn_next(struct eqn_node *, char, size_t *, int);
static	const char	*eqn_nextrawtok(struct eqn_node *, size_t *);
static	const char	*eqn_nexttok(struct eqn_node *, size_t *);
static	enum rofferr	 eqn_parse(struct eqn_node *, struct eqn_box *);
static	enum eqn_tok	 eqn_tok_parse(struct eqn_node *, char **);
static	void		 eqn_undef(struct eqn_node *);
280 
281 
282 enum rofferr
283 eqn_read(struct eqn_node **epp, int ln,
284 		const char *p, int pos, int *offs)
285 {
286 	size_t		 sz;
287 	struct eqn_node	*ep;
288 	enum rofferr	 er;
289 
290 	ep = *epp;
291 
292 	/*
293 	 * If we're the terminating mark, unset our equation status and
294 	 * validate the full equation.
295 	 */
296 
297 	if (0 == strncmp(p, ".EN", 3)) {
298 		er = eqn_end(epp);
299 		p += 3;
300 		while (' ' == *p || '\t' == *p)
301 			p++;
302 		if ('\0' == *p)
303 			return er;
304 		mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
305 		    ln, pos, "EN %s", p);
306 		return er;
307 	}
308 
309 	/*
310 	 * Build up the full string, replacing all newlines with regular
311 	 * whitespace.
312 	 */
313 
314 	sz = strlen(p + pos) + 1;
315 	ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
316 
317 	/* First invocation: nil terminate the string. */
318 
319 	if (0 == ep->sz)
320 		*ep->data = '\0';
321 
322 	ep->sz += sz;
323 	strlcat(ep->data, p + pos, ep->sz + 1);
324 	strlcat(ep->data, " ", ep->sz + 1);
325 	return ROFF_IGN;
326 }
327 
328 struct eqn_node *
329 eqn_alloc(int pos, int line, struct mparse *parse)
330 {
331 	struct eqn_node	*p;
332 
333 	p = mandoc_calloc(1, sizeof(struct eqn_node));
334 
335 	p->parse = parse;
336 	p->eqn.ln = line;
337 	p->eqn.pos = pos;
338 	p->gsize = EQN_DEFSIZE;
339 
340 	return p;
341 }
342 
343 /*
344  * Find the key "key" of the give size within our eqn-defined values.
345  */
346 static struct eqn_def *
347 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
348 {
349 	int		 i;
350 
351 	for (i = 0; i < (int)ep->defsz; i++)
352 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
353 		    ep->defs[i].keysz, key, sz))
354 			return &ep->defs[i];
355 
356 	return NULL;
357 }
358 
359 /*
360  * Get the next token from the input stream using the given quote
361  * character.
362  * Optionally make any replacements.
363  */
364 static const char *
365 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
366 {
367 	char		*start, *next;
368 	int		 q, diff, lim;
369 	size_t		 ssz, dummy;
370 	struct eqn_def	*def;
371 
372 	if (NULL == sz)
373 		sz = &dummy;
374 
375 	lim = 0;
376 	ep->rew = ep->cur;
377 again:
378 	/* Prevent self-definitions. */
379 
380 	if (lim >= EQN_NEST_MAX) {
381 		mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
382 		    ep->eqn.ln, ep->eqn.pos, NULL);
383 		return NULL;
384 	}
385 
386 	ep->cur = ep->rew;
387 	start = &ep->data[(int)ep->cur];
388 	q = 0;
389 
390 	if ('\0' == *start)
391 		return NULL;
392 
393 	if (quote == *start) {
394 		ep->cur++;
395 		q = 1;
396 	}
397 
398 	start = &ep->data[(int)ep->cur];
399 
400 	if ( ! q) {
401 		if ('{' == *start || '}' == *start)
402 			ssz = 1;
403 		else
404 			ssz = strcspn(start + 1, " ^~\"{}\t") + 1;
405 		next = start + (int)ssz;
406 		if ('\0' == *next)
407 			next = NULL;
408 	} else
409 		next = strchr(start, quote);
410 
411 	if (NULL != next) {
412 		*sz = (size_t)(next - start);
413 		ep->cur += *sz;
414 		if (q)
415 			ep->cur++;
416 		while (' ' == ep->data[(int)ep->cur] ||
417 		    '\t' == ep->data[(int)ep->cur] ||
418 		    '^' == ep->data[(int)ep->cur] ||
419 		    '~' == ep->data[(int)ep->cur])
420 			ep->cur++;
421 	} else {
422 		if (q)
423 			mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
424 			    ep->eqn.ln, ep->eqn.pos, NULL);
425 		next = strchr(start, '\0');
426 		*sz = (size_t)(next - start);
427 		ep->cur += *sz;
428 	}
429 
430 	/* Quotes aren't expanded for values. */
431 
432 	if (q || ! repl)
433 		return start;
434 
435 	if (NULL != (def = eqn_def_find(ep, start, *sz))) {
436 		diff = def->valsz - *sz;
437 
438 		if (def->valsz > *sz) {
439 			ep->sz += diff;
440 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
441 			ep->data[ep->sz] = '\0';
442 			start = &ep->data[(int)ep->rew];
443 		}
444 
445 		diff = def->valsz - *sz;
446 		memmove(start + *sz + diff, start + *sz,
447 		    (strlen(start) - *sz) + 1);
448 		memcpy(start, def->val, def->valsz);
449 		lim++;
450 		goto again;
451 	}
452 
453 	return start;
454 }
455 
/*
 * Fetch the next token, delimited by the double quote character,
 * with defined names replaced by their values.
 */
static const char *
eqn_nexttok(struct eqn_node *ep, size_t *sz)
{
	const int	 expand = 1;

	return eqn_next(ep, '"', sz, expand);
}
466 
/*
 * Fetch the next token, delimited by the double quote character,
 * exactly as written: defined names are not replaced.
 */
static const char *
eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
{
	const int	 expand = 0;

	return eqn_next(ep, '"', sz, expand);
}
476 
477 /*
478  * Parse a token from the stream of text.
479  * A token consists of one of the recognised eqn(7) strings.
480  * Strings are separated by delimiting marks.
481  * This returns EQN_TOK_EOF when there are no more tokens.
482  * If the token is an unrecognised string literal, then it returns
483  * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated
484  * string.
485  * This must be later freed with free(3).
486  */
487 static enum eqn_tok
488 eqn_tok_parse(struct eqn_node *ep, char **p)
489 {
490 	const char	*start;
491 	size_t		 i, sz;
492 	int		 quoted;
493 
494 	if (NULL != p)
495 		*p = NULL;
496 
497 	quoted = ep->data[ep->cur] == '"';
498 
499 	if (NULL == (start = eqn_nexttok(ep, &sz)))
500 		return EQN_TOK_EOF;
501 
502 	if (quoted) {
503 		if (p != NULL)
504 			*p = mandoc_strndup(start, sz);
505 		return EQN_TOK__MAX;
506 	}
507 
508 	for (i = 0; i < EQN_TOK__MAX; i++) {
509 		if (NULL == eqn_toks[i])
510 			continue;
511 		if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i])))
512 			break;
513 	}
514 
515 	if (i == EQN_TOK__MAX && NULL != p)
516 		*p = mandoc_strndup(start, sz);
517 
518 	return i;
519 }
520 
521 static void
522 eqn_box_free(struct eqn_box *bp)
523 {
524 
525 	if (bp->first)
526 		eqn_box_free(bp->first);
527 	if (bp->next)
528 		eqn_box_free(bp->next);
529 
530 	free(bp->text);
531 	free(bp->left);
532 	free(bp->right);
533 	free(bp->top);
534 	free(bp->bottom);
535 	free(bp);
536 }
537 
538 /*
539  * Allocate a box as the last child of the parent node.
540  */
541 static struct eqn_box *
542 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
543 {
544 	struct eqn_box	*bp;
545 
546 	bp = mandoc_calloc(1, sizeof(struct eqn_box));
547 	bp->parent = parent;
548 	bp->parent->args++;
549 	bp->expectargs = UINT_MAX;
550 	bp->size = ep->gsize;
551 
552 	if (NULL != parent->first) {
553 		parent->last->next = bp;
554 		bp->prev = parent->last;
555 	} else
556 		parent->first = bp;
557 
558 	parent->last = bp;
559 	return bp;
560 }
561 
562 /*
563  * Reparent the current last node (of the current parent) under a new
564  * EQN_SUBEXPR as the first element.
565  * Then return the new parent.
566  * The new EQN_SUBEXPR will have a two-child limit.
567  */
568 static struct eqn_box *
569 eqn_box_makebinary(struct eqn_node *ep,
570 	enum eqn_post pos, struct eqn_box *parent)
571 {
572 	struct eqn_box	*b, *newb;
573 
574 	assert(NULL != parent->last);
575 	b = parent->last;
576 	if (parent->last == parent->first)
577 		parent->first = NULL;
578 	parent->args--;
579 	parent->last = b->prev;
580 	b->prev = NULL;
581 	newb = eqn_box_alloc(ep, parent);
582 	newb->pos = pos;
583 	newb->type = EQN_SUBEXPR;
584 	newb->expectargs = 2;
585 	newb->args = 1;
586 	newb->first = newb->last = b;
587 	newb->first->next = NULL;
588 	b->parent = newb;
589 	return newb;
590 }
591 
592 /*
593  * Parse the "delim" control statement.
594  */
595 static void
596 eqn_delim(struct eqn_node *ep)
597 {
598 	const char	*start;
599 	size_t		 sz;
600 
601 	if ((start = eqn_nextrawtok(ep, &sz)) == NULL)
602 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
603 		    ep->eqn.ln, ep->eqn.pos, "delim");
604 	else if (strncmp(start, "off", 3) == 0)
605 		ep->delim = 0;
606 	else if (strncmp(start, "on", 2) == 0) {
607 		if (ep->odelim && ep->cdelim)
608 			ep->delim = 1;
609 	} else if (start[1] != '\0') {
610 		ep->odelim = start[0];
611 		ep->cdelim = start[1];
612 		ep->delim = 1;
613 	}
614 }
615 
616 /*
617  * Undefine a previously-defined string.
618  */
619 static void
620 eqn_undef(struct eqn_node *ep)
621 {
622 	const char	*start;
623 	struct eqn_def	*def;
624 	size_t		 sz;
625 
626 	if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
627 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
628 		    ep->eqn.ln, ep->eqn.pos, "undef");
629 		return;
630 	}
631 	if ((def = eqn_def_find(ep, start, sz)) == NULL)
632 		return;
633 	free(def->key);
634 	free(def->val);
635 	def->key = def->val = NULL;
636 	def->keysz = def->valsz = 0;
637 }
638 
639 static void
640 eqn_def(struct eqn_node *ep)
641 {
642 	const char	*start;
643 	size_t		 sz;
644 	struct eqn_def	*def;
645 	int		 i;
646 
647 	if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
648 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
649 		    ep->eqn.ln, ep->eqn.pos, "define");
650 		return;
651 	}
652 
653 	/*
654 	 * Search for a key that already exists.
655 	 * Create a new key if none is found.
656 	 */
657 	if (NULL == (def = eqn_def_find(ep, start, sz))) {
658 		/* Find holes in string array. */
659 		for (i = 0; i < (int)ep->defsz; i++)
660 			if (0 == ep->defs[i].keysz)
661 				break;
662 
663 		if (i == (int)ep->defsz) {
664 			ep->defsz++;
665 			ep->defs = mandoc_reallocarray(ep->defs,
666 			    ep->defsz, sizeof(struct eqn_def));
667 			ep->defs[i].key = ep->defs[i].val = NULL;
668 		}
669 
670 		def = ep->defs + i;
671 		free(def->key);
672 		def->key = mandoc_strndup(start, sz);
673 		def->keysz = sz;
674 	}
675 
676 	start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0);
677 	if (start == NULL) {
678 		mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
679 		    ep->eqn.ln, ep->eqn.pos, "define %s", def->key);
680 		free(def->key);
681 		free(def->val);
682 		def->key = def->val = NULL;
683 		def->keysz = def->valsz = 0;
684 		return;
685 	}
686 	free(def->val);
687 	def->val = mandoc_strndup(start, sz);
688 	def->valsz = sz;
689 }
690 
691 /*
692  * Recursively parse an eqn(7) expression.
693  */
694 static enum rofferr
695 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
696 {
697 	char		 sym[64];
698 	struct eqn_box	*cur;
699 	const char	*start;
700 	char		*p;
701 	size_t		 i, sz;
702 	enum eqn_tok	 tok, subtok;
703 	enum eqn_post	 pos;
704 	int		 size;
705 
706 	assert(parent != NULL);
707 
708 	/*
709 	 * Empty equation.
710 	 * Do not add it to the high-level syntax tree.
711 	 */
712 
713 	if (ep->data == NULL)
714 		return ROFF_IGN;
715 
716 next_tok:
717 	tok = eqn_tok_parse(ep, &p);
718 
719 this_tok:
720 	switch (tok) {
721 	case (EQN_TOK_UNDEF):
722 		eqn_undef(ep);
723 		break;
724 	case (EQN_TOK_NDEFINE):
725 	case (EQN_TOK_DEFINE):
726 		eqn_def(ep);
727 		break;
728 	case (EQN_TOK_TDEFINE):
729 		if (eqn_nextrawtok(ep, NULL) == NULL ||
730 		    eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL)
731 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
732 			    ep->eqn.ln, ep->eqn.pos, "tdefine");
733 		break;
734 	case (EQN_TOK_DELIM):
735 		eqn_delim(ep);
736 		break;
737 	case (EQN_TOK_GFONT):
738 		if (eqn_nextrawtok(ep, NULL) == NULL)
739 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
740 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
741 		break;
742 	case (EQN_TOK_MARK):
743 	case (EQN_TOK_LINEUP):
744 		/* Ignore these. */
745 		break;
746 	case (EQN_TOK_DYAD):
747 	case (EQN_TOK_VEC):
748 	case (EQN_TOK_UNDER):
749 	case (EQN_TOK_BAR):
750 	case (EQN_TOK_TILDE):
751 	case (EQN_TOK_HAT):
752 	case (EQN_TOK_DOT):
753 	case (EQN_TOK_DOTDOT):
754 		if (parent->last == NULL) {
755 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
756 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
757 			cur = eqn_box_alloc(ep, parent);
758 			cur->type = EQN_TEXT;
759 			cur->text = mandoc_strdup("");
760 		}
761 		parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
762 		parent->type = EQN_LISTONE;
763 		parent->expectargs = 1;
764 		switch (tok) {
765 		case (EQN_TOK_DOTDOT):
766 			strlcpy(sym, "\\[ad]", sizeof(sym));
767 			break;
768 		case (EQN_TOK_VEC):
769 			strlcpy(sym, "\\[->]", sizeof(sym));
770 			break;
771 		case (EQN_TOK_DYAD):
772 			strlcpy(sym, "\\[<>]", sizeof(sym));
773 			break;
774 		case (EQN_TOK_TILDE):
775 			strlcpy(sym, "\\[a~]", sizeof(sym));
776 			break;
777 		case (EQN_TOK_UNDER):
778 			strlcpy(sym, "\\[ul]", sizeof(sym));
779 			break;
780 		case (EQN_TOK_BAR):
781 			strlcpy(sym, "\\[rl]", sizeof(sym));
782 			break;
783 		case (EQN_TOK_DOT):
784 			strlcpy(sym, "\\[a.]", sizeof(sym));
785 			break;
786 		case (EQN_TOK_HAT):
787 			strlcpy(sym, "\\[ha]", sizeof(sym));
788 			break;
789 		default:
790 			abort();
791 		}
792 
793 		switch (tok) {
794 		case (EQN_TOK_DOTDOT):
795 		case (EQN_TOK_VEC):
796 		case (EQN_TOK_DYAD):
797 		case (EQN_TOK_TILDE):
798 		case (EQN_TOK_BAR):
799 		case (EQN_TOK_DOT):
800 		case (EQN_TOK_HAT):
801 			parent->top = mandoc_strdup(sym);
802 			break;
803 		case (EQN_TOK_UNDER):
804 			parent->bottom = mandoc_strdup(sym);
805 			break;
806 		default:
807 			abort();
808 		}
809 		parent = parent->parent;
810 		break;
811 	case (EQN_TOK_FWD):
812 	case (EQN_TOK_BACK):
813 	case (EQN_TOK_DOWN):
814 	case (EQN_TOK_UP):
815 		subtok = eqn_tok_parse(ep, NULL);
816 		if (subtok != EQN_TOK__MAX) {
817 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
818 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
819 			tok = subtok;
820 			goto this_tok;
821 		}
822 		break;
823 	case (EQN_TOK_FAT):
824 	case (EQN_TOK_ROMAN):
825 	case (EQN_TOK_ITALIC):
826 	case (EQN_TOK_BOLD):
827 		while (parent->args == parent->expectargs)
828 			parent = parent->parent;
829 		/*
830 		 * These values apply to the next word or sequence of
831 		 * words; thus, we mark that we'll have a child with
832 		 * exactly one of those.
833 		 */
834 		parent = eqn_box_alloc(ep, parent);
835 		parent->type = EQN_LISTONE;
836 		parent->expectargs = 1;
837 		switch (tok) {
838 		case (EQN_TOK_FAT):
839 			parent->font = EQNFONT_FAT;
840 			break;
841 		case (EQN_TOK_ROMAN):
842 			parent->font = EQNFONT_ROMAN;
843 			break;
844 		case (EQN_TOK_ITALIC):
845 			parent->font = EQNFONT_ITALIC;
846 			break;
847 		case (EQN_TOK_BOLD):
848 			parent->font = EQNFONT_BOLD;
849 			break;
850 		default:
851 			abort();
852 		}
853 		break;
854 	case (EQN_TOK_SIZE):
855 	case (EQN_TOK_GSIZE):
856 		/* Accept two values: integral size and a single. */
857 		if (NULL == (start = eqn_nexttok(ep, &sz))) {
858 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
859 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
860 			break;
861 		}
862 		size = mandoc_strntoi(start, sz, 10);
863 		if (-1 == size) {
864 			mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
865 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
866 			break;
867 		}
868 		if (EQN_TOK_GSIZE == tok) {
869 			ep->gsize = size;
870 			break;
871 		}
872 		parent = eqn_box_alloc(ep, parent);
873 		parent->type = EQN_LISTONE;
874 		parent->expectargs = 1;
875 		parent->size = size;
876 		break;
877 	case (EQN_TOK_FROM):
878 	case (EQN_TOK_TO):
879 	case (EQN_TOK_SUB):
880 	case (EQN_TOK_SUP):
881 		/*
882 		 * We have a left-right-associative expression.
883 		 * Repivot under a positional node, open a child scope
884 		 * and keep on reading.
885 		 */
886 		if (parent->last == NULL) {
887 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
888 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
889 			cur = eqn_box_alloc(ep, parent);
890 			cur->type = EQN_TEXT;
891 			cur->text = mandoc_strdup("");
892 		}
893 		/* Handle the "subsup" and "fromto" positions. */
894 		if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
895 			parent->expectargs = 3;
896 			parent->pos = EQNPOS_SUBSUP;
897 			break;
898 		}
899 		if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
900 			parent->expectargs = 3;
901 			parent->pos = EQNPOS_FROMTO;
902 			break;
903 		}
904 		switch (tok) {
905 		case (EQN_TOK_FROM):
906 			pos = EQNPOS_FROM;
907 			break;
908 		case (EQN_TOK_TO):
909 			pos = EQNPOS_TO;
910 			break;
911 		case (EQN_TOK_SUP):
912 			pos = EQNPOS_SUP;
913 			break;
914 		case (EQN_TOK_SUB):
915 			pos = EQNPOS_SUB;
916 			break;
917 		default:
918 			abort();
919 		}
920 		parent = eqn_box_makebinary(ep, pos, parent);
921 		break;
922 	case (EQN_TOK_SQRT):
923 		while (parent->args == parent->expectargs)
924 			parent = parent->parent;
925 		/*
926 		 * Accept a left-right-associative set of arguments just
927 		 * like sub and sup and friends but without rebalancing
928 		 * under a pivot.
929 		 */
930 		parent = eqn_box_alloc(ep, parent);
931 		parent->type = EQN_SUBEXPR;
932 		parent->pos = EQNPOS_SQRT;
933 		parent->expectargs = 1;
934 		break;
935 	case (EQN_TOK_OVER):
936 		/*
937 		 * We have a right-left-associative fraction.
938 		 * Close out anything that's currently open, then
939 		 * rebalance and continue reading.
940 		 */
941 		if (parent->last == NULL) {
942 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
943 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
944 			cur = eqn_box_alloc(ep, parent);
945 			cur->type = EQN_TEXT;
946 			cur->text = mandoc_strdup("");
947 		}
948 		while (EQN_SUBEXPR == parent->type)
949 			parent = parent->parent;
950 		parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
951 		break;
952 	case (EQN_TOK_RIGHT):
953 	case (EQN_TOK_BRACE_CLOSE):
954 		/*
955 		 * Close out the existing brace.
956 		 * FIXME: this is a shitty sentinel: we should really
957 		 * have a native EQN_BRACE type or whatnot.
958 		 */
959 		for (cur = parent; cur != NULL; cur = cur->parent)
960 			if (cur->type == EQN_LIST &&
961 			    (tok == EQN_TOK_BRACE_CLOSE ||
962 			     cur->left != NULL))
963 				break;
964 		if (cur == NULL) {
965 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
966 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
967 			break;
968 		}
969 		parent = cur;
970 		if (EQN_TOK_RIGHT == tok) {
971 			if (NULL == (start = eqn_nexttok(ep, &sz))) {
972 				mandoc_msg(MANDOCERR_REQ_EMPTY,
973 				    ep->parse, ep->eqn.ln,
974 				    ep->eqn.pos, eqn_toks[tok]);
975 				break;
976 			}
977 			/* Handling depends on right/left. */
978 			if (STRNEQ(start, sz, "ceiling", 7)) {
979 				strlcpy(sym, "\\[rc]", sizeof(sym));
980 				parent->right = mandoc_strdup(sym);
981 			} else if (STRNEQ(start, sz, "floor", 5)) {
982 				strlcpy(sym, "\\[rf]", sizeof(sym));
983 				parent->right = mandoc_strdup(sym);
984 			} else
985 				parent->right = mandoc_strndup(start, sz);
986 		}
987 		parent = parent->parent;
988 		if (tok == EQN_TOK_BRACE_CLOSE &&
989 		    (parent->type == EQN_PILE ||
990 		     parent->type == EQN_MATRIX))
991 			parent = parent->parent;
992 		/* Close out any "singleton" lists. */
993 		while (parent->type == EQN_LISTONE &&
994 		    parent->args == parent->expectargs)
995 			parent = parent->parent;
996 		break;
997 	case (EQN_TOK_BRACE_OPEN):
998 	case (EQN_TOK_LEFT):
999 		/*
1000 		 * If we already have something in the stack and we're
1001 		 * in an expression, then rewind til we're not any more
1002 		 * (just like with the text node).
1003 		 */
1004 		while (parent->args == parent->expectargs)
1005 			parent = parent->parent;
1006 		if (EQN_TOK_LEFT == tok &&
1007 		    (start = eqn_nexttok(ep, &sz)) == NULL) {
1008 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
1009 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1010 			break;
1011 		}
1012 		parent = eqn_box_alloc(ep, parent);
1013 		parent->type = EQN_LIST;
1014 		if (EQN_TOK_LEFT == tok) {
1015 			if (STRNEQ(start, sz, "ceiling", 7)) {
1016 				strlcpy(sym, "\\[lc]", sizeof(sym));
1017 				parent->left = mandoc_strdup(sym);
1018 			} else if (STRNEQ(start, sz, "floor", 5)) {
1019 				strlcpy(sym, "\\[lf]", sizeof(sym));
1020 				parent->left = mandoc_strdup(sym);
1021 			} else
1022 				parent->left = mandoc_strndup(start, sz);
1023 		}
1024 		break;
1025 	case (EQN_TOK_PILE):
1026 	case (EQN_TOK_LPILE):
1027 	case (EQN_TOK_RPILE):
1028 	case (EQN_TOK_CPILE):
1029 	case (EQN_TOK_CCOL):
1030 	case (EQN_TOK_LCOL):
1031 	case (EQN_TOK_RCOL):
1032 		while (parent->args == parent->expectargs)
1033 			parent = parent->parent;
1034 		parent = eqn_box_alloc(ep, parent);
1035 		parent->type = EQN_PILE;
1036 		parent->expectargs = 1;
1037 		break;
1038 	case (EQN_TOK_ABOVE):
1039 		for (cur = parent; cur != NULL; cur = cur->parent)
1040 			if (cur->type == EQN_PILE)
1041 				break;
1042 		if (cur == NULL) {
1043 			mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
1044 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1045 			break;
1046 		}
1047 		parent = eqn_box_alloc(ep, cur);
1048 		parent->type = EQN_LIST;
1049 		break;
1050 	case (EQN_TOK_MATRIX):
1051 		while (parent->args == parent->expectargs)
1052 			parent = parent->parent;
1053 		parent = eqn_box_alloc(ep, parent);
1054 		parent->type = EQN_MATRIX;
1055 		parent->expectargs = 1;
1056 		break;
1057 	case (EQN_TOK_EOF):
1058 		/*
1059 		 * End of file!
1060 		 * TODO: make sure we're not in an open subexpression.
1061 		 */
1062 		return ROFF_EQN;
1063 	default:
1064 		assert(tok == EQN_TOK__MAX);
1065 		assert(NULL != p);
1066 		/*
1067 		 * If we already have something in the stack and we're
1068 		 * in an expression, then rewind til we're not any more.
1069 		 */
1070 		while (parent->args == parent->expectargs)
1071 			parent = parent->parent;
1072 		cur = eqn_box_alloc(ep, parent);
1073 		cur->type = EQN_TEXT;
1074 		for (i = 0; i < EQNSYM__MAX; i++)
1075 			if (0 == strcmp(eqnsyms[i].str, p)) {
1076 				(void)snprintf(sym, sizeof(sym),
1077 					"\\[%s]", eqnsyms[i].sym);
1078 				cur->text = mandoc_strdup(sym);
1079 				free(p);
1080 				break;
1081 			}
1082 
1083 		if (i == EQNSYM__MAX)
1084 			cur->text = p;
1085 		/*
1086 		 * Post-process list status.
1087 		 */
1088 		while (parent->type == EQN_LISTONE &&
1089 		    parent->args == parent->expectargs)
1090 			parent = parent->parent;
1091 		break;
1092 	}
1093 	goto next_tok;
1094 }
1095 
1096 enum rofferr
1097 eqn_end(struct eqn_node **epp)
1098 {
1099 	struct eqn_node	*ep;
1100 
1101 	ep = *epp;
1102 	*epp = NULL;
1103 
1104 	ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
1105 	ep->eqn.root->expectargs = UINT_MAX;
1106 	return eqn_parse(ep, ep->eqn.root);
1107 }
1108 
1109 void
1110 eqn_free(struct eqn_node *p)
1111 {
1112 	int		 i;
1113 
1114 	eqn_box_free(p->eqn.root);
1115 
1116 	for (i = 0; i < (int)p->defsz; i++) {
1117 		free(p->defs[i].key);
1118 		free(p->defs[i].val);
1119 	}
1120 
1121 	free(p->data);
1122 	free(p->defs);
1123 	free(p);
1124 }
1125