xref: /openbsd-src/usr.bin/mandoc/man_macro.c (revision cd1eb269cafb12c415be1749cd4a4b5422710415)
1 /*	$Id: man_macro.c,v 1.15 2010/04/25 16:32:19 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 #include "libman.h"
23 
/*
 * Verdict returned by the rew_*() helpers while ascending the parse
 * tree in rew_scope().
 */
enum	rew {
	REW_REWIND = 0,	/* stop here and rewind up to this node */
	REW_NOHALT = 1,	/* keep ascending */
	REW_HALT = 2	/* stop immediately without rewinding */
};
29 
/* Macro handlers referenced from the __man_macros dispatch table. */
static	int		 in_line_eoln(MACRO_PROT_ARGS);
static	int		 blk_imp(MACRO_PROT_ARGS);
static	int		 blk_exp(MACRO_PROT_ARGS);
static	int		 blk_close(MACRO_PROT_ARGS);
static	int		 blk_dotted(MACRO_PROT_ARGS);
static	int		 blk_cond(MACRO_PROT_ARGS);

/* Scope-rewinding helpers. */
static	enum rew	 rew_block(enum mant ntok, enum man_type type,
				const struct man_node *n);
static	enum rew	 rew_dohalt(enum mant tok, enum man_type type,
				const struct man_node *n);
static	int		 rew_scope(enum man_type type,
				struct man *m, enum mant tok);
static	int		 rew_warn(struct man *m,
				struct man_node *n, enum merr er);
45 
46 const	struct man_macro __man_macros[MAN_MAX] = {
47 	{ in_line_eoln, MAN_NSCOPED }, /* br */
48 	{ in_line_eoln, 0 }, /* TH */
49 	{ blk_imp, MAN_SCOPED }, /* SH */
50 	{ blk_imp, MAN_SCOPED }, /* SS */
51 	{ blk_imp, MAN_SCOPED | MAN_FSCOPED }, /* TP */
52 	{ blk_imp, 0 }, /* LP */
53 	{ blk_imp, 0 }, /* PP */
54 	{ blk_imp, 0 }, /* P */
55 	{ blk_imp, 0 }, /* IP */
56 	{ blk_imp, 0 }, /* HP */
57 	{ in_line_eoln, MAN_SCOPED }, /* SM */
58 	{ in_line_eoln, MAN_SCOPED }, /* SB */
59 	{ in_line_eoln, 0 }, /* BI */
60 	{ in_line_eoln, 0 }, /* IB */
61 	{ in_line_eoln, 0 }, /* BR */
62 	{ in_line_eoln, 0 }, /* RB */
63 	{ in_line_eoln, MAN_SCOPED }, /* R */
64 	{ in_line_eoln, MAN_SCOPED }, /* B */
65 	{ in_line_eoln, MAN_SCOPED }, /* I */
66 	{ in_line_eoln, 0 }, /* IR */
67 	{ in_line_eoln, 0 }, /* RI */
68 	{ in_line_eoln, MAN_NSCOPED }, /* na */
69 	{ in_line_eoln, 0 }, /* i */
70 	{ in_line_eoln, MAN_NSCOPED }, /* sp */
71 	{ in_line_eoln, 0 }, /* nf */
72 	{ in_line_eoln, 0 }, /* fi */
73 	{ in_line_eoln, 0 }, /* r */
74 	{ blk_close, 0 }, /* RE */
75 	{ blk_exp, MAN_EXPLICIT }, /* RS */
76 	{ in_line_eoln, 0 }, /* DT */
77 	{ in_line_eoln, 0 }, /* UC */
78 	{ in_line_eoln, 0 }, /* PD */
79 	{ in_line_eoln, MAN_NSCOPED }, /* Sp */
80 	{ in_line_eoln, 0 }, /* Vb */
81 	{ in_line_eoln, 0 }, /* Ve */
82 	{ blk_exp, MAN_EXPLICIT | MAN_NOCLOSE}, /* de */
83 	{ blk_exp, MAN_EXPLICIT | MAN_NOCLOSE}, /* dei */
84 	{ blk_exp, MAN_EXPLICIT | MAN_NOCLOSE}, /* am */
85 	{ blk_exp, MAN_EXPLICIT | MAN_NOCLOSE}, /* ami */
86 	{ blk_exp, MAN_EXPLICIT | MAN_NOCLOSE}, /* ig */
87 	{ blk_dotted, 0 }, /* . */
88 	{ blk_cond, 0 }, /* if */
89 	{ blk_cond, 0 }, /* ie */
90 	{ blk_cond, 0 }, /* el */
91 };
92 
93 const	struct man_macro * const man_macros = __man_macros;
94 
95 
96 /*
97  * Warn when "n" is an explicit non-roff macro.
98  */
99 static int
100 rew_warn(struct man *m, struct man_node *n, enum merr er)
101 {
102 
103 	if (er == WERRMAX || MAN_BLOCK != n->type)
104 		return(1);
105 	if (MAN_VALID & n->flags)
106 		return(1);
107 	if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags))
108 		return(1);
109 	if (MAN_NOCLOSE & man_macros[n->tok].flags)
110 		return(1);
111 	return(man_nwarn(m, n, er));
112 }
113 
114 
115 /*
116  * Rewind scope.  If a code "er" != WERRMAX has been provided, it will
117  * be used if an explicit block scope is being closed out.
118  */
119 int
120 man_unscope(struct man *m, const struct man_node *n, enum merr er)
121 {
122 
123 	assert(n);
124 
125 	/* LINTED */
126 	while (m->last != n) {
127 		if ( ! rew_warn(m, m->last, er))
128 			return(0);
129 		if ( ! man_valid_post(m))
130 			return(0);
131 		if ( ! man_action_post(m))
132 			return(0);
133 		m->last = m->last->parent;
134 		assert(m->last);
135 	}
136 
137 	if ( ! rew_warn(m, m->last, er))
138 		return(0);
139 	if ( ! man_valid_post(m))
140 		return(0);
141 	if ( ! man_action_post(m))
142 		return(0);
143 
144 	m->next = MAN_ROOT == m->last->type ?
145 		MAN_NEXT_CHILD : MAN_NEXT_SIBLING;
146 
147 	return(1);
148 }
149 
150 
151 static enum rew
152 rew_block(enum mant ntok, enum man_type type, const struct man_node *n)
153 {
154 
155 	if (MAN_BLOCK == type && ntok == n->parent->tok &&
156 			MAN_BODY == n->parent->type)
157 		return(REW_REWIND);
158 	return(ntok == n->tok ? REW_HALT : REW_NOHALT);
159 }
160 
161 
162 /*
163  * There are three scope levels: scoped to the root (all), scoped to the
164  * section (all less sections), and scoped to subsections (all less
165  * sections and subsections).
166  */
167 static enum rew
168 rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n)
169 {
170 	enum rew	 c;
171 
172 	/* We cannot progress beyond the root ever. */
173 	if (MAN_ROOT == n->type)
174 		return(REW_HALT);
175 
176 	assert(n->parent);
177 
178 	/* Normal nodes shouldn't go to the level of the root. */
179 	if (MAN_ROOT == n->parent->type)
180 		return(REW_REWIND);
181 
182 	/* Already-validated nodes should be closed out. */
183 	if (MAN_VALID & n->flags)
184 		return(REW_NOHALT);
185 
186 	/* First: rewind to ourselves. */
187 	if (type == n->type && tok == n->tok)
188 		return(REW_REWIND);
189 
190 	/*
191 	 * If we're a roff macro, then we can close out anything that
192 	 * stands between us and our parent context.
193 	 */
194 	if (MAN_NOCLOSE & man_macros[tok].flags)
195 		return(REW_NOHALT);
196 
197 	/*
198 	 * Don't clobber roff macros: this is a bit complicated.  If the
199 	 * current macro is a roff macro, halt immediately and don't
200 	 * rewind.  If it's not, and the parent is, then close out the
201 	 * current scope and halt at the parent.
202 	 */
203 	if (MAN_NOCLOSE & man_macros[n->tok].flags)
204 		return(REW_HALT);
205 	if (MAN_NOCLOSE & man_macros[n->parent->tok].flags)
206 		return(REW_REWIND);
207 
208 	/*
209 	 * Next follow the implicit scope-smashings as defined by man.7:
210 	 * section, sub-section, etc.
211 	 */
212 
213 	switch (tok) {
214 	case (MAN_SH):
215 		break;
216 	case (MAN_SS):
217 		/* Rewind to a section, if a block. */
218 		if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
219 			return(c);
220 		break;
221 	case (MAN_RS):
222 		/* Rewind to a subsection, if a block. */
223 		if (REW_NOHALT != (c = rew_block(MAN_SS, type, n)))
224 			return(c);
225 		/* Rewind to a section, if a block. */
226 		if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
227 			return(c);
228 		break;
229 	default:
230 		/* Rewind to an offsetter, if a block. */
231 		if (REW_NOHALT != (c = rew_block(MAN_RS, type, n)))
232 			return(c);
233 		/* Rewind to a subsection, if a block. */
234 		if (REW_NOHALT != (c = rew_block(MAN_SS, type, n)))
235 			return(c);
236 		/* Rewind to a section, if a block. */
237 		if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
238 			return(c);
239 		break;
240 	}
241 
242 	return(REW_NOHALT);
243 }
244 
245 
246 /*
247  * Rewinding entails ascending the parse tree until a coherent point,
248  * for example, the `SH' macro will close out any intervening `SS'
249  * scopes.  When a scope is closed, it must be validated and actioned.
250  */
251 static int
252 rew_scope(enum man_type type, struct man *m, enum mant tok)
253 {
254 	struct man_node	*n;
255 	enum rew	 c;
256 
257 	/* LINTED */
258 	for (n = m->last; n; n = n->parent) {
259 		/*
260 		 * Whether we should stop immediately (REW_HALT), stop
261 		 * and rewind until this point (REW_REWIND), or keep
262 		 * rewinding (REW_NOHALT).
263 		 */
264 		c = rew_dohalt(tok, type, n);
265 		if (REW_HALT == c)
266 			return(1);
267 		if (REW_REWIND == c)
268 			break;
269 	}
270 
271 	/*
272 	 * Rewind until the current point.  Warn if we're a roff
273 	 * instruction that's mowing over explicit scopes.
274 	 */
275 	assert(n);
276 	if (MAN_NOCLOSE & man_macros[tok].flags)
277 		return(man_unscope(m, n, WROFFSCOPE));
278 
279 	return(man_unscope(m, n, WERRMAX));
280 }
281 
282 
283 /*
284  * Closure for brace blocks (if, ie, el).
285  */
286 int
287 man_brace_close(struct man *m, int line, int ppos)
288 {
289 	struct man_node	*nif;
290 
291 	nif = m->last->parent;
292 	while (nif &&
293 	    MAN_if != nif->tok &&
294 	    MAN_ie != nif->tok &&
295 	    MAN_el != nif->tok)
296 		nif = nif->parent;
297 
298 	if (NULL == nif)
299 		return(man_pwarn(m, line, ppos, WNOSCOPE));
300 
301 	if (MAN_ie != nif->tok || MAN_USE & nif->flags)
302 		m->flags &= ~MAN_EL_USE;
303 	else
304 		m->flags |= MAN_EL_USE;
305 
306 	if (MAN_USE & nif->flags) {
307 		if (nif->prev) {
308 			nif->prev->next = nif->child;
309 			nif->child->prev = nif->prev;
310 			nif->prev = NULL;
311 		} else {
312 			nif->parent->child = nif->child;
313 		}
314 		nif->parent->nchild += nif->nchild - 1;
315 		while (nif->child) {
316 			nif->child->parent = nif->parent;
317 			nif->child = nif->child->next;
318 		}
319 		nif->nchild = 0;
320 		nif->parent = NULL;
321 	}
322 	man_node_delete(m, nif);
323 	return(1);
324 }
325 
326 
327 /*
328  * Closure for dotted macros (de, dei, am, ami, ign).  This must handle
329  * any of these as the parent node, so it needs special handling.
330  * Beyond this, it's the same as blk_close().
331  */
332 /* ARGSUSED */
333 int
334 blk_dotted(MACRO_PROT_ARGS)
335 {
336 	enum mant	 ntok;
337 	struct man_node	*nn;
338 
339 	/* Check for any of the following parents... */
340 
341 	for (nn = m->last->parent; nn; nn = nn->parent)
342 		if (nn->tok == MAN_de || nn->tok == MAN_dei ||
343 				nn->tok == MAN_am ||
344 				nn->tok == MAN_ami ||
345 				nn->tok == MAN_ig) {
346 			ntok = nn->tok;
347 			break;
348 		}
349 
350 	if (NULL == nn) {
351 		if ( ! man_pwarn(m, line, ppos, WNOSCOPE))
352 			return(0);
353 		return(1);
354 	}
355 
356 	if ( ! rew_scope(MAN_BODY, m, ntok))
357 		return(0);
358 	if ( ! rew_scope(MAN_BLOCK, m, ntok))
359 		return(0);
360 
361 	/*
362 	 * Restore flags set when we got here and also stipulate that we
363 	 * don't post-process the line when exiting the macro op
364 	 * function in man_pmacro().  See blk_exp().
365 	 */
366 
367 	m->flags = m->svflags | MAN_ILINE;
368 	m->next = m->svnext;
369 	return(1);
370 }
371 
372 
373 /*
374  * Close out a generic explicit macro.
375  */
376 /* ARGSUSED */
377 int
378 blk_close(MACRO_PROT_ARGS)
379 {
380 	enum mant	 	 ntok;
381 	const struct man_node	*nn;
382 
383 	switch (tok) {
384 	case (MAN_RE):
385 		ntok = MAN_RS;
386 		break;
387 	default:
388 		abort();
389 		/* NOTREACHED */
390 	}
391 
392 	for (nn = m->last->parent; nn; nn = nn->parent)
393 		if (ntok == nn->tok)
394 			break;
395 
396 	if (NULL == nn)
397 		if ( ! man_pwarn(m, line, ppos, WNOSCOPE))
398 			return(0);
399 
400 	if ( ! rew_scope(MAN_BODY, m, ntok))
401 		return(0);
402 	if ( ! rew_scope(MAN_BLOCK, m, ntok))
403 		return(0);
404 
405 	return(1);
406 }
407 
408 
409 int
410 blk_exp(MACRO_PROT_ARGS)
411 {
412 	int		 w, la;
413 	char		*p;
414 
415 	/*
416 	 * Close out prior scopes.  "Regular" explicit macros cannot be
417 	 * nested, but we allow roff macros to be placed just about
418 	 * anywhere.
419 	 */
420 
421 	if ( ! (MAN_NOCLOSE & man_macros[tok].flags)) {
422 		if ( ! rew_scope(MAN_BODY, m, tok))
423 			return(0);
424 		if ( ! rew_scope(MAN_BLOCK, m, tok))
425 			return(0);
426 	} else {
427 		/*
428 		 * Save our state and next-scope indicator; we restore
429 		 * it when exiting from the roff instruction block.  See
430 		 * blk_dotted().
431 		 */
432 		m->svflags = m->flags;
433 		m->svnext = m->next;
434 
435 		/* Make sure we drop any line modes. */
436 		m->flags = 0;
437 	}
438 
439 	if ( ! man_block_alloc(m, line, ppos, tok))
440 		return(0);
441 	if ( ! man_head_alloc(m, line, ppos, tok))
442 		return(0);
443 
444 	for (;;) {
445 		la = *pos;
446 		w = man_args(m, line, pos, buf, &p);
447 
448 		if (-1 == w)
449 			return(0);
450 		if (0 == w)
451 			break;
452 
453 		if ( ! man_word_alloc(m, line, la, p))
454 			return(0);
455 	}
456 
457 	assert(m);
458 	assert(tok != MAN_MAX);
459 
460 	if ( ! rew_scope(MAN_HEAD, m, tok))
461 		return(0);
462 	return(man_body_alloc(m, line, ppos, tok));
463 }
464 
465 
466 
467 /*
468  * Parse an implicit-block macro.  These contain a MAN_HEAD and a
469  * MAN_BODY contained within a MAN_BLOCK.  Rules for closing out other
470  * scopes, such as `SH' closing out an `SS', are defined in the rew
471  * routines.
472  */
473 int
474 blk_imp(MACRO_PROT_ARGS)
475 {
476 	int		 w, la;
477 	char		*p;
478 	struct man_node	*n;
479 
480 	/* Close out prior scopes. */
481 
482 	if ( ! rew_scope(MAN_BODY, m, tok))
483 		return(0);
484 	if ( ! rew_scope(MAN_BLOCK, m, tok))
485 		return(0);
486 
487 	/* Allocate new block & head scope. */
488 
489 	if ( ! man_block_alloc(m, line, ppos, tok))
490 		return(0);
491 	if ( ! man_head_alloc(m, line, ppos, tok))
492 		return(0);
493 
494 	n = m->last;
495 
496 	/* Add line arguments. */
497 
498 	for (;;) {
499 		la = *pos;
500 		w = man_args(m, line, pos, buf, &p);
501 
502 		if (-1 == w)
503 			return(0);
504 		if (0 == w)
505 			break;
506 
507 		if ( ! man_word_alloc(m, line, la, p))
508 			return(0);
509 	}
510 
511 	/* Close out head and open body (unless MAN_SCOPE). */
512 
513 	if (MAN_SCOPED & man_macros[tok].flags) {
514 		/* If we're forcing scope (`TP'), keep it open. */
515 		if (MAN_FSCOPED & man_macros[tok].flags) {
516 			m->flags |= MAN_BLINE;
517 			return(1);
518 		} else if (n == m->last) {
519 			m->flags |= MAN_BLINE;
520 			return(1);
521 		}
522 	}
523 
524 	if ( ! rew_scope(MAN_HEAD, m, tok))
525 		return(0);
526 	return(man_body_alloc(m, line, ppos, tok));
527 }
528 
529 
530 /*
531  * Parse a conditional roff instruction.
532  */
533 int
534 blk_cond(MACRO_PROT_ARGS)
535 {
536 	char		*p = buf + *pos;
537 	int		 use;
538 
539 	if (MAN_el == tok)
540 		use = m->flags & MAN_EL_USE;
541 	else {
542 		use = 'n' == *p++;
543 		/* XXX skip the rest of the condition for now */
544 		while (*p && !isblank(*p))
545 			p++;
546 	}
547 	m->flags &= ~MAN_EL_USE;
548 
549 	/* advance to the code controlled by the condition */
550 	while (*p && isblank(*p))
551 		p++;
552 	if ('\0' == *p)
553 		return(1);
554 
555 	/* single-line body */
556 	if (strncmp("\\{", p, 2)) {
557 		if (use && ! man_parseln(m, line, p))
558 			return(0);
559 	        if (MAN_ie == tok && !use)
560                         m->flags |= MAN_EL_USE;
561 		return(1);
562         }
563 
564 	/* multi-line body */
565 	if ( ! man_block_alloc(m, line, ppos, tok))
566 		return(0);
567 	if (use)
568 		m->last->flags |= MAN_USE;
569 	p += 2;
570 	return(*p ? man_parseln(m, line, p) : 1);
571 }
572 
573 
574 int
575 in_line_eoln(MACRO_PROT_ARGS)
576 {
577 	int		 w, la;
578 	char		*p;
579 	struct man_node	*n;
580 
581 	if ( ! man_elem_alloc(m, line, ppos, tok))
582 		return(0);
583 
584 	n = m->last;
585 
586 	for (;;) {
587 		la = *pos;
588 		w = man_args(m, line, pos, buf, &p);
589 
590 		if (-1 == w)
591 			return(0);
592 		if (0 == w)
593 			break;
594 		if ( ! man_word_alloc(m, line, la, p))
595 			return(0);
596 	}
597 
598 	/*
599 	 * If no arguments are specified and this is MAN_SCOPED (i.e.,
600 	 * next-line scoped), then set our mode to indicate that we're
601 	 * waiting for terms to load into our context.
602 	 */
603 
604 	if (n == m->last && MAN_SCOPED & man_macros[tok].flags) {
605 		assert( ! (MAN_NSCOPED & man_macros[tok].flags));
606 		m->flags |= MAN_ELINE;
607 		return(1);
608 	}
609 
610 	/* Set ignorable context, if applicable. */
611 
612 	if (MAN_NSCOPED & man_macros[tok].flags) {
613 		assert( ! (MAN_SCOPED & man_macros[tok].flags));
614 		m->flags |= MAN_ILINE;
615 	}
616 
617 	/*
618 	 * Rewind our element scope.  Note that when TH is pruned, we'll
619 	 * be back at the root, so make sure that we don't clobber as
620 	 * its sibling.
621 	 */
622 
623 	for ( ; m->last; m->last = m->last->parent) {
624 		if (m->last == n)
625 			break;
626 		if (m->last->type == MAN_ROOT)
627 			break;
628 		if ( ! man_valid_post(m))
629 			return(0);
630 		if ( ! man_action_post(m))
631 			return(0);
632 	}
633 
634 	assert(m->last);
635 
636 	/*
637 	 * Same here regarding whether we're back at the root.
638 	 */
639 
640 	if (m->last->type != MAN_ROOT && ! man_valid_post(m))
641 		return(0);
642 	if (m->last->type != MAN_ROOT && ! man_action_post(m))
643 		return(0);
644 
645 	m->next = MAN_ROOT == m->last->type ?
646 		MAN_NEXT_CHILD : MAN_NEXT_SIBLING;
647 
648 	return(1);
649 }
650 
651 
652 int
653 man_macroend(struct man *m)
654 {
655 
656 	return(man_unscope(m, m->first, WEXITSCOPE));
657 }
658 
659