xref: /openbsd-src/usr.bin/mandoc/man.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$Id: man.c,v 1.67 2012/07/07 18:27:36 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/types.h>
18 
19 #include <assert.h>
20 #include <stdarg.h>
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 #include "man.h"
26 #include "mandoc.h"
27 #include "libman.h"
28 #include "libmandoc.h"
29 
30 const	char *const __man_macronames[MAN_MAX] = {
31 	"br",		"TH",		"SH",		"SS",
32 	"TP", 		"LP",		"PP",		"P",
33 	"IP",		"HP",		"SM",		"SB",
34 	"BI",		"IB",		"BR",		"RB",
35 	"R",		"B",		"I",		"IR",
36 	"RI",		"na",		"sp",		"nf",
37 	"fi",		"RE",		"RS",		"DT",
38 	"UC",		"PD",		"AT",		"in",
39 	"ft",		"OP",		"EX",		"EE"
40 	};
41 
42 const	char * const *man_macronames = __man_macronames;
43 
/* Parser state set-up and tear-down. */
static	void		 man_alloc1(struct man *m);
static	void		 man_free1(struct man *man);

/* Syntax tree node management. */
static	struct man_node	*man_node_alloc(struct man *m, int line, int pos,
				enum man_type type, enum mant tok);
static	int		 man_node_append(struct man *man,
				struct man_node *p);
static	void		 man_node_free(struct man_node *p);
static	void		 man_node_unlink(struct man *m,
				struct man_node *n);

/* Per-line parsing. */
static	int		 man_ptext(struct man *m, int line, char *buf,
				int offs);
static	int		 man_pmacro(struct man *m, int ln, char *buf,
				int offs);
static	int		 man_descope(struct man *m, int line, int offs);
56 
57 
58 const struct man_node *
59 man_node(const struct man *m)
60 {
61 
62 	assert( ! (MAN_HALT & m->flags));
63 	return(m->first);
64 }
65 
66 
67 const struct man_meta *
68 man_meta(const struct man *m)
69 {
70 
71 	assert( ! (MAN_HALT & m->flags));
72 	return(&m->meta);
73 }
74 
75 
/*
 * Return the parser to its initial state so it can be reused:
 * release the current tree and meta-data, then set up a fresh root.
 */
void
man_reset(struct man *man)
{
	man_free1(man);		/* Drop tree and meta-data strings. */
	man_alloc1(man);	/* Clear meta-data and flags, new root. */
}
83 
84 
/*
 * Destroy a parser: release everything it owns, then the parser
 * structure itself.  The pointer must not be used afterwards.
 */
void
man_free(struct man *man)
{
	man_free1(man);		/* Tree and meta-data strings. */
	free(man);		/* The handle itself. */
}
92 
93 
94 struct man *
95 man_alloc(struct roff *roff, struct mparse *parse)
96 {
97 	struct man	*p;
98 
99 	p = mandoc_calloc(1, sizeof(struct man));
100 
101 	man_hash_init();
102 	p->parse = parse;
103 	p->roff = roff;
104 
105 	man_alloc1(p);
106 	return(p);
107 }
108 
109 
110 int
111 man_endparse(struct man *m)
112 {
113 
114 	assert( ! (MAN_HALT & m->flags));
115 	if (man_macroend(m))
116 		return(1);
117 	m->flags |= MAN_HALT;
118 	return(0);
119 }
120 
121 
122 int
123 man_parseln(struct man *m, int ln, char *buf, int offs)
124 {
125 
126 	m->flags |= MAN_NEWLINE;
127 
128 	assert( ! (MAN_HALT & m->flags));
129 
130 	return (roff_getcontrol(m->roff, buf, &offs) ?
131 			man_pmacro(m, ln, buf, offs) :
132 			man_ptext(m, ln, buf, offs));
133 }
134 
135 
136 static void
137 man_free1(struct man *man)
138 {
139 
140 	if (man->first)
141 		man_node_delete(man, man->first);
142 	if (man->meta.title)
143 		free(man->meta.title);
144 	if (man->meta.source)
145 		free(man->meta.source);
146 	if (man->meta.date)
147 		free(man->meta.date);
148 	if (man->meta.vol)
149 		free(man->meta.vol);
150 	if (man->meta.msec)
151 		free(man->meta.msec);
152 }
153 
154 
155 static void
156 man_alloc1(struct man *m)
157 {
158 
159 	memset(&m->meta, 0, sizeof(struct man_meta));
160 	m->flags = 0;
161 	m->last = mandoc_calloc(1, sizeof(struct man_node));
162 	m->first = m->last;
163 	m->last->type = MAN_ROOT;
164 	m->last->tok = MAN_MAX;
165 	m->next = MAN_NEXT_CHILD;
166 }
167 
168 
169 static int
170 man_node_append(struct man *man, struct man_node *p)
171 {
172 
173 	assert(man->last);
174 	assert(man->first);
175 	assert(MAN_ROOT != p->type);
176 
177 	switch (man->next) {
178 	case (MAN_NEXT_SIBLING):
179 		man->last->next = p;
180 		p->prev = man->last;
181 		p->parent = man->last->parent;
182 		break;
183 	case (MAN_NEXT_CHILD):
184 		man->last->child = p;
185 		p->parent = man->last;
186 		break;
187 	default:
188 		abort();
189 		/* NOTREACHED */
190 	}
191 
192 	assert(p->parent);
193 	p->parent->nchild++;
194 
195 	if ( ! man_valid_pre(man, p))
196 		return(0);
197 
198 	switch (p->type) {
199 	case (MAN_HEAD):
200 		assert(MAN_BLOCK == p->parent->type);
201 		p->parent->head = p;
202 		break;
203 	case (MAN_TAIL):
204 		assert(MAN_BLOCK == p->parent->type);
205 		p->parent->tail = p;
206 		break;
207 	case (MAN_BODY):
208 		assert(MAN_BLOCK == p->parent->type);
209 		p->parent->body = p;
210 		break;
211 	default:
212 		break;
213 	}
214 
215 	man->last = p;
216 
217 	switch (p->type) {
218 	case (MAN_TBL):
219 		/* FALLTHROUGH */
220 	case (MAN_TEXT):
221 		if ( ! man_valid_post(man))
222 			return(0);
223 		break;
224 	default:
225 		break;
226 	}
227 
228 	return(1);
229 }
230 
231 
232 static struct man_node *
233 man_node_alloc(struct man *m, int line, int pos,
234 		enum man_type type, enum mant tok)
235 {
236 	struct man_node *p;
237 
238 	p = mandoc_calloc(1, sizeof(struct man_node));
239 	p->line = line;
240 	p->pos = pos;
241 	p->type = type;
242 	p->tok = tok;
243 
244 	if (MAN_NEWLINE & m->flags)
245 		p->flags |= MAN_LINE;
246 	m->flags &= ~MAN_NEWLINE;
247 	return(p);
248 }
249 
250 
251 int
252 man_elem_alloc(struct man *m, int line, int pos, enum mant tok)
253 {
254 	struct man_node *p;
255 
256 	p = man_node_alloc(m, line, pos, MAN_ELEM, tok);
257 	if ( ! man_node_append(m, p))
258 		return(0);
259 	m->next = MAN_NEXT_CHILD;
260 	return(1);
261 }
262 
263 
264 int
265 man_tail_alloc(struct man *m, int line, int pos, enum mant tok)
266 {
267 	struct man_node *p;
268 
269 	p = man_node_alloc(m, line, pos, MAN_TAIL, tok);
270 	if ( ! man_node_append(m, p))
271 		return(0);
272 	m->next = MAN_NEXT_CHILD;
273 	return(1);
274 }
275 
276 
277 int
278 man_head_alloc(struct man *m, int line, int pos, enum mant tok)
279 {
280 	struct man_node *p;
281 
282 	p = man_node_alloc(m, line, pos, MAN_HEAD, tok);
283 	if ( ! man_node_append(m, p))
284 		return(0);
285 	m->next = MAN_NEXT_CHILD;
286 	return(1);
287 }
288 
289 
290 int
291 man_body_alloc(struct man *m, int line, int pos, enum mant tok)
292 {
293 	struct man_node *p;
294 
295 	p = man_node_alloc(m, line, pos, MAN_BODY, tok);
296 	if ( ! man_node_append(m, p))
297 		return(0);
298 	m->next = MAN_NEXT_CHILD;
299 	return(1);
300 }
301 
302 
303 int
304 man_block_alloc(struct man *m, int line, int pos, enum mant tok)
305 {
306 	struct man_node *p;
307 
308 	p = man_node_alloc(m, line, pos, MAN_BLOCK, tok);
309 	if ( ! man_node_append(m, p))
310 		return(0);
311 	m->next = MAN_NEXT_CHILD;
312 	return(1);
313 }
314 
315 int
316 man_word_alloc(struct man *m, int line, int pos, const char *word)
317 {
318 	struct man_node	*n;
319 
320 	n = man_node_alloc(m, line, pos, MAN_TEXT, MAN_MAX);
321 	n->string = roff_strdup(m->roff, word);
322 
323 	if ( ! man_node_append(m, n))
324 		return(0);
325 
326 	m->next = MAN_NEXT_SIBLING;
327 	return(1);
328 }
329 
330 
331 /*
332  * Free all of the resources held by a node.  This does NOT unlink a
333  * node from its context; for that, see man_node_unlink().
334  */
335 static void
336 man_node_free(struct man_node *p)
337 {
338 
339 	if (p->string)
340 		free(p->string);
341 	free(p);
342 }
343 
344 
345 void
346 man_node_delete(struct man *m, struct man_node *p)
347 {
348 
349 	while (p->child)
350 		man_node_delete(m, p->child);
351 
352 	man_node_unlink(m, p);
353 	man_node_free(p);
354 }
355 
356 int
357 man_addeqn(struct man *m, const struct eqn *ep)
358 {
359 	struct man_node	*n;
360 
361 	assert( ! (MAN_HALT & m->flags));
362 
363 	n = man_node_alloc(m, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
364 	n->eqn = ep;
365 
366 	if ( ! man_node_append(m, n))
367 		return(0);
368 
369 	m->next = MAN_NEXT_SIBLING;
370 	return(man_descope(m, ep->ln, ep->pos));
371 }
372 
373 int
374 man_addspan(struct man *m, const struct tbl_span *sp)
375 {
376 	struct man_node	*n;
377 
378 	assert( ! (MAN_HALT & m->flags));
379 
380 	n = man_node_alloc(m, sp->line, 0, MAN_TBL, MAN_MAX);
381 	n->span = sp;
382 
383 	if ( ! man_node_append(m, n))
384 		return(0);
385 
386 	m->next = MAN_NEXT_SIBLING;
387 	return(man_descope(m, sp->line, 0));
388 }
389 
390 static int
391 man_descope(struct man *m, int line, int offs)
392 {
393 	/*
394 	 * Co-ordinate what happens with having a next-line scope open:
395 	 * first close out the element scope (if applicable), then close
396 	 * out the block scope (also if applicable).
397 	 */
398 
399 	if (MAN_ELINE & m->flags) {
400 		m->flags &= ~MAN_ELINE;
401 		if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
402 			return(0);
403 	}
404 
405 	if ( ! (MAN_BLINE & m->flags))
406 		return(1);
407 	m->flags &= ~MAN_BLINE;
408 
409 	if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
410 		return(0);
411 	return(man_body_alloc(m, line, offs, m->last->tok));
412 }
413 
414 static int
415 man_ptext(struct man *m, int line, char *buf, int offs)
416 {
417 	int		 i;
418 
419 	/* Literal free-form text whitespace is preserved. */
420 
421 	if (MAN_LITERAL & m->flags) {
422 		if ( ! man_word_alloc(m, line, offs, buf + offs))
423 			return(0);
424 		return(man_descope(m, line, offs));
425 	}
426 
427 	/* Pump blank lines directly into the backend. */
428 
429 	for (i = offs; ' ' == buf[i]; i++)
430 		/* Skip leading whitespace. */ ;
431 
432 	if ('\0' == buf[i]) {
433 		/* Allocate a blank entry. */
434 		if ( ! man_word_alloc(m, line, offs, ""))
435 			return(0);
436 		return(man_descope(m, line, offs));
437 	}
438 
439 	/*
440 	 * Warn if the last un-escaped character is whitespace. Then
441 	 * strip away the remaining spaces (tabs stay!).
442 	 */
443 
444 	i = (int)strlen(buf);
445 	assert(i);
446 
447 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
448 		if (i > 1 && '\\' != buf[i - 2])
449 			man_pmsg(m, line, i - 1, MANDOCERR_EOLNSPACE);
450 
451 		for (--i; i && ' ' == buf[i]; i--)
452 			/* Spin back to non-space. */ ;
453 
454 		/* Jump ahead of escaped whitespace. */
455 		i += '\\' == buf[i] ? 2 : 1;
456 
457 		buf[i] = '\0';
458 	}
459 
460 	if ( ! man_word_alloc(m, line, offs, buf + offs))
461 		return(0);
462 
463 	/*
464 	 * End-of-sentence check.  If the last character is an unescaped
465 	 * EOS character, then flag the node as being the end of a
466 	 * sentence.  The front-end will know how to interpret this.
467 	 */
468 
469 	assert(i);
470 	if (mandoc_eos(buf, (size_t)i, 0))
471 		m->last->flags |= MAN_EOS;
472 
473 	return(man_descope(m, line, offs));
474 }
475 
476 static int
477 man_pmacro(struct man *m, int ln, char *buf, int offs)
478 {
479 	int		 i, ppos;
480 	enum mant	 tok;
481 	char		 mac[5];
482 	struct man_node	*n;
483 
484 	if ('"' == buf[offs]) {
485 		man_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT);
486 		return(1);
487 	} else if ('\0' == buf[offs])
488 		return(1);
489 
490 	ppos = offs;
491 
492 	/*
493 	 * Copy the first word into a nil-terminated buffer.
494 	 * Stop copying when a tab, space, or eoln is encountered.
495 	 */
496 
497 	i = 0;
498 	while (i < 4 && '\0' != buf[offs] &&
499 			' ' != buf[offs] && '\t' != buf[offs])
500 		mac[i++] = buf[offs++];
501 
502 	mac[i] = '\0';
503 
504 	tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
505 
506 	if (MAN_MAX == tok) {
507 		mandoc_vmsg(MANDOCERR_MACRO, m->parse, ln,
508 				ppos, "%s", buf + ppos - 1);
509 		return(1);
510 	}
511 
512 	/* The macro is sane.  Jump to the next word. */
513 
514 	while (buf[offs] && ' ' == buf[offs])
515 		offs++;
516 
517 	/*
518 	 * Trailing whitespace.  Note that tabs are allowed to be passed
519 	 * into the parser as "text", so we only warn about spaces here.
520 	 */
521 
522 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
523 		man_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE);
524 
525 	/*
526 	 * Remove prior ELINE macro, as it's being clobbered by a new
527 	 * macro.  Note that NSCOPED macros do not close out ELINE
528 	 * macros---they don't print text---so we let those slip by.
529 	 */
530 
531 	if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
532 			m->flags & MAN_ELINE) {
533 		n = m->last;
534 		assert(MAN_TEXT != n->type);
535 
536 		/* Remove repeated NSCOPED macros causing ELINE. */
537 
538 		if (MAN_NSCOPED & man_macros[n->tok].flags)
539 			n = n->parent;
540 
541 		mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line,
542 		    n->pos, "%s breaks %s", man_macronames[tok],
543 		    man_macronames[n->tok]);
544 
545 		man_node_delete(m, n);
546 		m->flags &= ~MAN_ELINE;
547 	}
548 
549 	/*
550 	 * Remove prior BLINE macro that is being clobbered.
551 	 */
552 	if ((m->flags & MAN_BLINE) &&
553 	    (MAN_BSCOPE & man_macros[tok].flags)) {
554 		n = m->last;
555 
556 		/* Might be a text node like 8 in
557 		 * .TP 8
558 		 * .SH foo
559 		 */
560 		if (MAN_TEXT == n->type)
561 			n = n->parent;
562 
563 		/* Remove element that didn't end BLINE, if any. */
564 		if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
565 			n = n->parent;
566 
567 		assert(MAN_HEAD == n->type);
568 		n = n->parent;
569 		assert(MAN_BLOCK == n->type);
570 		assert(MAN_SCOPED & man_macros[n->tok].flags);
571 
572 		mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line,
573 		    n->pos, "%s breaks %s", man_macronames[tok],
574 		    man_macronames[n->tok]);
575 
576 		man_node_delete(m, n);
577 		m->flags &= ~MAN_BLINE;
578 	}
579 
580 	/*
581 	 * Save the fact that we're in the next-line for a block.  In
582 	 * this way, embedded roff instructions can "remember" state
583 	 * when they exit.
584 	 */
585 
586 	if (MAN_BLINE & m->flags)
587 		m->flags |= MAN_BPLINE;
588 
589 	/* Call to handler... */
590 
591 	assert(man_macros[tok].fp);
592 	if ( ! (*man_macros[tok].fp)(m, tok, ln, ppos, &offs, buf))
593 		goto err;
594 
595 	/*
596 	 * We weren't in a block-line scope when entering the
597 	 * above-parsed macro, so return.
598 	 */
599 
600 	if ( ! (MAN_BPLINE & m->flags)) {
601 		m->flags &= ~MAN_ILINE;
602 		return(1);
603 	}
604 	m->flags &= ~MAN_BPLINE;
605 
606 	/*
607 	 * If we're in a block scope, then allow this macro to slip by
608 	 * without closing scope around it.
609 	 */
610 
611 	if (MAN_ILINE & m->flags) {
612 		m->flags &= ~MAN_ILINE;
613 		return(1);
614 	}
615 
616 	/*
617 	 * If we've opened a new next-line element scope, then return
618 	 * now, as the next line will close out the block scope.
619 	 */
620 
621 	if (MAN_ELINE & m->flags)
622 		return(1);
623 
624 	/* Close out the block scope opened in the prior line.  */
625 
626 	assert(MAN_BLINE & m->flags);
627 	m->flags &= ~MAN_BLINE;
628 
629 	if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
630 		return(0);
631 	return(man_body_alloc(m, ln, ppos, m->last->tok));
632 
633 err:	/* Error out. */
634 
635 	m->flags |= MAN_HALT;
636 	return(0);
637 }
638 
639 /*
640  * Unlink a node from its context.  If "m" is provided, the last parse
641  * point will also be adjusted accordingly.
642  */
643 static void
644 man_node_unlink(struct man *m, struct man_node *n)
645 {
646 
647 	/* Adjust siblings. */
648 
649 	if (n->prev)
650 		n->prev->next = n->next;
651 	if (n->next)
652 		n->next->prev = n->prev;
653 
654 	/* Adjust parent. */
655 
656 	if (n->parent) {
657 		n->parent->nchild--;
658 		if (n->parent->child == n)
659 			n->parent->child = n->prev ? n->prev : n->next;
660 	}
661 
662 	/* Adjust parse point, if applicable. */
663 
664 	if (m && m->last == n) {
665 		/*XXX: this can occur when bailing from validation. */
666 		/*assert(NULL == n->next);*/
667 		if (n->prev) {
668 			m->last = n->prev;
669 			m->next = MAN_NEXT_SIBLING;
670 		} else {
671 			m->last = n->parent;
672 			m->next = MAN_NEXT_CHILD;
673 		}
674 	}
675 
676 	if (m && m->first == n)
677 		m->first = NULL;
678 }
679 
680 const struct mparse *
681 man_mparse(const struct man *m)
682 {
683 
684 	assert(m && m->parse);
685 	return(m->parse);
686 }
687