xref: /openbsd-src/usr.bin/mandoc/man.c (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)
1 /*	$Id: man.c,v 1.77 2014/03/23 20:57:23 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 
28 #include "man.h"
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libman.h"
32 #include "libmandoc.h"
33 
34 const	char *const __man_macronames[MAN_MAX] = {
35 	"br",		"TH",		"SH",		"SS",
36 	"TP", 		"LP",		"PP",		"P",
37 	"IP",		"HP",		"SM",		"SB",
38 	"BI",		"IB",		"BR",		"RB",
39 	"R",		"B",		"I",		"IR",
40 	"RI",		"na",		"sp",		"nf",
41 	"fi",		"RE",		"RS",		"DT",
42 	"UC",		"PD",		"AT",		"in",
43 	"ft",		"OP",		"EX",		"EE",
44 	"UR",		"UE"
45 	};
46 
47 const	char * const *man_macronames = __man_macronames;
48 
/* Node construction, linkage and destruction. */
static	struct man_node	*man_node_alloc(struct man *, int, int,
				enum man_type, enum mant);
static	int		 man_node_append(struct man *,
				struct man_node *);
static	void		 man_node_unlink(struct man *,
				struct man_node *);
static	void		 man_node_free(struct man_node *);

/* Parser state set-up and tear-down. */
static	void		 man_alloc1(struct man *);
static	void		 man_free1(struct man *);

/* Per-line parsing. */
static	int		 man_pmacro(struct man *, int, char *, int);
static	int		 man_ptext(struct man *, int, char *, int);
static	int		 man_descope(struct man *, int, int);

61 
62 
63 const struct man_node *
64 man_node(const struct man *man)
65 {
66 
67 	assert( ! (MAN_HALT & man->flags));
68 	return(man->first);
69 }
70 
71 
72 const struct man_meta *
73 man_meta(const struct man *man)
74 {
75 
76 	assert( ! (MAN_HALT & man->flags));
77 	return(&man->meta);
78 }
79 
80 
/*
 * Bring an existing parser back to its initial state so that it
 * can be reused: the parse results are released, then the parse
 * state is initialized afresh.
 */
void
man_reset(struct man *man)
{
	man_free1(man);		/* Release nodes and meta strings. */
	man_alloc1(man);	/* Start over with an empty root. */
}
88 
89 
/*
 * Destroy a parser: release everything it holds,
 * then the parser structure itself.
 */
void
man_free(struct man *man)
{
	man_free1(man);		/* Contents first... */
	free(man);		/* ...then the container. */
}
97 
98 
99 struct man *
100 man_alloc(struct roff *roff, struct mparse *parse, int quick)
101 {
102 	struct man	*p;
103 
104 	p = mandoc_calloc(1, sizeof(struct man));
105 
106 	man_hash_init();
107 	p->parse = parse;
108 	p->quick = quick;
109 	p->roff = roff;
110 
111 	man_alloc1(p);
112 	return(p);
113 }
114 
115 
116 int
117 man_endparse(struct man *man)
118 {
119 
120 	assert( ! (MAN_HALT & man->flags));
121 	if (man_macroend(man))
122 		return(1);
123 	man->flags |= MAN_HALT;
124 	return(0);
125 }
126 
127 
128 int
129 man_parseln(struct man *man, int ln, char *buf, int offs)
130 {
131 
132 	man->flags |= MAN_NEWLINE;
133 
134 	assert( ! (MAN_HALT & man->flags));
135 
136 	return (roff_getcontrol(man->roff, buf, &offs) ?
137 			man_pmacro(man, ln, buf, offs) :
138 			man_ptext(man, ln, buf, offs));
139 }
140 
141 
142 static void
143 man_free1(struct man *man)
144 {
145 
146 	if (man->first)
147 		man_node_delete(man, man->first);
148 	if (man->meta.title)
149 		free(man->meta.title);
150 	if (man->meta.source)
151 		free(man->meta.source);
152 	if (man->meta.date)
153 		free(man->meta.date);
154 	if (man->meta.vol)
155 		free(man->meta.vol);
156 	if (man->meta.msec)
157 		free(man->meta.msec);
158 }
159 
160 
161 static void
162 man_alloc1(struct man *man)
163 {
164 
165 	memset(&man->meta, 0, sizeof(struct man_meta));
166 	man->flags = 0;
167 	man->last = mandoc_calloc(1, sizeof(struct man_node));
168 	man->first = man->last;
169 	man->last->type = MAN_ROOT;
170 	man->last->tok = MAN_MAX;
171 	man->next = MAN_NEXT_CHILD;
172 }
173 
174 
175 static int
176 man_node_append(struct man *man, struct man_node *p)
177 {
178 
179 	assert(man->last);
180 	assert(man->first);
181 	assert(MAN_ROOT != p->type);
182 
183 	switch (man->next) {
184 	case (MAN_NEXT_SIBLING):
185 		man->last->next = p;
186 		p->prev = man->last;
187 		p->parent = man->last->parent;
188 		break;
189 	case (MAN_NEXT_CHILD):
190 		man->last->child = p;
191 		p->parent = man->last;
192 		break;
193 	default:
194 		abort();
195 		/* NOTREACHED */
196 	}
197 
198 	assert(p->parent);
199 	p->parent->nchild++;
200 
201 	if ( ! man_valid_pre(man, p))
202 		return(0);
203 
204 	switch (p->type) {
205 	case (MAN_HEAD):
206 		assert(MAN_BLOCK == p->parent->type);
207 		p->parent->head = p;
208 		break;
209 	case (MAN_TAIL):
210 		assert(MAN_BLOCK == p->parent->type);
211 		p->parent->tail = p;
212 		break;
213 	case (MAN_BODY):
214 		assert(MAN_BLOCK == p->parent->type);
215 		p->parent->body = p;
216 		break;
217 	default:
218 		break;
219 	}
220 
221 	man->last = p;
222 
223 	switch (p->type) {
224 	case (MAN_TBL):
225 		/* FALLTHROUGH */
226 	case (MAN_TEXT):
227 		if ( ! man_valid_post(man))
228 			return(0);
229 		break;
230 	default:
231 		break;
232 	}
233 
234 	return(1);
235 }
236 
237 
238 static struct man_node *
239 man_node_alloc(struct man *man, int line, int pos,
240 		enum man_type type, enum mant tok)
241 {
242 	struct man_node *p;
243 
244 	p = mandoc_calloc(1, sizeof(struct man_node));
245 	p->line = line;
246 	p->pos = pos;
247 	p->type = type;
248 	p->tok = tok;
249 
250 	if (MAN_NEWLINE & man->flags)
251 		p->flags |= MAN_LINE;
252 	man->flags &= ~MAN_NEWLINE;
253 	return(p);
254 }
255 
256 
257 int
258 man_elem_alloc(struct man *man, int line, int pos, enum mant tok)
259 {
260 	struct man_node *p;
261 
262 	p = man_node_alloc(man, line, pos, MAN_ELEM, tok);
263 	if ( ! man_node_append(man, p))
264 		return(0);
265 	man->next = MAN_NEXT_CHILD;
266 	return(1);
267 }
268 
269 
270 int
271 man_tail_alloc(struct man *man, int line, int pos, enum mant tok)
272 {
273 	struct man_node *p;
274 
275 	p = man_node_alloc(man, line, pos, MAN_TAIL, tok);
276 	if ( ! man_node_append(man, p))
277 		return(0);
278 	man->next = MAN_NEXT_CHILD;
279 	return(1);
280 }
281 
282 
283 int
284 man_head_alloc(struct man *man, int line, int pos, enum mant tok)
285 {
286 	struct man_node *p;
287 
288 	p = man_node_alloc(man, line, pos, MAN_HEAD, tok);
289 	if ( ! man_node_append(man, p))
290 		return(0);
291 	man->next = MAN_NEXT_CHILD;
292 	return(1);
293 }
294 
295 
296 int
297 man_body_alloc(struct man *man, int line, int pos, enum mant tok)
298 {
299 	struct man_node *p;
300 
301 	p = man_node_alloc(man, line, pos, MAN_BODY, tok);
302 	if ( ! man_node_append(man, p))
303 		return(0);
304 	man->next = MAN_NEXT_CHILD;
305 	return(1);
306 }
307 
308 
309 int
310 man_block_alloc(struct man *man, int line, int pos, enum mant tok)
311 {
312 	struct man_node *p;
313 
314 	p = man_node_alloc(man, line, pos, MAN_BLOCK, tok);
315 	if ( ! man_node_append(man, p))
316 		return(0);
317 	man->next = MAN_NEXT_CHILD;
318 	return(1);
319 }
320 
321 int
322 man_word_alloc(struct man *man, int line, int pos, const char *word)
323 {
324 	struct man_node	*n;
325 
326 	n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX);
327 	n->string = roff_strdup(man->roff, word);
328 
329 	if ( ! man_node_append(man, n))
330 		return(0);
331 
332 	man->next = MAN_NEXT_SIBLING;
333 	return(1);
334 }
335 
336 
337 /*
338  * Free all of the resources held by a node.  This does NOT unlink a
339  * node from its context; for that, see man_node_unlink().
340  */
341 static void
342 man_node_free(struct man_node *p)
343 {
344 
345 	if (p->string)
346 		free(p->string);
347 	free(p);
348 }
349 
350 
351 void
352 man_node_delete(struct man *man, struct man_node *p)
353 {
354 
355 	while (p->child)
356 		man_node_delete(man, p->child);
357 
358 	man_node_unlink(man, p);
359 	man_node_free(p);
360 }
361 
362 int
363 man_addeqn(struct man *man, const struct eqn *ep)
364 {
365 	struct man_node	*n;
366 
367 	assert( ! (MAN_HALT & man->flags));
368 
369 	n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
370 	n->eqn = ep;
371 
372 	if ( ! man_node_append(man, n))
373 		return(0);
374 
375 	man->next = MAN_NEXT_SIBLING;
376 	return(man_descope(man, ep->ln, ep->pos));
377 }
378 
379 int
380 man_addspan(struct man *man, const struct tbl_span *sp)
381 {
382 	struct man_node	*n;
383 
384 	assert( ! (MAN_HALT & man->flags));
385 
386 	n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX);
387 	n->span = sp;
388 
389 	if ( ! man_node_append(man, n))
390 		return(0);
391 
392 	man->next = MAN_NEXT_SIBLING;
393 	return(man_descope(man, sp->line, 0));
394 }
395 
396 static int
397 man_descope(struct man *man, int line, int offs)
398 {
399 	/*
400 	 * Co-ordinate what happens with having a next-line scope open:
401 	 * first close out the element scope (if applicable), then close
402 	 * out the block scope (also if applicable).
403 	 */
404 
405 	if (MAN_ELINE & man->flags) {
406 		man->flags &= ~MAN_ELINE;
407 		if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
408 			return(0);
409 	}
410 
411 	if ( ! (MAN_BLINE & man->flags))
412 		return(1);
413 	man->flags &= ~MAN_BLINE;
414 
415 	if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
416 		return(0);
417 	return(man_body_alloc(man, line, offs, man->last->tok));
418 }
419 
420 static int
421 man_ptext(struct man *man, int line, char *buf, int offs)
422 {
423 	int		 i;
424 
425 	/* Literal free-form text whitespace is preserved. */
426 
427 	if (MAN_LITERAL & man->flags) {
428 		if ( ! man_word_alloc(man, line, offs, buf + offs))
429 			return(0);
430 		return(man_descope(man, line, offs));
431 	}
432 
433 	for (i = offs; ' ' == buf[i]; i++)
434 		/* Skip leading whitespace. */ ;
435 
436 	/*
437 	 * Blank lines are ignored right after headings
438 	 * but add a single vertical space elsewhere.
439 	 */
440 
441 	if ('\0' == buf[i]) {
442 		/* Allocate a blank entry. */
443 		if (MAN_SH != man->last->tok &&
444 		    MAN_SS != man->last->tok) {
445 			if ( ! man_elem_alloc(man, line, offs, MAN_sp))
446 				return(0);
447 			man->next = MAN_NEXT_SIBLING;
448 		}
449 		return(1);
450 	}
451 
452 	/*
453 	 * Warn if the last un-escaped character is whitespace. Then
454 	 * strip away the remaining spaces (tabs stay!).
455 	 */
456 
457 	i = (int)strlen(buf);
458 	assert(i);
459 
460 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
461 		if (i > 1 && '\\' != buf[i - 2])
462 			man_pmsg(man, line, i - 1, MANDOCERR_EOLNSPACE);
463 
464 		for (--i; i && ' ' == buf[i]; i--)
465 			/* Spin back to non-space. */ ;
466 
467 		/* Jump ahead of escaped whitespace. */
468 		i += '\\' == buf[i] ? 2 : 1;
469 
470 		buf[i] = '\0';
471 	}
472 
473 	if ( ! man_word_alloc(man, line, offs, buf + offs))
474 		return(0);
475 
476 	/*
477 	 * End-of-sentence check.  If the last character is an unescaped
478 	 * EOS character, then flag the node as being the end of a
479 	 * sentence.  The front-end will know how to interpret this.
480 	 */
481 
482 	assert(i);
483 	if (mandoc_eos(buf, (size_t)i))
484 		man->last->flags |= MAN_EOS;
485 
486 	return(man_descope(man, line, offs));
487 }
488 
489 static int
490 man_pmacro(struct man *man, int ln, char *buf, int offs)
491 {
492 	int		 i, ppos;
493 	enum mant	 tok;
494 	char		 mac[5];
495 	struct man_node	*n;
496 
497 	if ('"' == buf[offs]) {
498 		man_pmsg(man, ln, offs, MANDOCERR_BADCOMMENT);
499 		return(1);
500 	} else if ('\0' == buf[offs])
501 		return(1);
502 
503 	ppos = offs;
504 
505 	/*
506 	 * Copy the first word into a nil-terminated buffer.
507 	 * Stop copying when a tab, space, or eoln is encountered.
508 	 */
509 
510 	i = 0;
511 	while (i < 4 && '\0' != buf[offs] &&
512 			' ' != buf[offs] && '\t' != buf[offs])
513 		mac[i++] = buf[offs++];
514 
515 	mac[i] = '\0';
516 
517 	tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
518 
519 	if (MAN_MAX == tok) {
520 		mandoc_vmsg(MANDOCERR_MACRO, man->parse, ln,
521 				ppos, "%s", buf + ppos - 1);
522 		return(1);
523 	}
524 
525 	/* The macro is sane.  Jump to the next word. */
526 
527 	while (buf[offs] && ' ' == buf[offs])
528 		offs++;
529 
530 	/*
531 	 * Trailing whitespace.  Note that tabs are allowed to be passed
532 	 * into the parser as "text", so we only warn about spaces here.
533 	 */
534 
535 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
536 		man_pmsg(man, ln, offs - 1, MANDOCERR_EOLNSPACE);
537 
538 	/*
539 	 * Remove prior ELINE macro, as it's being clobbered by a new
540 	 * macro.  Note that NSCOPED macros do not close out ELINE
541 	 * macros---they don't print text---so we let those slip by.
542 	 */
543 
544 	if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
545 			man->flags & MAN_ELINE) {
546 		n = man->last;
547 		assert(MAN_TEXT != n->type);
548 
549 		/* Remove repeated NSCOPED macros causing ELINE. */
550 
551 		if (MAN_NSCOPED & man_macros[n->tok].flags)
552 			n = n->parent;
553 
554 		mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line,
555 		    n->pos, "%s breaks %s", man_macronames[tok],
556 		    man_macronames[n->tok]);
557 
558 		man_node_delete(man, n);
559 		man->flags &= ~MAN_ELINE;
560 	}
561 
562 	/*
563 	 * Remove prior BLINE macro that is being clobbered.
564 	 */
565 	if ((man->flags & MAN_BLINE) &&
566 	    (MAN_BSCOPE & man_macros[tok].flags)) {
567 		n = man->last;
568 
569 		/* Might be a text node like 8 in
570 		 * .TP 8
571 		 * .SH foo
572 		 */
573 		if (MAN_TEXT == n->type)
574 			n = n->parent;
575 
576 		/* Remove element that didn't end BLINE, if any. */
577 		if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
578 			n = n->parent;
579 
580 		assert(MAN_HEAD == n->type);
581 		n = n->parent;
582 		assert(MAN_BLOCK == n->type);
583 		assert(MAN_SCOPED & man_macros[n->tok].flags);
584 
585 		mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line,
586 		    n->pos, "%s breaks %s", man_macronames[tok],
587 		    man_macronames[n->tok]);
588 
589 		man_node_delete(man, n);
590 		man->flags &= ~MAN_BLINE;
591 	}
592 
593 	/*
594 	 * Save the fact that we're in the next-line for a block.  In
595 	 * this way, embedded roff instructions can "remember" state
596 	 * when they exit.
597 	 */
598 
599 	if (MAN_BLINE & man->flags)
600 		man->flags |= MAN_BPLINE;
601 
602 	/* Call to handler... */
603 
604 	assert(man_macros[tok].fp);
605 	if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf))
606 		goto err;
607 
608 	/* In quick mode (for mandocdb), abort after the NAME section. */
609 
610 	if (man->quick && MAN_SH == tok &&
611 	    strcmp(man->last->prev->child->string, "NAME"))
612 		return(2);
613 
614 	/*
615 	 * We weren't in a block-line scope when entering the
616 	 * above-parsed macro, so return.
617 	 */
618 
619 	if ( ! (MAN_BPLINE & man->flags)) {
620 		man->flags &= ~MAN_ILINE;
621 		return(1);
622 	}
623 	man->flags &= ~MAN_BPLINE;
624 
625 	/*
626 	 * If we're in a block scope, then allow this macro to slip by
627 	 * without closing scope around it.
628 	 */
629 
630 	if (MAN_ILINE & man->flags) {
631 		man->flags &= ~MAN_ILINE;
632 		return(1);
633 	}
634 
635 	/*
636 	 * If we've opened a new next-line element scope, then return
637 	 * now, as the next line will close out the block scope.
638 	 */
639 
640 	if (MAN_ELINE & man->flags)
641 		return(1);
642 
643 	/* Close out the block scope opened in the prior line.  */
644 
645 	assert(MAN_BLINE & man->flags);
646 	man->flags &= ~MAN_BLINE;
647 
648 	if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
649 		return(0);
650 	return(man_body_alloc(man, ln, ppos, man->last->tok));
651 
652 err:	/* Error out. */
653 
654 	man->flags |= MAN_HALT;
655 	return(0);
656 }
657 
658 /*
659  * Unlink a node from its context.  If "man" is provided, the last parse
660  * point will also be adjusted accordingly.
661  */
662 static void
663 man_node_unlink(struct man *man, struct man_node *n)
664 {
665 
666 	/* Adjust siblings. */
667 
668 	if (n->prev)
669 		n->prev->next = n->next;
670 	if (n->next)
671 		n->next->prev = n->prev;
672 
673 	/* Adjust parent. */
674 
675 	if (n->parent) {
676 		n->parent->nchild--;
677 		if (n->parent->child == n)
678 			n->parent->child = n->prev ? n->prev : n->next;
679 	}
680 
681 	/* Adjust parse point, if applicable. */
682 
683 	if (man && man->last == n) {
684 		/*XXX: this can occur when bailing from validation. */
685 		/*assert(NULL == n->next);*/
686 		if (n->prev) {
687 			man->last = n->prev;
688 			man->next = MAN_NEXT_SIBLING;
689 		} else {
690 			man->last = n->parent;
691 			man->next = MAN_NEXT_CHILD;
692 		}
693 	}
694 
695 	if (man && man->first == n)
696 		man->first = NULL;
697 }
698 
699 const struct mparse *
700 man_mparse(const struct man *man)
701 {
702 
703 	assert(man && man->parse);
704 	return(man->parse);
705 }
706 
707 void
708 man_deroff(char **dest, const struct man_node *n)
709 {
710 	char	*cp;
711 	size_t	 sz;
712 
713 	if (MAN_TEXT != n->type) {
714 		for (n = n->child; n; n = n->next)
715 			man_deroff(dest, n);
716 		return;
717 	}
718 
719 	/* Skip leading whitespace and escape sequences. */
720 
721 	cp = n->string;
722 	while ('\0' != *cp) {
723 		if ('\\' == *cp) {
724 			cp++;
725 			mandoc_escape((const char **)&cp, NULL, NULL);
726 		} else if (isspace((unsigned char)*cp))
727 			cp++;
728 		else
729 			break;
730 	}
731 
732 	/* Skip trailing whitespace. */
733 
734 	for (sz = strlen(cp); sz; sz--)
735 		if (0 == isspace((unsigned char)cp[sz-1]))
736 			break;
737 
738 	/* Skip empty strings. */
739 
740 	if (0 == sz)
741 		return;
742 
743 	if (NULL == *dest) {
744 		*dest = mandoc_strndup(cp, sz);
745 		return;
746 	}
747 
748 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
749 	free(*dest);
750 	*dest = cp;
751 }
752