xref: /openbsd-src/usr.bin/mandoc/man.c (revision 5ad04d351680822078003e2b066cfc9680d6157d)
1 /*	$Id: man.c,v 1.80 2014/05/07 14:11:24 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 
28 #include "man.h"
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libman.h"
32 #include "libmandoc.h"
33 
34 const	char *const __man_macronames[MAN_MAX] = {
35 	"br",		"TH",		"SH",		"SS",
36 	"TP",		"LP",		"PP",		"P",
37 	"IP",		"HP",		"SM",		"SB",
38 	"BI",		"IB",		"BR",		"RB",
39 	"R",		"B",		"I",		"IR",
40 	"RI",		"na",		"sp",		"nf",
41 	"fi",		"RE",		"RS",		"DT",
42 	"UC",		"PD",		"AT",		"in",
43 	"ft",		"OP",		"EX",		"EE",
44 	"UR",		"UE",		"ll"
45 	};
46 
47 const	char * const *man_macronames = __man_macronames;
48 
49 static	struct man_node	*man_node_alloc(struct man *, int, int,
50 				enum man_type, enum mant);
51 static	int		 man_node_append(struct man *,
52 				struct man_node *);
53 static	void		 man_node_free(struct man_node *);
54 static	void		 man_node_unlink(struct man *,
55 				struct man_node *);
56 static	int		 man_ptext(struct man *, int, char *, int);
57 static	int		 man_pmacro(struct man *, int, char *, int);
58 static	void		 man_free1(struct man *);
59 static	void		 man_alloc1(struct man *);
60 static	int		 man_descope(struct man *, int, int);
61 
62 
63 const struct man_node *
64 man_node(const struct man *man)
65 {
66 
67 	assert( ! (MAN_HALT & man->flags));
68 	return(man->first);
69 }
70 
71 const struct man_meta *
72 man_meta(const struct man *man)
73 {
74 
75 	assert( ! (MAN_HALT & man->flags));
76 	return(&man->meta);
77 }
78 
/*
 * Bring the parser back to its initial state: release the parse tree
 * and meta data, then set up a fresh root node.  The parser object
 * itself is kept.
 */
void
man_reset(struct man *man)
{
	man_free1(man);		/* drop the old tree and meta strings */
	man_alloc1(man);	/* start over with an empty tree */
}
86 
/*
 * Destroy the parser: release everything it holds, then the parser
 * object itself.  "man" must not be used afterwards.
 */
void
man_free(struct man *man)
{
	man_free1(man);		/* tree and meta strings */
	free(man);		/* the container */
}
94 
95 struct man *
96 man_alloc(struct roff *roff, struct mparse *parse, int quick)
97 {
98 	struct man	*p;
99 
100 	p = mandoc_calloc(1, sizeof(struct man));
101 
102 	man_hash_init();
103 	p->parse = parse;
104 	p->quick = quick;
105 	p->roff = roff;
106 
107 	man_alloc1(p);
108 	return(p);
109 }
110 
111 int
112 man_endparse(struct man *man)
113 {
114 
115 	assert( ! (MAN_HALT & man->flags));
116 	if (man_macroend(man))
117 		return(1);
118 	man->flags |= MAN_HALT;
119 	return(0);
120 }
121 
122 int
123 man_parseln(struct man *man, int ln, char *buf, int offs)
124 {
125 
126 	man->flags |= MAN_NEWLINE;
127 
128 	assert( ! (MAN_HALT & man->flags));
129 
130 	return (roff_getcontrol(man->roff, buf, &offs) ?
131 	    man_pmacro(man, ln, buf, offs) :
132 	    man_ptext(man, ln, buf, offs));
133 }
134 
135 static void
136 man_free1(struct man *man)
137 {
138 
139 	if (man->first)
140 		man_node_delete(man, man->first);
141 	if (man->meta.title)
142 		free(man->meta.title);
143 	if (man->meta.source)
144 		free(man->meta.source);
145 	if (man->meta.date)
146 		free(man->meta.date);
147 	if (man->meta.vol)
148 		free(man->meta.vol);
149 	if (man->meta.msec)
150 		free(man->meta.msec);
151 }
152 
153 static void
154 man_alloc1(struct man *man)
155 {
156 
157 	memset(&man->meta, 0, sizeof(struct man_meta));
158 	man->flags = 0;
159 	man->last = mandoc_calloc(1, sizeof(struct man_node));
160 	man->first = man->last;
161 	man->last->type = MAN_ROOT;
162 	man->last->tok = MAN_MAX;
163 	man->next = MAN_NEXT_CHILD;
164 }
165 
166 
167 static int
168 man_node_append(struct man *man, struct man_node *p)
169 {
170 
171 	assert(man->last);
172 	assert(man->first);
173 	assert(MAN_ROOT != p->type);
174 
175 	switch (man->next) {
176 	case MAN_NEXT_SIBLING:
177 		man->last->next = p;
178 		p->prev = man->last;
179 		p->parent = man->last->parent;
180 		break;
181 	case MAN_NEXT_CHILD:
182 		man->last->child = p;
183 		p->parent = man->last;
184 		break;
185 	default:
186 		abort();
187 		/* NOTREACHED */
188 	}
189 
190 	assert(p->parent);
191 	p->parent->nchild++;
192 
193 	if ( ! man_valid_pre(man, p))
194 		return(0);
195 
196 	switch (p->type) {
197 	case MAN_HEAD:
198 		assert(MAN_BLOCK == p->parent->type);
199 		p->parent->head = p;
200 		break;
201 	case MAN_TAIL:
202 		assert(MAN_BLOCK == p->parent->type);
203 		p->parent->tail = p;
204 		break;
205 	case MAN_BODY:
206 		assert(MAN_BLOCK == p->parent->type);
207 		p->parent->body = p;
208 		break;
209 	default:
210 		break;
211 	}
212 
213 	man->last = p;
214 
215 	switch (p->type) {
216 	case MAN_TBL:
217 		/* FALLTHROUGH */
218 	case MAN_TEXT:
219 		if ( ! man_valid_post(man))
220 			return(0);
221 		break;
222 	default:
223 		break;
224 	}
225 
226 	return(1);
227 }
228 
229 static struct man_node *
230 man_node_alloc(struct man *man, int line, int pos,
231 		enum man_type type, enum mant tok)
232 {
233 	struct man_node *p;
234 
235 	p = mandoc_calloc(1, sizeof(struct man_node));
236 	p->line = line;
237 	p->pos = pos;
238 	p->type = type;
239 	p->tok = tok;
240 
241 	if (MAN_NEWLINE & man->flags)
242 		p->flags |= MAN_LINE;
243 	man->flags &= ~MAN_NEWLINE;
244 	return(p);
245 }
246 
247 int
248 man_elem_alloc(struct man *man, int line, int pos, enum mant tok)
249 {
250 	struct man_node *p;
251 
252 	p = man_node_alloc(man, line, pos, MAN_ELEM, tok);
253 	if ( ! man_node_append(man, p))
254 		return(0);
255 	man->next = MAN_NEXT_CHILD;
256 	return(1);
257 }
258 
259 int
260 man_tail_alloc(struct man *man, int line, int pos, enum mant tok)
261 {
262 	struct man_node *p;
263 
264 	p = man_node_alloc(man, line, pos, MAN_TAIL, tok);
265 	if ( ! man_node_append(man, p))
266 		return(0);
267 	man->next = MAN_NEXT_CHILD;
268 	return(1);
269 }
270 
271 int
272 man_head_alloc(struct man *man, int line, int pos, enum mant tok)
273 {
274 	struct man_node *p;
275 
276 	p = man_node_alloc(man, line, pos, MAN_HEAD, tok);
277 	if ( ! man_node_append(man, p))
278 		return(0);
279 	man->next = MAN_NEXT_CHILD;
280 	return(1);
281 }
282 
283 int
284 man_body_alloc(struct man *man, int line, int pos, enum mant tok)
285 {
286 	struct man_node *p;
287 
288 	p = man_node_alloc(man, line, pos, MAN_BODY, tok);
289 	if ( ! man_node_append(man, p))
290 		return(0);
291 	man->next = MAN_NEXT_CHILD;
292 	return(1);
293 }
294 
295 int
296 man_block_alloc(struct man *man, int line, int pos, enum mant tok)
297 {
298 	struct man_node *p;
299 
300 	p = man_node_alloc(man, line, pos, MAN_BLOCK, tok);
301 	if ( ! man_node_append(man, p))
302 		return(0);
303 	man->next = MAN_NEXT_CHILD;
304 	return(1);
305 }
306 
307 int
308 man_word_alloc(struct man *man, int line, int pos, const char *word)
309 {
310 	struct man_node	*n;
311 
312 	n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX);
313 	n->string = roff_strdup(man->roff, word);
314 
315 	if ( ! man_node_append(man, n))
316 		return(0);
317 
318 	man->next = MAN_NEXT_SIBLING;
319 	return(1);
320 }
321 
322 /*
323  * Free all of the resources held by a node.  This does NOT unlink a
324  * node from its context; for that, see man_node_unlink().
325  */
326 static void
327 man_node_free(struct man_node *p)
328 {
329 
330 	if (p->string)
331 		free(p->string);
332 	free(p);
333 }
334 
335 void
336 man_node_delete(struct man *man, struct man_node *p)
337 {
338 
339 	while (p->child)
340 		man_node_delete(man, p->child);
341 
342 	man_node_unlink(man, p);
343 	man_node_free(p);
344 }
345 
346 int
347 man_addeqn(struct man *man, const struct eqn *ep)
348 {
349 	struct man_node	*n;
350 
351 	assert( ! (MAN_HALT & man->flags));
352 
353 	n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
354 	n->eqn = ep;
355 
356 	if ( ! man_node_append(man, n))
357 		return(0);
358 
359 	man->next = MAN_NEXT_SIBLING;
360 	return(man_descope(man, ep->ln, ep->pos));
361 }
362 
363 int
364 man_addspan(struct man *man, const struct tbl_span *sp)
365 {
366 	struct man_node	*n;
367 
368 	assert( ! (MAN_HALT & man->flags));
369 
370 	n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX);
371 	n->span = sp;
372 
373 	if ( ! man_node_append(man, n))
374 		return(0);
375 
376 	man->next = MAN_NEXT_SIBLING;
377 	return(man_descope(man, sp->line, 0));
378 }
379 
380 static int
381 man_descope(struct man *man, int line, int offs)
382 {
383 	/*
384 	 * Co-ordinate what happens with having a next-line scope open:
385 	 * first close out the element scope (if applicable), then close
386 	 * out the block scope (also if applicable).
387 	 */
388 
389 	if (MAN_ELINE & man->flags) {
390 		man->flags &= ~MAN_ELINE;
391 		if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
392 			return(0);
393 	}
394 
395 	if ( ! (MAN_BLINE & man->flags))
396 		return(1);
397 	man->flags &= ~MAN_BLINE;
398 
399 	if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
400 		return(0);
401 	return(man_body_alloc(man, line, offs, man->last->tok));
402 }
403 
404 static int
405 man_ptext(struct man *man, int line, char *buf, int offs)
406 {
407 	int		 i;
408 
409 	/* Literal free-form text whitespace is preserved. */
410 
411 	if (MAN_LITERAL & man->flags) {
412 		if ( ! man_word_alloc(man, line, offs, buf + offs))
413 			return(0);
414 		return(man_descope(man, line, offs));
415 	}
416 
417 	for (i = offs; ' ' == buf[i]; i++)
418 		/* Skip leading whitespace. */ ;
419 
420 	/*
421 	 * Blank lines are ignored right after headings
422 	 * but add a single vertical space elsewhere.
423 	 */
424 
425 	if ('\0' == buf[i]) {
426 		/* Allocate a blank entry. */
427 		if (MAN_SH != man->last->tok &&
428 		    MAN_SS != man->last->tok) {
429 			if ( ! man_elem_alloc(man, line, offs, MAN_sp))
430 				return(0);
431 			man->next = MAN_NEXT_SIBLING;
432 		}
433 		return(1);
434 	}
435 
436 	/*
437 	 * Warn if the last un-escaped character is whitespace. Then
438 	 * strip away the remaining spaces (tabs stay!).
439 	 */
440 
441 	i = (int)strlen(buf);
442 	assert(i);
443 
444 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
445 		if (i > 1 && '\\' != buf[i - 2])
446 			man_pmsg(man, line, i - 1, MANDOCERR_EOLNSPACE);
447 
448 		for (--i; i && ' ' == buf[i]; i--)
449 			/* Spin back to non-space. */ ;
450 
451 		/* Jump ahead of escaped whitespace. */
452 		i += '\\' == buf[i] ? 2 : 1;
453 
454 		buf[i] = '\0';
455 	}
456 
457 	if ( ! man_word_alloc(man, line, offs, buf + offs))
458 		return(0);
459 
460 	/*
461 	 * End-of-sentence check.  If the last character is an unescaped
462 	 * EOS character, then flag the node as being the end of a
463 	 * sentence.  The front-end will know how to interpret this.
464 	 */
465 
466 	assert(i);
467 	if (mandoc_eos(buf, (size_t)i))
468 		man->last->flags |= MAN_EOS;
469 
470 	return(man_descope(man, line, offs));
471 }
472 
473 static int
474 man_pmacro(struct man *man, int ln, char *buf, int offs)
475 {
476 	int		 i, ppos;
477 	enum mant	 tok;
478 	char		 mac[5];
479 	struct man_node	*n;
480 
481 	if ('"' == buf[offs]) {
482 		man_pmsg(man, ln, offs, MANDOCERR_BADCOMMENT);
483 		return(1);
484 	} else if ('\0' == buf[offs])
485 		return(1);
486 
487 	ppos = offs;
488 
489 	/*
490 	 * Copy the first word into a nil-terminated buffer.
491 	 * Stop copying when a tab, space, or eoln is encountered.
492 	 */
493 
494 	i = 0;
495 	while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] &&
496 	    '\t' != buf[offs])
497 		mac[i++] = buf[offs++];
498 
499 	mac[i] = '\0';
500 
501 	tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
502 
503 	if (MAN_MAX == tok) {
504 		mandoc_vmsg(MANDOCERR_MACRO, man->parse, ln, ppos,
505 		    "%s", buf + ppos - 1);
506 		return(1);
507 	}
508 
509 	/* The macro is sane.  Jump to the next word. */
510 
511 	while (buf[offs] && ' ' == buf[offs])
512 		offs++;
513 
514 	/*
515 	 * Trailing whitespace.  Note that tabs are allowed to be passed
516 	 * into the parser as "text", so we only warn about spaces here.
517 	 */
518 
519 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
520 		man_pmsg(man, ln, offs - 1, MANDOCERR_EOLNSPACE);
521 
522 	/*
523 	 * Remove prior ELINE macro, as it's being clobbered by a new
524 	 * macro.  Note that NSCOPED macros do not close out ELINE
525 	 * macros---they don't print text---so we let those slip by.
526 	 */
527 
528 	if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
529 			man->flags & MAN_ELINE) {
530 		n = man->last;
531 		assert(MAN_TEXT != n->type);
532 
533 		/* Remove repeated NSCOPED macros causing ELINE. */
534 
535 		if (MAN_NSCOPED & man_macros[n->tok].flags)
536 			n = n->parent;
537 
538 		mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line,
539 		    n->pos, "%s breaks %s", man_macronames[tok],
540 		    man_macronames[n->tok]);
541 
542 		man_node_delete(man, n);
543 		man->flags &= ~MAN_ELINE;
544 	}
545 
546 	/*
547 	 * Remove prior BLINE macro that is being clobbered.
548 	 */
549 	if ((man->flags & MAN_BLINE) &&
550 	    (MAN_BSCOPE & man_macros[tok].flags)) {
551 		n = man->last;
552 
553 		/* Might be a text node like 8 in
554 		 * .TP 8
555 		 * .SH foo
556 		 */
557 		if (MAN_TEXT == n->type)
558 			n = n->parent;
559 
560 		/* Remove element that didn't end BLINE, if any. */
561 		if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
562 			n = n->parent;
563 
564 		assert(MAN_HEAD == n->type);
565 		n = n->parent;
566 		assert(MAN_BLOCK == n->type);
567 		assert(MAN_SCOPED & man_macros[n->tok].flags);
568 
569 		mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line,
570 		    n->pos, "%s breaks %s", man_macronames[tok],
571 		    man_macronames[n->tok]);
572 
573 		man_node_delete(man, n);
574 		man->flags &= ~MAN_BLINE;
575 	}
576 
577 	/*
578 	 * Save the fact that we're in the next-line for a block.  In
579 	 * this way, embedded roff instructions can "remember" state
580 	 * when they exit.
581 	 */
582 
583 	if (MAN_BLINE & man->flags)
584 		man->flags |= MAN_BPLINE;
585 
586 	/* Call to handler... */
587 
588 	assert(man_macros[tok].fp);
589 	if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf))
590 		goto err;
591 
592 	/* In quick mode (for mandocdb), abort after the NAME section. */
593 
594 	if (man->quick && MAN_SH == tok) {
595 		n = man->last;
596 		if (MAN_BODY == n->type &&
597 		    strcmp(n->prev->child->string, "NAME"))
598 			return(2);
599 	}
600 
601 	/*
602 	 * We weren't in a block-line scope when entering the
603 	 * above-parsed macro, so return.
604 	 */
605 
606 	if ( ! (MAN_BPLINE & man->flags)) {
607 		man->flags &= ~MAN_ILINE;
608 		return(1);
609 	}
610 	man->flags &= ~MAN_BPLINE;
611 
612 	/*
613 	 * If we're in a block scope, then allow this macro to slip by
614 	 * without closing scope around it.
615 	 */
616 
617 	if (MAN_ILINE & man->flags) {
618 		man->flags &= ~MAN_ILINE;
619 		return(1);
620 	}
621 
622 	/*
623 	 * If we've opened a new next-line element scope, then return
624 	 * now, as the next line will close out the block scope.
625 	 */
626 
627 	if (MAN_ELINE & man->flags)
628 		return(1);
629 
630 	/* Close out the block scope opened in the prior line.  */
631 
632 	assert(MAN_BLINE & man->flags);
633 	man->flags &= ~MAN_BLINE;
634 
635 	if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
636 		return(0);
637 	return(man_body_alloc(man, ln, ppos, man->last->tok));
638 
639 err:	/* Error out. */
640 
641 	man->flags |= MAN_HALT;
642 	return(0);
643 }
644 
645 /*
646  * Unlink a node from its context.  If "man" is provided, the last parse
647  * point will also be adjusted accordingly.
648  */
649 static void
650 man_node_unlink(struct man *man, struct man_node *n)
651 {
652 
653 	/* Adjust siblings. */
654 
655 	if (n->prev)
656 		n->prev->next = n->next;
657 	if (n->next)
658 		n->next->prev = n->prev;
659 
660 	/* Adjust parent. */
661 
662 	if (n->parent) {
663 		n->parent->nchild--;
664 		if (n->parent->child == n)
665 			n->parent->child = n->prev ? n->prev : n->next;
666 	}
667 
668 	/* Adjust parse point, if applicable. */
669 
670 	if (man && man->last == n) {
671 		/*XXX: this can occur when bailing from validation. */
672 		/*assert(NULL == n->next);*/
673 		if (n->prev) {
674 			man->last = n->prev;
675 			man->next = MAN_NEXT_SIBLING;
676 		} else {
677 			man->last = n->parent;
678 			man->next = MAN_NEXT_CHILD;
679 		}
680 	}
681 
682 	if (man && man->first == n)
683 		man->first = NULL;
684 }
685 
686 const struct mparse *
687 man_mparse(const struct man *man)
688 {
689 
690 	assert(man && man->parse);
691 	return(man->parse);
692 }
693 
694 void
695 man_deroff(char **dest, const struct man_node *n)
696 {
697 	char	*cp;
698 	size_t	 sz;
699 
700 	if (MAN_TEXT != n->type) {
701 		for (n = n->child; n; n = n->next)
702 			man_deroff(dest, n);
703 		return;
704 	}
705 
706 	/* Skip leading whitespace and escape sequences. */
707 
708 	cp = n->string;
709 	while ('\0' != *cp) {
710 		if ('\\' == *cp) {
711 			cp++;
712 			mandoc_escape((const char **)&cp, NULL, NULL);
713 		} else if (isspace((unsigned char)*cp))
714 			cp++;
715 		else
716 			break;
717 	}
718 
719 	/* Skip trailing whitespace. */
720 
721 	for (sz = strlen(cp); sz; sz--)
722 		if (0 == isspace((unsigned char)cp[sz-1]))
723 			break;
724 
725 	/* Skip empty strings. */
726 
727 	if (0 == sz)
728 		return;
729 
730 	if (NULL == *dest) {
731 		*dest = mandoc_strndup(cp, sz);
732 		return;
733 	}
734 
735 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
736 	free(*dest);
737 	*dest = cp;
738 }
739