xref: /openbsd-src/usr.bin/mandoc/man.c (revision e5157e49389faebcb42b7237d55fbf096d9c2523)
1 /*	$OpenBSD: man.c,v 1.90 2014/11/03 23:17:21 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 
28 #include "man.h"
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libman.h"
32 #include "libmandoc.h"
33 
/*
 * Printable names of the man(7) macros.  The table has MAN_MAX slots
 * and is indexed with enum mant token values, as done by the
 * man_macronames[tok] lookups in man_pmacro().
 * NOTE(review): the entries presumably follow the declaration order of
 * enum mant in man.h -- confirm before adding or reordering names.
 */
const	char *const __man_macronames[MAN_MAX] = {
	"br",		"TH",		"SH",		"SS",
	"TP",		"LP",		"PP",		"P",
	"IP",		"HP",		"SM",		"SB",
	"BI",		"IB",		"BR",		"RB",
	"R",		"B",		"I",		"IR",
	"RI",		"na",		"sp",		"nf",
	"fi",		"RE",		"RS",		"DT",
	"UC",		"PD",		"AT",		"in",
	"ft",		"OP",		"EX",		"EE",
	"UR",		"UE",		"ll"
	};

/* Pointer alias to the table above; lookups in this file go through it. */
const	char * const *man_macronames = __man_macronames;
48 
/* Forward declarations of the helpers that are private to this file. */
static	struct man_node	*man_node_alloc(struct man *, int, int,
				enum man_type, enum mant);
static	int		 man_node_append(struct man *,
				struct man_node *);
static	void		 man_node_free(struct man_node *);
static	void		 man_node_unlink(struct man *,
				struct man_node *);
static	int		 man_ptext(struct man *, int, char *, int);
static	int		 man_pmacro(struct man *, int, char *, int);
static	void		 man_free1(struct man *);
static	void		 man_alloc1(struct man *);
static	int		 man_descope(struct man *, int, int);
61 
62 
63 const struct man_node *
64 man_node(const struct man *man)
65 {
66 
67 	return(man->first);
68 }
69 
70 const struct man_meta *
71 man_meta(const struct man *man)
72 {
73 
74 	return(&man->meta);
75 }
76 
/*
 * Drop the parse tree and metadata, then put the parser back into its
 * freshly-allocated state so it can be reused.
 */
void
man_reset(struct man *man)
{
	man_free1(man);		/* release tree and metadata strings */
	man_alloc1(man);	/* start over with a new root node */
}
84 
/*
 * Destroy a parser: release what it owns, then the parser itself.
 */
void
man_free(struct man *man)
{
	man_free1(man);		/* tree and metadata strings */
	free(man);		/* the parser structure itself */
}
92 
93 struct man *
94 man_alloc(struct roff *roff, struct mparse *parse, int quick)
95 {
96 	struct man	*p;
97 
98 	p = mandoc_calloc(1, sizeof(struct man));
99 
100 	man_hash_init();
101 	p->parse = parse;
102 	p->quick = quick;
103 	p->roff = roff;
104 
105 	man_alloc1(p);
106 	return(p);
107 }
108 
/*
 * Finish parsing; the result of man_macroend() is passed through.
 */
int
man_endparse(struct man *man)
{
	int	 rc;

	rc = man_macroend(man);
	return rc;
}
115 
116 int
117 man_parseln(struct man *man, int ln, char *buf, int offs)
118 {
119 
120 	if (man->last->type != MAN_EQN || ln > man->last->line)
121 		man->flags |= MAN_NEWLINE;
122 
123 	return (roff_getcontrol(man->roff, buf, &offs) ?
124 	    man_pmacro(man, ln, buf, offs) :
125 	    man_ptext(man, ln, buf, offs));
126 }
127 
128 static void
129 man_free1(struct man *man)
130 {
131 
132 	if (man->first)
133 		man_node_delete(man, man->first);
134 	if (man->meta.title)
135 		free(man->meta.title);
136 	if (man->meta.source)
137 		free(man->meta.source);
138 	if (man->meta.date)
139 		free(man->meta.date);
140 	if (man->meta.vol)
141 		free(man->meta.vol);
142 	if (man->meta.msec)
143 		free(man->meta.msec);
144 }
145 
146 static void
147 man_alloc1(struct man *man)
148 {
149 
150 	memset(&man->meta, 0, sizeof(struct man_meta));
151 	man->flags = 0;
152 	man->last = mandoc_calloc(1, sizeof(struct man_node));
153 	man->first = man->last;
154 	man->last->type = MAN_ROOT;
155 	man->last->tok = MAN_MAX;
156 	man->next = MAN_NEXT_CHILD;
157 }
158 
159 
160 static int
161 man_node_append(struct man *man, struct man_node *p)
162 {
163 
164 	assert(man->last);
165 	assert(man->first);
166 	assert(MAN_ROOT != p->type);
167 
168 	switch (man->next) {
169 	case MAN_NEXT_SIBLING:
170 		man->last->next = p;
171 		p->prev = man->last;
172 		p->parent = man->last->parent;
173 		break;
174 	case MAN_NEXT_CHILD:
175 		man->last->child = p;
176 		p->parent = man->last;
177 		break;
178 	default:
179 		abort();
180 		/* NOTREACHED */
181 	}
182 
183 	assert(p->parent);
184 	p->parent->nchild++;
185 
186 	switch (p->type) {
187 	case MAN_BLOCK:
188 		if (p->tok == MAN_SH || p->tok == MAN_SS)
189 			man->flags &= ~MAN_LITERAL;
190 		break;
191 	case MAN_HEAD:
192 		assert(MAN_BLOCK == p->parent->type);
193 		p->parent->head = p;
194 		break;
195 	case MAN_TAIL:
196 		assert(MAN_BLOCK == p->parent->type);
197 		p->parent->tail = p;
198 		break;
199 	case MAN_BODY:
200 		assert(MAN_BLOCK == p->parent->type);
201 		p->parent->body = p;
202 		break;
203 	default:
204 		break;
205 	}
206 
207 	man->last = p;
208 
209 	switch (p->type) {
210 	case MAN_TBL:
211 		/* FALLTHROUGH */
212 	case MAN_TEXT:
213 		if ( ! man_valid_post(man))
214 			return(0);
215 		break;
216 	default:
217 		break;
218 	}
219 
220 	return(1);
221 }
222 
223 static struct man_node *
224 man_node_alloc(struct man *man, int line, int pos,
225 		enum man_type type, enum mant tok)
226 {
227 	struct man_node *p;
228 
229 	p = mandoc_calloc(1, sizeof(struct man_node));
230 	p->line = line;
231 	p->pos = pos;
232 	p->type = type;
233 	p->tok = tok;
234 
235 	if (MAN_NEWLINE & man->flags)
236 		p->flags |= MAN_LINE;
237 	man->flags &= ~MAN_NEWLINE;
238 	return(p);
239 }
240 
241 int
242 man_elem_alloc(struct man *man, int line, int pos, enum mant tok)
243 {
244 	struct man_node *p;
245 
246 	p = man_node_alloc(man, line, pos, MAN_ELEM, tok);
247 	if ( ! man_node_append(man, p))
248 		return(0);
249 	man->next = MAN_NEXT_CHILD;
250 	return(1);
251 }
252 
253 int
254 man_tail_alloc(struct man *man, int line, int pos, enum mant tok)
255 {
256 	struct man_node *p;
257 
258 	p = man_node_alloc(man, line, pos, MAN_TAIL, tok);
259 	if ( ! man_node_append(man, p))
260 		return(0);
261 	man->next = MAN_NEXT_CHILD;
262 	return(1);
263 }
264 
265 int
266 man_head_alloc(struct man *man, int line, int pos, enum mant tok)
267 {
268 	struct man_node *p;
269 
270 	p = man_node_alloc(man, line, pos, MAN_HEAD, tok);
271 	if ( ! man_node_append(man, p))
272 		return(0);
273 	man->next = MAN_NEXT_CHILD;
274 	return(1);
275 }
276 
277 int
278 man_body_alloc(struct man *man, int line, int pos, enum mant tok)
279 {
280 	struct man_node *p;
281 
282 	p = man_node_alloc(man, line, pos, MAN_BODY, tok);
283 	if ( ! man_node_append(man, p))
284 		return(0);
285 	man->next = MAN_NEXT_CHILD;
286 	return(1);
287 }
288 
289 int
290 man_block_alloc(struct man *man, int line, int pos, enum mant tok)
291 {
292 	struct man_node *p;
293 
294 	p = man_node_alloc(man, line, pos, MAN_BLOCK, tok);
295 	if ( ! man_node_append(man, p))
296 		return(0);
297 	man->next = MAN_NEXT_CHILD;
298 	return(1);
299 }
300 
301 int
302 man_word_alloc(struct man *man, int line, int pos, const char *word)
303 {
304 	struct man_node	*n;
305 
306 	n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX);
307 	n->string = roff_strdup(man->roff, word);
308 
309 	if ( ! man_node_append(man, n))
310 		return(0);
311 
312 	man->next = MAN_NEXT_SIBLING;
313 	return(1);
314 }
315 
316 void
317 man_word_append(struct man *man, const char *word)
318 {
319 	struct man_node	*n;
320 	char		*addstr, *newstr;
321 
322 	n = man->last;
323 	addstr = roff_strdup(man->roff, word);
324 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
325 	free(addstr);
326 	free(n->string);
327 	n->string = newstr;
328 	man->next = MAN_NEXT_SIBLING;
329 }
330 
331 /*
332  * Free all of the resources held by a node.  This does NOT unlink a
333  * node from its context; for that, see man_node_unlink().
334  */
335 static void
336 man_node_free(struct man_node *p)
337 {
338 
339 	if (p->string)
340 		free(p->string);
341 	free(p);
342 }
343 
344 void
345 man_node_delete(struct man *man, struct man_node *p)
346 {
347 
348 	while (p->child)
349 		man_node_delete(man, p->child);
350 
351 	man_node_unlink(man, p);
352 	man_node_free(p);
353 }
354 
355 int
356 man_addeqn(struct man *man, const struct eqn *ep)
357 {
358 	struct man_node	*n;
359 
360 	n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
361 	n->eqn = ep;
362 	if (ep->ln > man->last->line)
363 		n->flags |= MAN_LINE;
364 
365 	if ( ! man_node_append(man, n))
366 		return(0);
367 
368 	man->next = MAN_NEXT_SIBLING;
369 	return(man_descope(man, ep->ln, ep->pos));
370 }
371 
372 int
373 man_addspan(struct man *man, const struct tbl_span *sp)
374 {
375 	struct man_node	*n;
376 
377 	n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX);
378 	n->span = sp;
379 
380 	if ( ! man_node_append(man, n))
381 		return(0);
382 
383 	man->next = MAN_NEXT_SIBLING;
384 	return(man_descope(man, sp->line, 0));
385 }
386 
387 static int
388 man_descope(struct man *man, int line, int offs)
389 {
390 	/*
391 	 * Co-ordinate what happens with having a next-line scope open:
392 	 * first close out the element scope (if applicable), then close
393 	 * out the block scope (also if applicable).
394 	 */
395 
396 	if (MAN_ELINE & man->flags) {
397 		man->flags &= ~MAN_ELINE;
398 		if ( ! man_unscope(man, man->last->parent))
399 			return(0);
400 	}
401 
402 	if ( ! (MAN_BLINE & man->flags))
403 		return(1);
404 	man->flags &= ~MAN_BLINE;
405 
406 	if ( ! man_unscope(man, man->last->parent))
407 		return(0);
408 	return(man_body_alloc(man, line, offs, man->last->tok));
409 }
410 
411 static int
412 man_ptext(struct man *man, int line, char *buf, int offs)
413 {
414 	int		 i;
415 
416 	/* Literal free-form text whitespace is preserved. */
417 
418 	if (MAN_LITERAL & man->flags) {
419 		if ( ! man_word_alloc(man, line, offs, buf + offs))
420 			return(0);
421 		return(man_descope(man, line, offs));
422 	}
423 
424 	for (i = offs; ' ' == buf[i]; i++)
425 		/* Skip leading whitespace. */ ;
426 
427 	/*
428 	 * Blank lines are ignored right after headings
429 	 * but add a single vertical space elsewhere.
430 	 */
431 
432 	if ('\0' == buf[i]) {
433 		/* Allocate a blank entry. */
434 		if (MAN_SH != man->last->tok &&
435 		    MAN_SS != man->last->tok) {
436 			if ( ! man_elem_alloc(man, line, offs, MAN_sp))
437 				return(0);
438 			man->next = MAN_NEXT_SIBLING;
439 		}
440 		return(1);
441 	}
442 
443 	/*
444 	 * Warn if the last un-escaped character is whitespace. Then
445 	 * strip away the remaining spaces (tabs stay!).
446 	 */
447 
448 	i = (int)strlen(buf);
449 	assert(i);
450 
451 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
452 		if (i > 1 && '\\' != buf[i - 2])
453 			mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
454 			    line, i - 1, NULL);
455 
456 		for (--i; i && ' ' == buf[i]; i--)
457 			/* Spin back to non-space. */ ;
458 
459 		/* Jump ahead of escaped whitespace. */
460 		i += '\\' == buf[i] ? 2 : 1;
461 
462 		buf[i] = '\0';
463 	}
464 
465 	if ( ! man_word_alloc(man, line, offs, buf + offs))
466 		return(0);
467 
468 	/*
469 	 * End-of-sentence check.  If the last character is an unescaped
470 	 * EOS character, then flag the node as being the end of a
471 	 * sentence.  The front-end will know how to interpret this.
472 	 */
473 
474 	assert(i);
475 	if (mandoc_eos(buf, (size_t)i))
476 		man->last->flags |= MAN_EOS;
477 
478 	return(man_descope(man, line, offs));
479 }
480 
/*
 * Parse a macro line.  "offs" points at the macro name in "buf",
 * just past the control character.
 *
 * The name is looked up, next-line scopes that the new macro breaks
 * are deleted with a MANDOCERR_BLK_LINE message, the macro's handler
 * from man_macros[] is called, and finally a pending block head
 * next-line scope is closed and switched to a body where applicable.
 *
 * Returns 1 to continue parsing (this includes unknown macros, which
 * are only reported), 0 if the handler, man_unscope() or
 * man_body_alloc() reports failure, and 2 in quick mode once an SH
 * macro whose first head word is not "NAME" has opened its body.
 */
static int
man_pmacro(struct man *man, int ln, char *buf, int offs)
{
	char		 mac[5];
	struct man_node	*n;
	enum mant	 tok;
	int		 i, ppos;
	int		 bline;

	/* Remember where the macro name starts, for messages and nodes. */
	ppos = offs;

	/*
	 * Copy the first word into a nil-terminated buffer.
	 * Stop copying when a tab, space, or eoln is encountered.
	 * At most four characters are copied, so mac[] cannot overflow.
	 */

	i = 0;
	while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] &&
	    '\t' != buf[offs])
		mac[i++] = buf[offs++];

	mac[i] = '\0';

	/* Only names of one to three characters are looked up. */
	tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;

	/* Unknown macro: report it and ignore the line. */
	if (MAN_MAX == tok) {
		mandoc_msg(MANDOCERR_MACRO, man->parse,
		    ln, ppos, buf + ppos - 1);
		return(1);
	}

	/* The macro is sane.  Jump to the next word. */

	while (buf[offs] && ' ' == buf[offs])
		offs++;

	/*
	 * Trailing whitespace.  Note that tabs are allowed to be passed
	 * into the parser as "text", so we only warn about spaces here.
	 */

	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
		mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
		    ln, offs - 1, NULL);

	/*
	 * Remove prior ELINE macro, as it's being clobbered by a new
	 * macro.  Note that NSCOPED macros do not close out ELINE
	 * macros---they don't print text---so we let those slip by.
	 */

	if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
			man->flags & MAN_ELINE) {
		n = man->last;
		assert(MAN_TEXT != n->type);

		/* Remove repeated NSCOPED macros causing ELINE. */

		if (MAN_NSCOPED & man_macros[n->tok].flags)
			n = n->parent;

		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line,
		    n->pos, "%s breaks %s", man_macronames[tok],
		    man_macronames[n->tok]);

		man_node_delete(man, n);
		man->flags &= ~MAN_ELINE;
	}

	/*
	 * Remove prior BLINE macro that is being clobbered.
	 * This only happens when the new macro has block scope.
	 */
	if ((man->flags & MAN_BLINE) &&
	    (MAN_BSCOPE & man_macros[tok].flags)) {
		n = man->last;

		/* Might be a text node like 8 in
		 * .TP 8
		 * .SH foo
		 */
		if (MAN_TEXT == n->type)
			n = n->parent;

		/* Remove element that didn't end BLINE, if any. */
		if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
			n = n->parent;

		/* Step from the open head up to its block and delete that. */
		assert(MAN_HEAD == n->type);
		n = n->parent;
		assert(MAN_BLOCK == n->type);
		assert(MAN_SCOPED & man_macros[n->tok].flags);

		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line,
		    n->pos, "%s breaks %s", man_macronames[tok],
		    man_macronames[n->tok]);

		man_node_delete(man, n);
		man->flags &= ~MAN_BLINE;
	}

	/*
	 * Remember whether we are in next-line scope for a block head.
	 * This is sampled before the handler runs, which may itself
	 * change the flags.
	 */

	bline = man->flags & MAN_BLINE;

	/* Call to handler... */

	assert(man_macros[tok].fp);
	if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf))
		return(0);

	/*
	 * In quick mode (for mandocdb), abort after the NAME section:
	 * return 2 as soon as an SH body has been opened whose first
	 * head word differs from "NAME".
	 */

	if (man->quick && MAN_SH == tok) {
		n = man->last;
		if (MAN_BODY == n->type &&
		    strcmp(n->prev->child->string, "NAME"))
			return(2);
	}

	/*
	 * If we are in a next-line scope for a block head,
	 * close it out now and switch to the body,
	 * unless the next-line scope is allowed to continue.
	 */

	if ( ! bline || man->flags & MAN_ELINE ||
	    man_macros[tok].flags & MAN_NSCOPED)
		return(1);

	assert(MAN_BLINE & man->flags);
	man->flags &= ~MAN_BLINE;

	if ( ! man_unscope(man, man->last->parent))
		return(0);
	return(man_body_alloc(man, ln, ppos, man->last->tok));
}
617 
618 /*
619  * Unlink a node from its context.  If "man" is provided, the last parse
620  * point will also be adjusted accordingly.
621  */
622 static void
623 man_node_unlink(struct man *man, struct man_node *n)
624 {
625 
626 	/* Adjust siblings. */
627 
628 	if (n->prev)
629 		n->prev->next = n->next;
630 	if (n->next)
631 		n->next->prev = n->prev;
632 
633 	/* Adjust parent. */
634 
635 	if (n->parent) {
636 		n->parent->nchild--;
637 		if (n->parent->child == n)
638 			n->parent->child = n->prev ? n->prev : n->next;
639 	}
640 
641 	/* Adjust parse point, if applicable. */
642 
643 	if (man && man->last == n) {
644 		/*XXX: this can occur when bailing from validation. */
645 		/*assert(NULL == n->next);*/
646 		if (n->prev) {
647 			man->last = n->prev;
648 			man->next = MAN_NEXT_SIBLING;
649 		} else {
650 			man->last = n->parent;
651 			man->next = MAN_NEXT_CHILD;
652 		}
653 	}
654 
655 	if (man && man->first == n)
656 		man->first = NULL;
657 }
658 
659 const struct mparse *
660 man_mparse(const struct man *man)
661 {
662 
663 	assert(man && man->parse);
664 	return(man->parse);
665 }
666 
667 void
668 man_deroff(char **dest, const struct man_node *n)
669 {
670 	char	*cp;
671 	size_t	 sz;
672 
673 	if (MAN_TEXT != n->type) {
674 		for (n = n->child; n; n = n->next)
675 			man_deroff(dest, n);
676 		return;
677 	}
678 
679 	/* Skip leading whitespace and escape sequences. */
680 
681 	cp = n->string;
682 	while ('\0' != *cp) {
683 		if ('\\' == *cp) {
684 			cp++;
685 			mandoc_escape((const char **)&cp, NULL, NULL);
686 		} else if (isspace((unsigned char)*cp))
687 			cp++;
688 		else
689 			break;
690 	}
691 
692 	/* Skip trailing whitespace. */
693 
694 	for (sz = strlen(cp); sz; sz--)
695 		if (0 == isspace((unsigned char)cp[sz-1]))
696 			break;
697 
698 	/* Skip empty strings. */
699 
700 	if (0 == sz)
701 		return;
702 
703 	if (NULL == *dest) {
704 		*dest = mandoc_strndup(cp, sz);
705 		return;
706 	}
707 
708 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
709 	free(*dest);
710 	*dest = cp;
711 }
712