xref: /openbsd-src/usr.bin/mandoc/man.c (revision 48950c12d106c85f315112191a0228d7b83b9510)
1 /*	$Id: man.c,v 1.69 2012/11/17 00:25:20 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/types.h>
18 
19 #include <assert.h>
20 #include <stdarg.h>
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 #include "man.h"
26 #include "mandoc.h"
27 #include "libman.h"
28 #include "libmandoc.h"
29 
30 const	char *const __man_macronames[MAN_MAX] = {
31 	"br",		"TH",		"SH",		"SS",
32 	"TP", 		"LP",		"PP",		"P",
33 	"IP",		"HP",		"SM",		"SB",
34 	"BI",		"IB",		"BR",		"RB",
35 	"R",		"B",		"I",		"IR",
36 	"RI",		"na",		"sp",		"nf",
37 	"fi",		"RE",		"RS",		"DT",
38 	"UC",		"PD",		"AT",		"in",
39 	"ft",		"OP",		"EX",		"EE"
40 	};
41 
42 const	char * const *man_macronames = __man_macronames;
43 
/* Node construction, linkage and destruction. */
static	struct man_node	*man_node_alloc(struct man *man, int line, int pos,
				enum man_type type, enum mant tok);
static	int		 man_node_append(struct man *man,
				struct man_node *p);
static	void		 man_node_unlink(struct man *man,
				struct man_node *n);
static	void		 man_node_free(struct man_node *p);

/* Per-line parsing. */
static	int		 man_pmacro(struct man *man, int ln, char *buf,
				int offs);
static	int		 man_ptext(struct man *man, int line, char *buf,
				int offs);
static	int		 man_descope(struct man *man, int line, int offs);

/* Parser state set-up and tear-down. */
static	void		 man_alloc1(struct man *man);
static	void		 man_free1(struct man *man);
56 
57 
58 const struct man_node *
59 man_node(const struct man *man)
60 {
61 
62 	assert( ! (MAN_HALT & man->flags));
63 	return(man->first);
64 }
65 
66 
67 const struct man_meta *
68 man_meta(const struct man *man)
69 {
70 
71 	assert( ! (MAN_HALT & man->flags));
72 	return(&man->meta);
73 }
74 
75 
/*
 * Throw away everything parsed so far and return the parser to its
 * freshly-allocated state; the structure itself is reused.
 */
void
man_reset(struct man *man)
{
	man_free1(man);		/* drop the node tree and meta strings */
	man_alloc1(man);	/* clear meta/flags, install a new root */
}
83 
84 
/*
 * Destroy a parser: release what it owns via man_free1(), then the
 * structure itself.  The pointer must not be used afterwards.
 */
void
man_free(struct man *man)
{
	man_free1(man);		/* contents first ... */
	free(man);		/* ... then the container */
}
92 
93 
94 struct man *
95 man_alloc(struct roff *roff, struct mparse *parse)
96 {
97 	struct man	*p;
98 
99 	p = mandoc_calloc(1, sizeof(struct man));
100 
101 	man_hash_init();
102 	p->parse = parse;
103 	p->roff = roff;
104 
105 	man_alloc1(p);
106 	return(p);
107 }
108 
109 
110 int
111 man_endparse(struct man *man)
112 {
113 
114 	assert( ! (MAN_HALT & man->flags));
115 	if (man_macroend(man))
116 		return(1);
117 	man->flags |= MAN_HALT;
118 	return(0);
119 }
120 
121 
122 int
123 man_parseln(struct man *man, int ln, char *buf, int offs)
124 {
125 
126 	man->flags |= MAN_NEWLINE;
127 
128 	assert( ! (MAN_HALT & man->flags));
129 
130 	return (roff_getcontrol(man->roff, buf, &offs) ?
131 			man_pmacro(man, ln, buf, offs) :
132 			man_ptext(man, ln, buf, offs));
133 }
134 
135 
136 static void
137 man_free1(struct man *man)
138 {
139 
140 	if (man->first)
141 		man_node_delete(man, man->first);
142 	if (man->meta.title)
143 		free(man->meta.title);
144 	if (man->meta.source)
145 		free(man->meta.source);
146 	if (man->meta.date)
147 		free(man->meta.date);
148 	if (man->meta.vol)
149 		free(man->meta.vol);
150 	if (man->meta.msec)
151 		free(man->meta.msec);
152 }
153 
154 
155 static void
156 man_alloc1(struct man *man)
157 {
158 
159 	memset(&man->meta, 0, sizeof(struct man_meta));
160 	man->flags = 0;
161 	man->last = mandoc_calloc(1, sizeof(struct man_node));
162 	man->first = man->last;
163 	man->last->type = MAN_ROOT;
164 	man->last->tok = MAN_MAX;
165 	man->next = MAN_NEXT_CHILD;
166 }
167 
168 
169 static int
170 man_node_append(struct man *man, struct man_node *p)
171 {
172 
173 	assert(man->last);
174 	assert(man->first);
175 	assert(MAN_ROOT != p->type);
176 
177 	switch (man->next) {
178 	case (MAN_NEXT_SIBLING):
179 		man->last->next = p;
180 		p->prev = man->last;
181 		p->parent = man->last->parent;
182 		break;
183 	case (MAN_NEXT_CHILD):
184 		man->last->child = p;
185 		p->parent = man->last;
186 		break;
187 	default:
188 		abort();
189 		/* NOTREACHED */
190 	}
191 
192 	assert(p->parent);
193 	p->parent->nchild++;
194 
195 	if ( ! man_valid_pre(man, p))
196 		return(0);
197 
198 	switch (p->type) {
199 	case (MAN_HEAD):
200 		assert(MAN_BLOCK == p->parent->type);
201 		p->parent->head = p;
202 		break;
203 	case (MAN_TAIL):
204 		assert(MAN_BLOCK == p->parent->type);
205 		p->parent->tail = p;
206 		break;
207 	case (MAN_BODY):
208 		assert(MAN_BLOCK == p->parent->type);
209 		p->parent->body = p;
210 		break;
211 	default:
212 		break;
213 	}
214 
215 	man->last = p;
216 
217 	switch (p->type) {
218 	case (MAN_TBL):
219 		/* FALLTHROUGH */
220 	case (MAN_TEXT):
221 		if ( ! man_valid_post(man))
222 			return(0);
223 		break;
224 	default:
225 		break;
226 	}
227 
228 	return(1);
229 }
230 
231 
232 static struct man_node *
233 man_node_alloc(struct man *man, int line, int pos,
234 		enum man_type type, enum mant tok)
235 {
236 	struct man_node *p;
237 
238 	p = mandoc_calloc(1, sizeof(struct man_node));
239 	p->line = line;
240 	p->pos = pos;
241 	p->type = type;
242 	p->tok = tok;
243 
244 	if (MAN_NEWLINE & man->flags)
245 		p->flags |= MAN_LINE;
246 	man->flags &= ~MAN_NEWLINE;
247 	return(p);
248 }
249 
250 
251 int
252 man_elem_alloc(struct man *man, int line, int pos, enum mant tok)
253 {
254 	struct man_node *p;
255 
256 	p = man_node_alloc(man, line, pos, MAN_ELEM, tok);
257 	if ( ! man_node_append(man, p))
258 		return(0);
259 	man->next = MAN_NEXT_CHILD;
260 	return(1);
261 }
262 
263 
264 int
265 man_tail_alloc(struct man *man, int line, int pos, enum mant tok)
266 {
267 	struct man_node *p;
268 
269 	p = man_node_alloc(man, line, pos, MAN_TAIL, tok);
270 	if ( ! man_node_append(man, p))
271 		return(0);
272 	man->next = MAN_NEXT_CHILD;
273 	return(1);
274 }
275 
276 
277 int
278 man_head_alloc(struct man *man, int line, int pos, enum mant tok)
279 {
280 	struct man_node *p;
281 
282 	p = man_node_alloc(man, line, pos, MAN_HEAD, tok);
283 	if ( ! man_node_append(man, p))
284 		return(0);
285 	man->next = MAN_NEXT_CHILD;
286 	return(1);
287 }
288 
289 
290 int
291 man_body_alloc(struct man *man, int line, int pos, enum mant tok)
292 {
293 	struct man_node *p;
294 
295 	p = man_node_alloc(man, line, pos, MAN_BODY, tok);
296 	if ( ! man_node_append(man, p))
297 		return(0);
298 	man->next = MAN_NEXT_CHILD;
299 	return(1);
300 }
301 
302 
303 int
304 man_block_alloc(struct man *man, int line, int pos, enum mant tok)
305 {
306 	struct man_node *p;
307 
308 	p = man_node_alloc(man, line, pos, MAN_BLOCK, tok);
309 	if ( ! man_node_append(man, p))
310 		return(0);
311 	man->next = MAN_NEXT_CHILD;
312 	return(1);
313 }
314 
315 int
316 man_word_alloc(struct man *man, int line, int pos, const char *word)
317 {
318 	struct man_node	*n;
319 
320 	n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX);
321 	n->string = roff_strdup(man->roff, word);
322 
323 	if ( ! man_node_append(man, n))
324 		return(0);
325 
326 	man->next = MAN_NEXT_SIBLING;
327 	return(1);
328 }
329 
330 
331 /*
332  * Free all of the resources held by a node.  This does NOT unlink a
333  * node from its context; for that, see man_node_unlink().
334  */
335 static void
336 man_node_free(struct man_node *p)
337 {
338 
339 	if (p->string)
340 		free(p->string);
341 	free(p);
342 }
343 
344 
345 void
346 man_node_delete(struct man *man, struct man_node *p)
347 {
348 
349 	while (p->child)
350 		man_node_delete(man, p->child);
351 
352 	man_node_unlink(man, p);
353 	man_node_free(p);
354 }
355 
356 int
357 man_addeqn(struct man *man, const struct eqn *ep)
358 {
359 	struct man_node	*n;
360 
361 	assert( ! (MAN_HALT & man->flags));
362 
363 	n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
364 	n->eqn = ep;
365 
366 	if ( ! man_node_append(man, n))
367 		return(0);
368 
369 	man->next = MAN_NEXT_SIBLING;
370 	return(man_descope(man, ep->ln, ep->pos));
371 }
372 
373 int
374 man_addspan(struct man *man, const struct tbl_span *sp)
375 {
376 	struct man_node	*n;
377 
378 	assert( ! (MAN_HALT & man->flags));
379 
380 	n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX);
381 	n->span = sp;
382 
383 	if ( ! man_node_append(man, n))
384 		return(0);
385 
386 	man->next = MAN_NEXT_SIBLING;
387 	return(man_descope(man, sp->line, 0));
388 }
389 
390 static int
391 man_descope(struct man *man, int line, int offs)
392 {
393 	/*
394 	 * Co-ordinate what happens with having a next-line scope open:
395 	 * first close out the element scope (if applicable), then close
396 	 * out the block scope (also if applicable).
397 	 */
398 
399 	if (MAN_ELINE & man->flags) {
400 		man->flags &= ~MAN_ELINE;
401 		if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
402 			return(0);
403 	}
404 
405 	if ( ! (MAN_BLINE & man->flags))
406 		return(1);
407 	man->flags &= ~MAN_BLINE;
408 
409 	if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
410 		return(0);
411 	return(man_body_alloc(man, line, offs, man->last->tok));
412 }
413 
414 static int
415 man_ptext(struct man *man, int line, char *buf, int offs)
416 {
417 	int		 i;
418 
419 	/* Literal free-form text whitespace is preserved. */
420 
421 	if (MAN_LITERAL & man->flags) {
422 		if ( ! man_word_alloc(man, line, offs, buf + offs))
423 			return(0);
424 		return(man_descope(man, line, offs));
425 	}
426 
427 	/* Pump blank lines directly into the backend. */
428 
429 	for (i = offs; ' ' == buf[i]; i++)
430 		/* Skip leading whitespace. */ ;
431 
432 	if ('\0' == buf[i]) {
433 		/* Allocate a blank entry. */
434 		if ( ! man_elem_alloc(man, line, offs, MAN_sp))
435 			return(0);
436 		man->next = MAN_NEXT_SIBLING;
437 		return(1);
438 	}
439 
440 	/*
441 	 * Warn if the last un-escaped character is whitespace. Then
442 	 * strip away the remaining spaces (tabs stay!).
443 	 */
444 
445 	i = (int)strlen(buf);
446 	assert(i);
447 
448 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
449 		if (i > 1 && '\\' != buf[i - 2])
450 			man_pmsg(man, line, i - 1, MANDOCERR_EOLNSPACE);
451 
452 		for (--i; i && ' ' == buf[i]; i--)
453 			/* Spin back to non-space. */ ;
454 
455 		/* Jump ahead of escaped whitespace. */
456 		i += '\\' == buf[i] ? 2 : 1;
457 
458 		buf[i] = '\0';
459 	}
460 
461 	if ( ! man_word_alloc(man, line, offs, buf + offs))
462 		return(0);
463 
464 	/*
465 	 * End-of-sentence check.  If the last character is an unescaped
466 	 * EOS character, then flag the node as being the end of a
467 	 * sentence.  The front-end will know how to interpret this.
468 	 */
469 
470 	assert(i);
471 	if (mandoc_eos(buf, (size_t)i, 0))
472 		man->last->flags |= MAN_EOS;
473 
474 	return(man_descope(man, line, offs));
475 }
476 
477 static int
478 man_pmacro(struct man *man, int ln, char *buf, int offs)
479 {
480 	int		 i, ppos;
481 	enum mant	 tok;
482 	char		 mac[5];
483 	struct man_node	*n;
484 
485 	if ('"' == buf[offs]) {
486 		man_pmsg(man, ln, offs, MANDOCERR_BADCOMMENT);
487 		return(1);
488 	} else if ('\0' == buf[offs])
489 		return(1);
490 
491 	ppos = offs;
492 
493 	/*
494 	 * Copy the first word into a nil-terminated buffer.
495 	 * Stop copying when a tab, space, or eoln is encountered.
496 	 */
497 
498 	i = 0;
499 	while (i < 4 && '\0' != buf[offs] &&
500 			' ' != buf[offs] && '\t' != buf[offs])
501 		mac[i++] = buf[offs++];
502 
503 	mac[i] = '\0';
504 
505 	tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
506 
507 	if (MAN_MAX == tok) {
508 		mandoc_vmsg(MANDOCERR_MACRO, man->parse, ln,
509 				ppos, "%s", buf + ppos - 1);
510 		return(1);
511 	}
512 
513 	/* The macro is sane.  Jump to the next word. */
514 
515 	while (buf[offs] && ' ' == buf[offs])
516 		offs++;
517 
518 	/*
519 	 * Trailing whitespace.  Note that tabs are allowed to be passed
520 	 * into the parser as "text", so we only warn about spaces here.
521 	 */
522 
523 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
524 		man_pmsg(man, ln, offs - 1, MANDOCERR_EOLNSPACE);
525 
526 	/*
527 	 * Remove prior ELINE macro, as it's being clobbered by a new
528 	 * macro.  Note that NSCOPED macros do not close out ELINE
529 	 * macros---they don't print text---so we let those slip by.
530 	 */
531 
532 	if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
533 			man->flags & MAN_ELINE) {
534 		n = man->last;
535 		assert(MAN_TEXT != n->type);
536 
537 		/* Remove repeated NSCOPED macros causing ELINE. */
538 
539 		if (MAN_NSCOPED & man_macros[n->tok].flags)
540 			n = n->parent;
541 
542 		mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line,
543 		    n->pos, "%s breaks %s", man_macronames[tok],
544 		    man_macronames[n->tok]);
545 
546 		man_node_delete(man, n);
547 		man->flags &= ~MAN_ELINE;
548 	}
549 
550 	/*
551 	 * Remove prior BLINE macro that is being clobbered.
552 	 */
553 	if ((man->flags & MAN_BLINE) &&
554 	    (MAN_BSCOPE & man_macros[tok].flags)) {
555 		n = man->last;
556 
557 		/* Might be a text node like 8 in
558 		 * .TP 8
559 		 * .SH foo
560 		 */
561 		if (MAN_TEXT == n->type)
562 			n = n->parent;
563 
564 		/* Remove element that didn't end BLINE, if any. */
565 		if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
566 			n = n->parent;
567 
568 		assert(MAN_HEAD == n->type);
569 		n = n->parent;
570 		assert(MAN_BLOCK == n->type);
571 		assert(MAN_SCOPED & man_macros[n->tok].flags);
572 
573 		mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line,
574 		    n->pos, "%s breaks %s", man_macronames[tok],
575 		    man_macronames[n->tok]);
576 
577 		man_node_delete(man, n);
578 		man->flags &= ~MAN_BLINE;
579 	}
580 
581 	/*
582 	 * Save the fact that we're in the next-line for a block.  In
583 	 * this way, embedded roff instructions can "remember" state
584 	 * when they exit.
585 	 */
586 
587 	if (MAN_BLINE & man->flags)
588 		man->flags |= MAN_BPLINE;
589 
590 	/* Call to handler... */
591 
592 	assert(man_macros[tok].fp);
593 	if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf))
594 		goto err;
595 
596 	/*
597 	 * We weren't in a block-line scope when entering the
598 	 * above-parsed macro, so return.
599 	 */
600 
601 	if ( ! (MAN_BPLINE & man->flags)) {
602 		man->flags &= ~MAN_ILINE;
603 		return(1);
604 	}
605 	man->flags &= ~MAN_BPLINE;
606 
607 	/*
608 	 * If we're in a block scope, then allow this macro to slip by
609 	 * without closing scope around it.
610 	 */
611 
612 	if (MAN_ILINE & man->flags) {
613 		man->flags &= ~MAN_ILINE;
614 		return(1);
615 	}
616 
617 	/*
618 	 * If we've opened a new next-line element scope, then return
619 	 * now, as the next line will close out the block scope.
620 	 */
621 
622 	if (MAN_ELINE & man->flags)
623 		return(1);
624 
625 	/* Close out the block scope opened in the prior line.  */
626 
627 	assert(MAN_BLINE & man->flags);
628 	man->flags &= ~MAN_BLINE;
629 
630 	if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
631 		return(0);
632 	return(man_body_alloc(man, ln, ppos, man->last->tok));
633 
634 err:	/* Error out. */
635 
636 	man->flags |= MAN_HALT;
637 	return(0);
638 }
639 
640 /*
641  * Unlink a node from its context.  If "man" is provided, the last parse
642  * point will also be adjusted accordingly.
643  */
644 static void
645 man_node_unlink(struct man *man, struct man_node *n)
646 {
647 
648 	/* Adjust siblings. */
649 
650 	if (n->prev)
651 		n->prev->next = n->next;
652 	if (n->next)
653 		n->next->prev = n->prev;
654 
655 	/* Adjust parent. */
656 
657 	if (n->parent) {
658 		n->parent->nchild--;
659 		if (n->parent->child == n)
660 			n->parent->child = n->prev ? n->prev : n->next;
661 	}
662 
663 	/* Adjust parse point, if applicable. */
664 
665 	if (man && man->last == n) {
666 		/*XXX: this can occur when bailing from validation. */
667 		/*assert(NULL == n->next);*/
668 		if (n->prev) {
669 			man->last = n->prev;
670 			man->next = MAN_NEXT_SIBLING;
671 		} else {
672 			man->last = n->parent;
673 			man->next = MAN_NEXT_CHILD;
674 		}
675 	}
676 
677 	if (man && man->first == n)
678 		man->first = NULL;
679 }
680 
681 const struct mparse *
682 man_mparse(const struct man *man)
683 {
684 
685 	assert(man && man->parse);
686 	return(man->parse);
687 }
688