xref: /netbsd-src/external/bsd/mdocml/dist/man.c (revision 6167eca2d062f3691f8b22e3b8ea212d6dde852a)
1*6167eca2Schristos /*	Id: man.c,v 1.187 2019/01/05 00:36:50 schwarze Exp  */
24154958bSjoerg /*
348741257Sjoerg  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*6167eca2Schristos  * Copyright (c) 2013-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5fec65c98Schristos  * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
64154958bSjoerg  *
74154958bSjoerg  * Permission to use, copy, modify, and distribute this software for any
84154958bSjoerg  * purpose with or without fee is hereby granted, provided that the above
94154958bSjoerg  * copyright notice and this permission notice appear in all copies.
104154958bSjoerg  *
11f47368cfSchristos  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
124154958bSjoerg  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13f47368cfSchristos  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
144154958bSjoerg  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
154154958bSjoerg  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
164154958bSjoerg  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
174154958bSjoerg  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
184154958bSjoerg  */
19d5e63c8dSjoerg #include "config.h"
20d5e63c8dSjoerg 
214154958bSjoerg #include <sys/types.h>
224154958bSjoerg 
234154958bSjoerg #include <assert.h>
24fec65c98Schristos #include <ctype.h>
254154958bSjoerg #include <stdarg.h>
264154958bSjoerg #include <stdlib.h>
274154958bSjoerg #include <stdio.h>
284154958bSjoerg #include <string.h>
294154958bSjoerg 
30fec65c98Schristos #include "mandoc_aux.h"
31f47368cfSchristos #include "mandoc.h"
32f47368cfSchristos #include "roff.h"
33f47368cfSchristos #include "man.h"
343514411fSjoerg #include "libmandoc.h"
35f47368cfSchristos #include "roff_int.h"
36f47368cfSchristos #include "libman.h"
374154958bSjoerg 
38*6167eca2Schristos static	char		*man_hasc(char *);
39f47368cfSchristos static	int		 man_ptext(struct roff_man *, int, char *, int);
40f47368cfSchristos static	int		 man_pmacro(struct roff_man *, int, char *, int);
414154958bSjoerg 
424154958bSjoerg 
434154958bSjoerg int
man_parseln(struct roff_man * man,int ln,char * buf,int offs)44f47368cfSchristos man_parseln(struct roff_man *man, int ln, char *buf, int offs)
454154958bSjoerg {
464154958bSjoerg 
47f47368cfSchristos 	if (man->last->type != ROFFT_EQN || ln > man->last->line)
4870f041f9Sjoerg 		man->flags |= MAN_NEWLINE;
4948741257Sjoerg 
50f47368cfSchristos 	return roff_getcontrol(man->roff, buf, &offs) ?
5170f041f9Sjoerg 	    man_pmacro(man, ln, buf, offs) :
52f47368cfSchristos 	    man_ptext(man, ln, buf, offs);
534154958bSjoerg }
544154958bSjoerg 
/*
 * If the string ends with an unescaped \c, return a pointer to the
 * backslash of that \c.  Otherwise, return NULL.
 * Strings shorter than two bytes, including the empty string,
 * never match.
 */
static char *
man_hasc(char *start)
{
	char	*cp, *ep;
	size_t	 len;

	/*
	 * Check the length before doing any pointer arithmetic:
	 * forming a pointer before the start of the string is
	 * undefined behaviour, even if it is only compared.
	 */

	len = strlen(start);
	if (len < 2)
		return NULL;

	ep = start + len - 2;
	if (ep[0] != '\\' || ep[1] != 'c')
		return NULL;

	/*
	 * Count the backslashes directly preceding the candidate.
	 * An odd count means the candidate backslash is itself
	 * escaped, so what follows it is a literal 'c'.
	 */

	for (cp = ep; cp > start; cp--)
		if (cp[-1] != '\\')
			break;
	return (ep - cp) % 2 ? NULL : ep;
}
72*6167eca2Schristos 
73*6167eca2Schristos void
man_descope(struct roff_man * man,int line,int offs,char * start)74*6167eca2Schristos man_descope(struct roff_man *man, int line, int offs, char *start)
75*6167eca2Schristos {
76*6167eca2Schristos 	/* Trailing \c keeps next-line scope open. */
77*6167eca2Schristos 
78*6167eca2Schristos 	if (start != NULL && man_hasc(start) != NULL)
79*6167eca2Schristos 		return;
80*6167eca2Schristos 
814154958bSjoerg 	/*
824154958bSjoerg 	 * Co-ordinate what happens with having a next-line scope open:
83*6167eca2Schristos 	 * first close out the element scopes (if applicable),
84*6167eca2Schristos 	 * then close out the block scope (also if applicable).
854154958bSjoerg 	 */
864154958bSjoerg 
87fec65c98Schristos 	if (man->flags & MAN_ELINE) {
88*6167eca2Schristos 		while (man->last->parent->type != ROFFT_ROOT &&
89*6167eca2Schristos 		    man_macro(man->last->parent->tok)->flags & MAN_ESCOPED)
90fec65c98Schristos 			man_unscope(man, man->last->parent);
91*6167eca2Schristos 		man->flags &= ~MAN_ELINE;
924154958bSjoerg 	}
93fec65c98Schristos 	if ( ! (man->flags & MAN_BLINE))
94fec65c98Schristos 		return;
95fec65c98Schristos 	man_unscope(man, man->last->parent);
96f47368cfSchristos 	roff_body_alloc(man, line, offs, man->last->tok);
97*6167eca2Schristos 	man->flags &= ~(MAN_BLINE | ROFF_NONOFILL);
984154958bSjoerg }
994154958bSjoerg 
10022af4063Sjoerg static int
man_ptext(struct roff_man * man,int line,char * buf,int offs)101f47368cfSchristos man_ptext(struct roff_man *man, int line, char *buf, int offs)
10222af4063Sjoerg {
103c0d9444aSjoerg 	int		 i;
10414e7489eSchristos 	char		*ep;
1050a84adc5Sjoerg 
106*6167eca2Schristos 	/* In no-fill mode, whitespace is preserved on text lines. */
107c0d9444aSjoerg 
108*6167eca2Schristos 	if (man->flags & ROFF_NOFILL) {
109f47368cfSchristos 		roff_word_alloc(man, line, offs, buf + offs);
110*6167eca2Schristos 		man_descope(man, line, offs, buf + offs);
111f47368cfSchristos 		return 1;
112c0d9444aSjoerg 	}
113c0d9444aSjoerg 
114fec65c98Schristos 	for (i = offs; buf[i] == ' '; i++)
115c0d9444aSjoerg 		/* Skip leading whitespace. */ ;
116c0d9444aSjoerg 
11770f041f9Sjoerg 	/*
11814e7489eSchristos 	 * Blank lines are ignored in next line scope
11914e7489eSchristos 	 * and right after headings and cancel preceding \c,
12070f041f9Sjoerg 	 * but add a single vertical space elsewhere.
12170f041f9Sjoerg 	 */
12270f041f9Sjoerg 
123fec65c98Schristos 	if (buf[i] == '\0') {
12414e7489eSchristos 		if (man->flags & (MAN_ELINE | MAN_BLINE)) {
125*6167eca2Schristos 			mandoc_msg(MANDOCERR_BLK_BLANK, line, 0, NULL);
12614e7489eSchristos 			return 1;
12770f041f9Sjoerg 		}
12814e7489eSchristos 		if (man->last->tok == MAN_SH || man->last->tok == MAN_SS)
12914e7489eSchristos 			return 1;
130*6167eca2Schristos 		if (man->last->type == ROFFT_TEXT &&
131*6167eca2Schristos 		    ((ep = man_hasc(man->last->string)) != NULL)) {
13214e7489eSchristos 			*ep = '\0';
13314e7489eSchristos 			return 1;
13414e7489eSchristos 		}
13514e7489eSchristos 		roff_elem_alloc(man, line, offs, ROFF_sp);
13614e7489eSchristos 		man->next = ROFF_NEXT_SIBLING;
137f47368cfSchristos 		return 1;
138c0d9444aSjoerg 	}
139c0d9444aSjoerg 
140c0d9444aSjoerg 	/*
141c0d9444aSjoerg 	 * Warn if the last un-escaped character is whitespace. Then
142c0d9444aSjoerg 	 * strip away the remaining spaces (tabs stay!).
143c0d9444aSjoerg 	 */
144c0d9444aSjoerg 
145c0d9444aSjoerg 	i = (int)strlen(buf);
146c0d9444aSjoerg 	assert(i);
147c0d9444aSjoerg 
148c0d9444aSjoerg 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
149c0d9444aSjoerg 		if (i > 1 && '\\' != buf[i - 2])
150*6167eca2Schristos 			mandoc_msg(MANDOCERR_SPACE_EOL, line, i - 1, NULL);
151c0d9444aSjoerg 
152c0d9444aSjoerg 		for (--i; i && ' ' == buf[i]; i--)
153c0d9444aSjoerg 			/* Spin back to non-space. */ ;
154c0d9444aSjoerg 
155c0d9444aSjoerg 		/* Jump ahead of escaped whitespace. */
156c0d9444aSjoerg 		i += '\\' == buf[i] ? 2 : 1;
157c0d9444aSjoerg 
158c0d9444aSjoerg 		buf[i] = '\0';
159c0d9444aSjoerg 	}
160f47368cfSchristos 	roff_word_alloc(man, line, offs, buf + offs);
161c0d9444aSjoerg 
162c0d9444aSjoerg 	/*
163c0d9444aSjoerg 	 * End-of-sentence check.  If the last character is an unescaped
164c0d9444aSjoerg 	 * EOS character, then flag the node as being the end of a
165c0d9444aSjoerg 	 * sentence.  The front-end will know how to interpret this.
166c0d9444aSjoerg 	 */
167c0d9444aSjoerg 
168c0d9444aSjoerg 	assert(i);
16970f041f9Sjoerg 	if (mandoc_eos(buf, (size_t)i))
17037ef69edSchristos 		man->last->flags |= NODE_EOS;
171c0d9444aSjoerg 
172*6167eca2Schristos 	man_descope(man, line, offs, buf + offs);
173f47368cfSchristos 	return 1;
17422af4063Sjoerg }
17522af4063Sjoerg 
176c0d9444aSjoerg static int
man_pmacro(struct roff_man * man,int ln,char * buf,int offs)177f47368cfSchristos man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
1784154958bSjoerg {
179f47368cfSchristos 	struct roff_node *n;
180fec65c98Schristos 	const char	*cp;
18114e7489eSchristos 	size_t		 sz;
18214e7489eSchristos 	enum roff_tok	 tok;
18314e7489eSchristos 	int		 ppos;
184fec65c98Schristos 	int		 bline;
18514e7489eSchristos 
18614e7489eSchristos 	/* Determine the line macro. */
1874154958bSjoerg 
18848741257Sjoerg 	ppos = offs;
18914e7489eSchristos 	tok = TOKEN_NONE;
19014e7489eSchristos 	for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
19114e7489eSchristos 		offs++;
19214e7489eSchristos 	if (sz > 0 && sz < 4)
19314e7489eSchristos 		tok = roffhash_find(man->manmac, buf + ppos, sz);
194f47368cfSchristos 	if (tok == TOKEN_NONE) {
195*6167eca2Schristos 		mandoc_msg(MANDOCERR_MACRO, ln, ppos, "%s", buf + ppos - 1);
196f47368cfSchristos 		return 1;
1974154958bSjoerg 	}
1984154958bSjoerg 
199fec65c98Schristos 	/* Skip a leading escape sequence or tab. */
2004154958bSjoerg 
201fec65c98Schristos 	switch (buf[offs]) {
202fec65c98Schristos 	case '\\':
203fec65c98Schristos 		cp = buf + offs + 1;
204fec65c98Schristos 		mandoc_escape(&cp, NULL, NULL);
205fec65c98Schristos 		offs = cp - buf;
206fec65c98Schristos 		break;
207fec65c98Schristos 	case '\t':
208fec65c98Schristos 		offs++;
209fec65c98Schristos 		break;
210fec65c98Schristos 	default:
211fec65c98Schristos 		break;
212fec65c98Schristos 	}
213fec65c98Schristos 
214fec65c98Schristos 	/* Jump to the next non-whitespace word. */
215fec65c98Schristos 
21614e7489eSchristos 	while (buf[offs] == ' ')
21748741257Sjoerg 		offs++;
2184154958bSjoerg 
2190a84adc5Sjoerg 	/*
2200a84adc5Sjoerg 	 * Trailing whitespace.  Note that tabs are allowed to be passed
2210a84adc5Sjoerg 	 * into the parser as "text", so we only warn about spaces here.
2220a84adc5Sjoerg 	 */
223d5e63c8dSjoerg 
224fec65c98Schristos 	if (buf[offs] == '\0' && buf[offs - 1] == ' ')
225*6167eca2Schristos 		mandoc_msg(MANDOCERR_SPACE_EOL, ln, offs - 1, NULL);
226d5e63c8dSjoerg 
22731e1f4e3Sjoerg 	/*
228fec65c98Schristos 	 * Some macros break next-line scopes; otherwise, remember
229fec65c98Schristos 	 * whether we are in next-line scope for a block head.
23031e1f4e3Sjoerg 	 */
2314154958bSjoerg 
232fec65c98Schristos 	man_breakscope(man, tok);
233fec65c98Schristos 	bline = man->flags & MAN_BLINE;
234fec65c98Schristos 
23514e7489eSchristos 	/*
23614e7489eSchristos 	 * If the line in next-line scope ends with \c, keep the
23714e7489eSchristos 	 * next-line scope open for the subsequent input line.
23814e7489eSchristos 	 * That is not at all portable, only groff >= 1.22.4
23914e7489eSchristos 	 * does it, but *if* this weird idiom occurs in a manual
24014e7489eSchristos 	 * page, that's very likely what the author intended.
24114e7489eSchristos 	 */
24214e7489eSchristos 
243*6167eca2Schristos 	if (bline && man_hasc(buf + offs))
24414e7489eSchristos 		bline = 0;
24514e7489eSchristos 
246fec65c98Schristos 	/* Call to handler... */
247fec65c98Schristos 
248*6167eca2Schristos 	(*man_macro(tok)->fp)(man, tok, ln, ppos, &offs, buf);
249fec65c98Schristos 
250fec65c98Schristos 	/* In quick mode (for mandocdb), abort after the NAME section. */
251fec65c98Schristos 
252fec65c98Schristos 	if (man->quick && tok == MAN_SH) {
25370f041f9Sjoerg 		n = man->last;
254f47368cfSchristos 		if (n->type == ROFFT_BODY &&
255fec65c98Schristos 		    strcmp(n->prev->child->string, "NAME"))
256f47368cfSchristos 			return 2;
257fec65c98Schristos 	}
25831e1f4e3Sjoerg 
259fec65c98Schristos 	/*
260fec65c98Schristos 	 * If we are in a next-line scope for a block head,
261fec65c98Schristos 	 * close it out now and switch to the body,
262fec65c98Schristos 	 * unless the next-line scope is allowed to continue.
263fec65c98Schristos 	 */
26431e1f4e3Sjoerg 
265*6167eca2Schristos 	if (bline == 0 ||
266*6167eca2Schristos 	    (man->flags & MAN_BLINE) == 0 ||
267*6167eca2Schristos 	    man->flags & MAN_ELINE ||
268*6167eca2Schristos 	    man_macro(tok)->flags & MAN_NSCOPED)
269f47368cfSchristos 		return 1;
270fec65c98Schristos 
271fec65c98Schristos 	man_unscope(man, man->last->parent);
272f47368cfSchristos 	roff_body_alloc(man, ln, ppos, man->last->tok);
273*6167eca2Schristos 	man->flags &= ~(MAN_BLINE | ROFF_NONOFILL);
274f47368cfSchristos 	return 1;
275fec65c98Schristos }
276fec65c98Schristos 
277fec65c98Schristos void
man_breakscope(struct roff_man * man,int tok)278f47368cfSchristos man_breakscope(struct roff_man *man, int tok)
279fec65c98Schristos {
280f47368cfSchristos 	struct roff_node *n;
281fec65c98Schristos 
282fec65c98Schristos 	/*
283fec65c98Schristos 	 * An element next line scope is open,
284fec65c98Schristos 	 * and the new macro is not allowed inside elements.
285fec65c98Schristos 	 * Delete the element that is being broken.
286fec65c98Schristos 	 */
287fec65c98Schristos 
28814e7489eSchristos 	if (man->flags & MAN_ELINE && (tok < MAN_TH ||
289*6167eca2Schristos 	    (man_macro(tok)->flags & MAN_NSCOPED) == 0)) {
290fec65c98Schristos 		n = man->last;
29114e7489eSchristos 		if (n->type == ROFFT_TEXT)
29214e7489eSchristos 			n = n->parent;
29314e7489eSchristos 		if (n->tok < MAN_TH ||
294*6167eca2Schristos 		    (man_macro(n->tok)->flags & (MAN_NSCOPED | MAN_ESCOPED))
295*6167eca2Schristos 		     == MAN_NSCOPED)
296c0d9444aSjoerg 			n = n->parent;
297c0d9444aSjoerg 
298*6167eca2Schristos 		mandoc_msg(MANDOCERR_BLK_LINE, n->line, n->pos,
299*6167eca2Schristos 		    "%s breaks %s", roff_name[tok], roff_name[n->tok]);
3004154958bSjoerg 
301f47368cfSchristos 		roff_node_delete(man, n);
30270f041f9Sjoerg 		man->flags &= ~MAN_ELINE;
3034154958bSjoerg 	}
3044154958bSjoerg 
30531e1f4e3Sjoerg 	/*
306f47368cfSchristos 	 * Weird special case:
307f47368cfSchristos 	 * Switching fill mode closes section headers.
308f47368cfSchristos 	 */
309f47368cfSchristos 
310f47368cfSchristos 	if (man->flags & MAN_BLINE &&
311*6167eca2Schristos 	    (tok == ROFF_nf || tok == ROFF_fi) &&
312f47368cfSchristos 	    (man->last->tok == MAN_SH || man->last->tok == MAN_SS)) {
313f47368cfSchristos 		n = man->last;
314f47368cfSchristos 		man_unscope(man, n);
315f47368cfSchristos 		roff_body_alloc(man, n->line, n->pos, n->tok);
316*6167eca2Schristos 		man->flags &= ~(MAN_BLINE | ROFF_NONOFILL);
317f47368cfSchristos 	}
318f47368cfSchristos 
319f47368cfSchristos 	/*
320fec65c98Schristos 	 * A block header next line scope is open,
321fec65c98Schristos 	 * and the new macro is not allowed inside block headers.
322fec65c98Schristos 	 * Delete the block that is being broken.
323cf816816Sjoerg 	 */
324fec65c98Schristos 
325*6167eca2Schristos 	if (man->flags & MAN_BLINE && tok != ROFF_nf && tok != ROFF_fi &&
326*6167eca2Schristos 	    (tok < MAN_TH || man_macro(tok)->flags & MAN_XSCOPE)) {
32770f041f9Sjoerg 		n = man->last;
328f47368cfSchristos 		if (n->type == ROFFT_TEXT)
329fec65c98Schristos 			n = n->parent;
33014e7489eSchristos 		if (n->tok < MAN_TH ||
331*6167eca2Schristos 		    (man_macro(n->tok)->flags & MAN_XSCOPE) == 0)
332cf816816Sjoerg 			n = n->parent;
333cf816816Sjoerg 
334f47368cfSchristos 		assert(n->type == ROFFT_HEAD);
335cf816816Sjoerg 		n = n->parent;
336f47368cfSchristos 		assert(n->type == ROFFT_BLOCK);
337*6167eca2Schristos 		assert(man_macro(n->tok)->flags & MAN_BSCOPED);
338cf816816Sjoerg 
339*6167eca2Schristos 		mandoc_msg(MANDOCERR_BLK_LINE, n->line, n->pos,
340*6167eca2Schristos 		    "%s breaks %s", roff_name[tok], roff_name[n->tok]);
341cf816816Sjoerg 
342f47368cfSchristos 		roff_node_delete(man, n);
343*6167eca2Schristos 		man->flags &= ~(MAN_BLINE | ROFF_NONOFILL);
344cf816816Sjoerg 	}
3454154958bSjoerg }
346