xref: /openbsd-src/usr.bin/mandoc/man.c (revision ae3cb403620ab940fbaabb3055fac045a63d56b7)
1 /*	$OpenBSD: man.c,v 1.124 2017/06/28 12:52:27 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 
28 #include "mandoc_aux.h"
29 #include "mandoc.h"
30 #include "roff.h"
31 #include "man.h"
32 #include "libmandoc.h"
33 #include "roff_int.h"
34 #include "libman.h"
35 
/* Close open next-line element/block-head scopes after a line. */
static void man_descope(struct roff_man *man, int line, int offs);
/* Parse an input line that roff_getcontrol() did not flag as a request. */
static int man_ptext(struct roff_man *man, int line, char *buf, int offs);
/* Parse an input line that roff_getcontrol() flagged as a request. */
static int man_pmacro(struct roff_man *man, int ln, char *buf, int offs);
39 
40 
41 int
42 man_parseln(struct roff_man *man, int ln, char *buf, int offs)
43 {
44 
45 	if (man->last->type != ROFFT_EQN || ln > man->last->line)
46 		man->flags |= MAN_NEWLINE;
47 
48 	return roff_getcontrol(man->roff, buf, &offs) ?
49 	    man_pmacro(man, ln, buf, offs) :
50 	    man_ptext(man, ln, buf, offs);
51 }
52 
53 static void
54 man_descope(struct roff_man *man, int line, int offs)
55 {
56 	/*
57 	 * Co-ordinate what happens with having a next-line scope open:
58 	 * first close out the element scope (if applicable), then close
59 	 * out the block scope (also if applicable).
60 	 */
61 
62 	if (man->flags & MAN_ELINE) {
63 		man->flags &= ~MAN_ELINE;
64 		man_unscope(man, man->last->parent);
65 	}
66 	if ( ! (man->flags & MAN_BLINE))
67 		return;
68 	man->flags &= ~MAN_BLINE;
69 	man_unscope(man, man->last->parent);
70 	roff_body_alloc(man, line, offs, man->last->tok);
71 }
72 
73 static int
74 man_ptext(struct roff_man *man, int line, char *buf, int offs)
75 {
76 	int		 i;
77 	const char 	*cp, *sp;
78 	char		*ep;
79 
80 	/* Literal free-form text whitespace is preserved. */
81 
82 	if (man->flags & MAN_LITERAL) {
83 		roff_word_alloc(man, line, offs, buf + offs);
84 		man_descope(man, line, offs);
85 		return 1;
86 	}
87 
88 	for (i = offs; buf[i] == ' '; i++)
89 		/* Skip leading whitespace. */ ;
90 
91 	/*
92 	 * Blank lines are ignored in next line scope
93 	 * and right after headings and cancel preceding \c,
94 	 * but add a single vertical space elsewhere.
95 	 */
96 
97 	if (buf[i] == '\0') {
98 		if (man->flags & (MAN_ELINE | MAN_BLINE)) {
99 			mandoc_msg(MANDOCERR_BLK_BLANK, man->parse,
100 			    line, 0, NULL);
101 			return 1;
102 		}
103 		if (man->last->tok == MAN_SH || man->last->tok == MAN_SS)
104 			return 1;
105 		switch (man->last->type) {
106 		case ROFFT_TEXT:
107 			sp = man->last->string;
108 			cp = ep = strchr(sp, '\0') - 2;
109 			if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
110 				break;
111 			while (cp > sp && cp[-1] == '\\')
112 				cp--;
113 			if ((ep - cp) % 2)
114 				break;
115 			*ep = '\0';
116 			return 1;
117 		default:
118 			break;
119 		}
120 		roff_elem_alloc(man, line, offs, ROFF_sp);
121 		man->next = ROFF_NEXT_SIBLING;
122 		return 1;
123 	}
124 
125 	/*
126 	 * Warn if the last un-escaped character is whitespace. Then
127 	 * strip away the remaining spaces (tabs stay!).
128 	 */
129 
130 	i = (int)strlen(buf);
131 	assert(i);
132 
133 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
134 		if (i > 1 && '\\' != buf[i - 2])
135 			mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
136 			    line, i - 1, NULL);
137 
138 		for (--i; i && ' ' == buf[i]; i--)
139 			/* Spin back to non-space. */ ;
140 
141 		/* Jump ahead of escaped whitespace. */
142 		i += '\\' == buf[i] ? 2 : 1;
143 
144 		buf[i] = '\0';
145 	}
146 	roff_word_alloc(man, line, offs, buf + offs);
147 
148 	/*
149 	 * End-of-sentence check.  If the last character is an unescaped
150 	 * EOS character, then flag the node as being the end of a
151 	 * sentence.  The front-end will know how to interpret this.
152 	 */
153 
154 	assert(i);
155 	if (mandoc_eos(buf, (size_t)i))
156 		man->last->flags |= NODE_EOS;
157 
158 	man_descope(man, line, offs);
159 	return 1;
160 }
161 
162 static int
163 man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
164 {
165 	struct roff_node *n;
166 	const char	*cp;
167 	size_t		 sz;
168 	enum roff_tok	 tok;
169 	int		 ppos;
170 	int		 bline;
171 
172 	/* Determine the line macro. */
173 
174 	ppos = offs;
175 	tok = TOKEN_NONE;
176 	for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
177 		offs++;
178 	if (sz > 0 && sz < 4)
179 		tok = roffhash_find(man->manmac, buf + ppos, sz);
180 	if (tok == TOKEN_NONE) {
181 		mandoc_msg(MANDOCERR_MACRO, man->parse,
182 		    ln, ppos, buf + ppos - 1);
183 		return 1;
184 	}
185 
186 	/* Skip a leading escape sequence or tab. */
187 
188 	switch (buf[offs]) {
189 	case '\\':
190 		cp = buf + offs + 1;
191 		mandoc_escape(&cp, NULL, NULL);
192 		offs = cp - buf;
193 		break;
194 	case '\t':
195 		offs++;
196 		break;
197 	default:
198 		break;
199 	}
200 
201 	/* Jump to the next non-whitespace word. */
202 
203 	while (buf[offs] == ' ')
204 		offs++;
205 
206 	/*
207 	 * Trailing whitespace.  Note that tabs are allowed to be passed
208 	 * into the parser as "text", so we only warn about spaces here.
209 	 */
210 
211 	if (buf[offs] == '\0' && buf[offs - 1] == ' ')
212 		mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
213 		    ln, offs - 1, NULL);
214 
215 	/*
216 	 * Some macros break next-line scopes; otherwise, remember
217 	 * whether we are in next-line scope for a block head.
218 	 */
219 
220 	man_breakscope(man, tok);
221 	bline = man->flags & MAN_BLINE;
222 
223 	/*
224 	 * If the line in next-line scope ends with \c, keep the
225 	 * next-line scope open for the subsequent input line.
226 	 * That is not at all portable, only groff >= 1.22.4
227 	 * does it, but *if* this weird idiom occurs in a manual
228 	 * page, that's very likely what the author intended.
229 	 */
230 
231 	if (bline) {
232 		cp = strchr(buf + offs, '\0') - 2;
233 		if (cp >= buf && cp[0] == '\\' && cp[1] == 'c')
234 			bline = 0;
235 	}
236 
237 	/* Call to handler... */
238 
239 	assert(man_macros[tok].fp);
240 	(*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf);
241 
242 	/* In quick mode (for mandocdb), abort after the NAME section. */
243 
244 	if (man->quick && tok == MAN_SH) {
245 		n = man->last;
246 		if (n->type == ROFFT_BODY &&
247 		    strcmp(n->prev->child->string, "NAME"))
248 			return 2;
249 	}
250 
251 	/*
252 	 * If we are in a next-line scope for a block head,
253 	 * close it out now and switch to the body,
254 	 * unless the next-line scope is allowed to continue.
255 	 */
256 
257 	if ( ! bline || man->flags & MAN_ELINE ||
258 	    man_macros[tok].flags & MAN_NSCOPED)
259 		return 1;
260 
261 	assert(man->flags & MAN_BLINE);
262 	man->flags &= ~MAN_BLINE;
263 
264 	man_unscope(man, man->last->parent);
265 	roff_body_alloc(man, ln, ppos, man->last->tok);
266 	return 1;
267 }
268 
269 void
270 man_breakscope(struct roff_man *man, int tok)
271 {
272 	struct roff_node *n;
273 
274 	/*
275 	 * An element next line scope is open,
276 	 * and the new macro is not allowed inside elements.
277 	 * Delete the element that is being broken.
278 	 */
279 
280 	if (man->flags & MAN_ELINE && (tok < MAN_TH ||
281 	    ! (man_macros[tok].flags & MAN_NSCOPED))) {
282 		n = man->last;
283 		if (n->type == ROFFT_TEXT)
284 			n = n->parent;
285 		if (n->tok < MAN_TH ||
286 		    man_macros[n->tok].flags & MAN_NSCOPED)
287 			n = n->parent;
288 
289 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
290 		    n->line, n->pos, "%s breaks %s",
291 		    roff_name[tok], roff_name[n->tok]);
292 
293 		roff_node_delete(man, n);
294 		man->flags &= ~MAN_ELINE;
295 	}
296 
297 	/*
298 	 * Weird special case:
299 	 * Switching fill mode closes section headers.
300 	 */
301 
302 	if (man->flags & MAN_BLINE &&
303 	    (tok == MAN_nf || tok == MAN_fi) &&
304 	    (man->last->tok == MAN_SH || man->last->tok == MAN_SS)) {
305 		n = man->last;
306 		man_unscope(man, n);
307 		roff_body_alloc(man, n->line, n->pos, n->tok);
308 		man->flags &= ~MAN_BLINE;
309 	}
310 
311 	/*
312 	 * A block header next line scope is open,
313 	 * and the new macro is not allowed inside block headers.
314 	 * Delete the block that is being broken.
315 	 */
316 
317 	if (man->flags & MAN_BLINE && (tok < MAN_TH ||
318 	    man_macros[tok].flags & MAN_BSCOPE)) {
319 		n = man->last;
320 		if (n->type == ROFFT_TEXT)
321 			n = n->parent;
322 		if (n->tok < MAN_TH ||
323 		    (man_macros[n->tok].flags & MAN_BSCOPE) == 0)
324 			n = n->parent;
325 
326 		assert(n->type == ROFFT_HEAD);
327 		n = n->parent;
328 		assert(n->type == ROFFT_BLOCK);
329 		assert(man_macros[n->tok].flags & MAN_SCOPED);
330 
331 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
332 		    n->line, n->pos, "%s breaks %s",
333 		    roff_name[tok], roff_name[n->tok]);
334 
335 		roff_node_delete(man, n);
336 		man->flags &= ~MAN_BLINE;
337 	}
338 }
339 
340 const struct mparse *
341 man_mparse(const struct roff_man *man)
342 {
343 
344 	assert(man && man->parse);
345 	return man->parse;
346 }
347 
348 void
349 man_state(struct roff_man *man, struct roff_node *n)
350 {
351 
352 	switch(n->tok) {
353 	case MAN_nf:
354 	case MAN_EX:
355 		if (man->flags & MAN_LITERAL && ! (n->flags & NODE_VALID))
356 			mandoc_msg(MANDOCERR_NF_SKIP, man->parse,
357 			    n->line, n->pos, "nf");
358 		man->flags |= MAN_LITERAL;
359 		break;
360 	case MAN_fi:
361 	case MAN_EE:
362 		if ( ! (man->flags & MAN_LITERAL) &&
363 		     ! (n->flags & NODE_VALID))
364 			mandoc_msg(MANDOCERR_FI_SKIP, man->parse,
365 			    n->line, n->pos, "fi");
366 		man->flags &= ~MAN_LITERAL;
367 		break;
368 	default:
369 		break;
370 	}
371 	man->last->flags |= NODE_VALID;
372 }
373 
374 void
375 man_validate(struct roff_man *man)
376 {
377 
378 	man->last = man->first;
379 	man_node_validate(man);
380 	man->flags &= ~MAN_LITERAL;
381 }
382