xref: /openbsd-src/usr.bin/mandoc/man_validate.c (revision f933361f20df4def12f9bc8391f68d6bfad3bb75)
1 /*	$OpenBSD: man_validate.c,v 1.101 2017/06/17 22:40:27 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdarg.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28 
29 #include "mandoc_aux.h"
30 #include "mandoc.h"
31 #include "roff.h"
32 #include "man.h"
33 #include "libmandoc.h"
34 #include "roff_int.h"
35 #include "libman.h"
36 
37 #define	CHKARGS	  struct roff_man *man, struct roff_node *n
38 
39 typedef	void	(*v_check)(CHKARGS);
40 
41 static	void	  check_par(CHKARGS);
42 static	void	  check_part(CHKARGS);
43 static	void	  check_root(CHKARGS);
44 static	void	  check_text(CHKARGS);
45 
46 static	void	  post_AT(CHKARGS);
47 static	void	  post_IP(CHKARGS);
48 static	void	  post_OP(CHKARGS);
49 static	void	  post_TH(CHKARGS);
50 static	void	  post_UC(CHKARGS);
51 static	void	  post_UR(CHKARGS);
52 static	void	  post_in(CHKARGS);
53 static	void	  post_vs(CHKARGS);
54 
55 static	const v_check __man_valids[MAN_MAX - MAN_TH] = {
56 	post_TH,    /* TH */
57 	NULL,       /* SH */
58 	NULL,       /* SS */
59 	NULL,       /* TP */
60 	check_par,  /* LP */
61 	check_par,  /* PP */
62 	check_par,  /* P */
63 	post_IP,    /* IP */
64 	NULL,       /* HP */
65 	NULL,       /* SM */
66 	NULL,       /* SB */
67 	NULL,       /* BI */
68 	NULL,       /* IB */
69 	NULL,       /* BR */
70 	NULL,       /* RB */
71 	NULL,       /* R */
72 	NULL,       /* B */
73 	NULL,       /* I */
74 	NULL,       /* IR */
75 	NULL,       /* RI */
76 	NULL,       /* nf */
77 	NULL,       /* fi */
78 	NULL,       /* RE */
79 	check_part, /* RS */
80 	NULL,       /* DT */
81 	post_UC,    /* UC */
82 	NULL,       /* PD */
83 	post_AT,    /* AT */
84 	post_in,    /* in */
85 	post_OP,    /* OP */
86 	NULL,       /* EX */
87 	NULL,       /* EE */
88 	post_UR,    /* UR */
89 	NULL,       /* UE */
90 };
91 static	const v_check *man_valids = __man_valids - MAN_TH;
92 
93 
94 void
95 man_node_validate(struct roff_man *man)
96 {
97 	struct roff_node *n;
98 	const v_check	 *cp;
99 
100 	n = man->last;
101 	man->last = man->last->child;
102 	while (man->last != NULL) {
103 		man_node_validate(man);
104 		if (man->last == n)
105 			man->last = man->last->child;
106 		else
107 			man->last = man->last->next;
108 	}
109 
110 	man->last = n;
111 	man->next = ROFF_NEXT_SIBLING;
112 	switch (n->type) {
113 	case ROFFT_TEXT:
114 		check_text(man, n);
115 		break;
116 	case ROFFT_ROOT:
117 		check_root(man, n);
118 		break;
119 	case ROFFT_EQN:
120 	case ROFFT_TBL:
121 		break;
122 	default:
123 		if (n->tok < ROFF_MAX) {
124 			switch (n->tok) {
125 			case ROFF_br:
126 			case ROFF_sp:
127 				post_vs(man, n);
128 				break;
129 			default:
130 				roff_validate(man);
131 				break;
132 			}
133 			break;
134 		}
135 		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
136 		cp = man_valids + n->tok;
137 		if (*cp)
138 			(*cp)(man, n);
139 		if (man->last == n)
140 			man_state(man, n);
141 		break;
142 	}
143 }
144 
145 static void
146 check_root(CHKARGS)
147 {
148 
149 	assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
150 
151 	if (NULL == man->first->child)
152 		mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
153 		    n->line, n->pos, NULL);
154 	else
155 		man->meta.hasbody = 1;
156 
157 	if (NULL == man->meta.title) {
158 		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
159 		    n->line, n->pos, NULL);
160 
161 		/*
162 		 * If a title hasn't been set, do so now (by
163 		 * implication, date and section also aren't set).
164 		 */
165 
166 		man->meta.title = mandoc_strdup("");
167 		man->meta.msec = mandoc_strdup("");
168 		man->meta.date = man->quick ? mandoc_strdup("") :
169 		    mandoc_normdate(man, NULL, n->line, n->pos);
170 	}
171 
172 	if (man->meta.os_e &&
173 	    (man->meta.rcsids & (1 << man->meta.os_e)) == 0)
174 		mandoc_msg(MANDOCERR_RCS_MISSING, man->parse, 0, 0, NULL);
175 }
176 
177 static void
178 check_text(CHKARGS)
179 {
180 	char		*cp, *p;
181 
182 	if (MAN_LITERAL & man->flags)
183 		return;
184 
185 	cp = n->string;
186 	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
187 		mandoc_msg(MANDOCERR_FI_TAB, man->parse,
188 		    n->line, n->pos + (p - cp), NULL);
189 }
190 
191 static void
192 post_OP(CHKARGS)
193 {
194 
195 	if (n->child == NULL)
196 		mandoc_msg(MANDOCERR_OP_EMPTY, man->parse,
197 		    n->line, n->pos, "OP");
198 	else if (n->child->next != NULL && n->child->next->next != NULL) {
199 		n = n->child->next->next;
200 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
201 		    n->line, n->pos, "OP ... %s", n->string);
202 	}
203 }
204 
205 static void
206 post_UR(CHKARGS)
207 {
208 
209 	if (n->type == ROFFT_HEAD && n->child == NULL)
210 		mandoc_vmsg(MANDOCERR_UR_NOHEAD, man->parse,
211 		    n->line, n->pos, "UR");
212 	check_part(man, n);
213 }
214 
215 static void
216 check_part(CHKARGS)
217 {
218 
219 	if (n->type == ROFFT_BODY && n->child == NULL)
220 		mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse,
221 		    n->line, n->pos, roff_name[n->tok]);
222 }
223 
224 static void
225 check_par(CHKARGS)
226 {
227 
228 	switch (n->type) {
229 	case ROFFT_BLOCK:
230 		if (n->body->child == NULL)
231 			roff_node_delete(man, n);
232 		break;
233 	case ROFFT_BODY:
234 		if (n->child == NULL)
235 			mandoc_vmsg(MANDOCERR_PAR_SKIP,
236 			    man->parse, n->line, n->pos,
237 			    "%s empty", roff_name[n->tok]);
238 		break;
239 	case ROFFT_HEAD:
240 		if (n->child != NULL)
241 			mandoc_vmsg(MANDOCERR_ARG_SKIP,
242 			    man->parse, n->line, n->pos, "%s %s%s",
243 			    roff_name[n->tok], n->child->string,
244 			    n->child->next != NULL ? " ..." : "");
245 		break;
246 	default:
247 		break;
248 	}
249 }
250 
251 static void
252 post_IP(CHKARGS)
253 {
254 
255 	switch (n->type) {
256 	case ROFFT_BLOCK:
257 		if (n->head->child == NULL && n->body->child == NULL)
258 			roff_node_delete(man, n);
259 		break;
260 	case ROFFT_BODY:
261 		if (n->parent->head->child == NULL && n->child == NULL)
262 			mandoc_vmsg(MANDOCERR_PAR_SKIP,
263 			    man->parse, n->line, n->pos,
264 			    "%s empty", roff_name[n->tok]);
265 		break;
266 	default:
267 		break;
268 	}
269 }
270 
271 static void
272 post_TH(CHKARGS)
273 {
274 	struct roff_node *nb;
275 	const char	*p;
276 
277 	free(man->meta.title);
278 	free(man->meta.vol);
279 	free(man->meta.os);
280 	free(man->meta.msec);
281 	free(man->meta.date);
282 
283 	man->meta.title = man->meta.vol = man->meta.date =
284 	    man->meta.msec = man->meta.os = NULL;
285 
286 	nb = n;
287 
288 	/* ->TITLE<- MSEC DATE OS VOL */
289 
290 	n = n->child;
291 	if (n && n->string) {
292 		for (p = n->string; '\0' != *p; p++) {
293 			/* Only warn about this once... */
294 			if (isalpha((unsigned char)*p) &&
295 			    ! isupper((unsigned char)*p)) {
296 				mandoc_vmsg(MANDOCERR_TITLE_CASE,
297 				    man->parse, n->line,
298 				    n->pos + (p - n->string),
299 				    "TH %s", n->string);
300 				break;
301 			}
302 		}
303 		man->meta.title = mandoc_strdup(n->string);
304 	} else {
305 		man->meta.title = mandoc_strdup("");
306 		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
307 		    nb->line, nb->pos, "TH");
308 	}
309 
310 	/* TITLE ->MSEC<- DATE OS VOL */
311 
312 	if (n)
313 		n = n->next;
314 	if (n && n->string)
315 		man->meta.msec = mandoc_strdup(n->string);
316 	else {
317 		man->meta.msec = mandoc_strdup("");
318 		mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse,
319 		    nb->line, nb->pos, "TH %s", man->meta.title);
320 	}
321 
322 	/* TITLE MSEC ->DATE<- OS VOL */
323 
324 	if (n)
325 		n = n->next;
326 	if (n && n->string && '\0' != n->string[0]) {
327 		man->meta.date = man->quick ?
328 		    mandoc_strdup(n->string) :
329 		    mandoc_normdate(man, n->string, n->line, n->pos);
330 	} else {
331 		man->meta.date = mandoc_strdup("");
332 		mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
333 		    n ? n->line : nb->line,
334 		    n ? n->pos : nb->pos, "TH");
335 	}
336 
337 	/* TITLE MSEC DATE ->OS<- VOL */
338 
339 	if (n && (n = n->next))
340 		man->meta.os = mandoc_strdup(n->string);
341 	else if (man->defos != NULL)
342 		man->meta.os = mandoc_strdup(man->defos);
343 	man->meta.os_e = man->meta.os == NULL ? MDOC_OS_OTHER :
344 	    strstr(man->meta.os, "OpenBSD") != NULL ? MDOC_OS_OPENBSD :
345 	    strstr(man->meta.os, "NetBSD") != NULL ? MDOC_OS_NETBSD :
346 	    MDOC_OS_OTHER;
347 
348 	/* TITLE MSEC DATE OS ->VOL<- */
349 	/* If missing, use the default VOL name for MSEC. */
350 
351 	if (n && (n = n->next))
352 		man->meta.vol = mandoc_strdup(n->string);
353 	else if ('\0' != man->meta.msec[0] &&
354 	    (NULL != (p = mandoc_a2msec(man->meta.msec))))
355 		man->meta.vol = mandoc_strdup(p);
356 
357 	if (n != NULL && (n = n->next) != NULL)
358 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
359 		    n->line, n->pos, "TH ... %s", n->string);
360 
361 	/*
362 	 * Remove the `TH' node after we've processed it for our
363 	 * meta-data.
364 	 */
365 	roff_node_delete(man, man->last);
366 }
367 
368 static void
369 post_UC(CHKARGS)
370 {
371 	static const char * const bsd_versions[] = {
372 	    "3rd Berkeley Distribution",
373 	    "4th Berkeley Distribution",
374 	    "4.2 Berkeley Distribution",
375 	    "4.3 Berkeley Distribution",
376 	    "4.4 Berkeley Distribution",
377 	};
378 
379 	const char	*p, *s;
380 
381 	n = n->child;
382 
383 	if (n == NULL || n->type != ROFFT_TEXT)
384 		p = bsd_versions[0];
385 	else {
386 		s = n->string;
387 		if (0 == strcmp(s, "3"))
388 			p = bsd_versions[0];
389 		else if (0 == strcmp(s, "4"))
390 			p = bsd_versions[1];
391 		else if (0 == strcmp(s, "5"))
392 			p = bsd_versions[2];
393 		else if (0 == strcmp(s, "6"))
394 			p = bsd_versions[3];
395 		else if (0 == strcmp(s, "7"))
396 			p = bsd_versions[4];
397 		else
398 			p = bsd_versions[0];
399 	}
400 
401 	free(man->meta.os);
402 	man->meta.os = mandoc_strdup(p);
403 }
404 
405 static void
406 post_AT(CHKARGS)
407 {
408 	static const char * const unix_versions[] = {
409 	    "7th Edition",
410 	    "System III",
411 	    "System V",
412 	    "System V Release 2",
413 	};
414 
415 	struct roff_node *nn;
416 	const char	*p, *s;
417 
418 	n = n->child;
419 
420 	if (n == NULL || n->type != ROFFT_TEXT)
421 		p = unix_versions[0];
422 	else {
423 		s = n->string;
424 		if (0 == strcmp(s, "3"))
425 			p = unix_versions[0];
426 		else if (0 == strcmp(s, "4"))
427 			p = unix_versions[1];
428 		else if (0 == strcmp(s, "5")) {
429 			nn = n->next;
430 			if (nn != NULL &&
431 			    nn->type == ROFFT_TEXT &&
432 			    nn->string[0] != '\0')
433 				p = unix_versions[3];
434 			else
435 				p = unix_versions[2];
436 		} else
437 			p = unix_versions[0];
438 	}
439 
440 	free(man->meta.os);
441 	man->meta.os = mandoc_strdup(p);
442 }
443 
444 static void
445 post_in(CHKARGS)
446 {
447 	char	*s;
448 
449 	if (n->parent->tok != MAN_TP ||
450 	    n->parent->type != ROFFT_HEAD ||
451 	    n->child == NULL ||
452 	    *n->child->string == '+' ||
453 	    *n->child->string == '-')
454 		return;
455 	mandoc_asprintf(&s, "+%s", n->child->string);
456 	free(n->child->string);
457 	n->child->string = s;
458 }
459 
460 static void
461 post_vs(CHKARGS)
462 {
463 
464 	if (NULL != n->prev)
465 		return;
466 
467 	switch (n->parent->tok) {
468 	case MAN_SH:
469 	case MAN_SS:
470 	case MAN_PP:
471 	case MAN_LP:
472 	case MAN_P:
473 		mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos,
474 		    "%s after %s", roff_name[n->tok],
475 		    roff_name[n->parent->tok]);
476 		/* FALLTHROUGH */
477 	case TOKEN_NONE:
478 		/*
479 		 * Don't warn about this because it occurs in pod2man
480 		 * and would cause considerable (unfixable) warnage.
481 		 */
482 		roff_node_delete(man, n);
483 		break;
484 	default:
485 		break;
486 	}
487 }
488