xref: /openbsd-src/usr.bin/mandoc/man_validate.c (revision ae3cb403620ab940fbaabb3055fac045a63d56b7)
1 /*	$OpenBSD: man_validate.c,v 1.104 2017/07/26 10:33:02 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdarg.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28 
29 #include "mandoc_aux.h"
30 #include "mandoc.h"
31 #include "roff.h"
32 #include "man.h"
33 #include "libmandoc.h"
34 #include "roff_int.h"
35 #include "libman.h"
36 
/* Parameter list shared by the definitions of all validation handlers. */
#define	CHKARGS	  struct roff_man *man, struct roff_node *n

/* Type of a validation handler: takes the parser state and one node. */
typedef	void	(*v_check)(struct roff_man *, struct roff_node *);

/* Checks selected by node type, or shared by several macros. */
static	void	  check_par(struct roff_man *, struct roff_node *);
static	void	  check_part(struct roff_man *, struct roff_node *);
static	void	  check_root(struct roff_man *, struct roff_node *);
static	void	  check_text(struct roff_man *, struct roff_node *);

/* Handlers dedicated to individual macros and requests. */
static	void	  post_AT(struct roff_man *, struct roff_node *);
static	void	  post_IP(struct roff_man *, struct roff_node *);
static	void	  post_OP(struct roff_man *, struct roff_node *);
static	void	  post_TH(struct roff_man *, struct roff_node *);
static	void	  post_UC(struct roff_man *, struct roff_node *);
static	void	  post_UR(struct roff_man *, struct roff_node *);
static	void	  post_in(struct roff_man *, struct roff_node *);
static	void	  post_vs(struct roff_man *, struct roff_node *);
54 
55 static	const v_check __man_valids[MAN_MAX - MAN_TH] = {
56 	post_TH,    /* TH */
57 	NULL,       /* SH */
58 	NULL,       /* SS */
59 	NULL,       /* TP */
60 	check_par,  /* LP */
61 	check_par,  /* PP */
62 	check_par,  /* P */
63 	post_IP,    /* IP */
64 	NULL,       /* HP */
65 	NULL,       /* SM */
66 	NULL,       /* SB */
67 	NULL,       /* BI */
68 	NULL,       /* IB */
69 	NULL,       /* BR */
70 	NULL,       /* RB */
71 	NULL,       /* R */
72 	NULL,       /* B */
73 	NULL,       /* I */
74 	NULL,       /* IR */
75 	NULL,       /* RI */
76 	NULL,       /* nf */
77 	NULL,       /* fi */
78 	NULL,       /* RE */
79 	check_part, /* RS */
80 	NULL,       /* DT */
81 	post_UC,    /* UC */
82 	NULL,       /* PD */
83 	post_AT,    /* AT */
84 	post_in,    /* in */
85 	post_OP,    /* OP */
86 	NULL,       /* EX */
87 	NULL,       /* EE */
88 	post_UR,    /* UR */
89 	NULL,       /* UE */
90 	post_UR,    /* MT */
91 	NULL,       /* ME */
92 };
93 static	const v_check *man_valids = __man_valids - MAN_TH;
94 
95 
96 void
97 man_node_validate(struct roff_man *man)
98 {
99 	struct roff_node *n;
100 	const v_check	 *cp;
101 
102 	n = man->last;
103 	man->last = man->last->child;
104 	while (man->last != NULL) {
105 		man_node_validate(man);
106 		if (man->last == n)
107 			man->last = man->last->child;
108 		else
109 			man->last = man->last->next;
110 	}
111 
112 	man->last = n;
113 	man->next = ROFF_NEXT_SIBLING;
114 	switch (n->type) {
115 	case ROFFT_TEXT:
116 		check_text(man, n);
117 		break;
118 	case ROFFT_ROOT:
119 		check_root(man, n);
120 		break;
121 	case ROFFT_EQN:
122 	case ROFFT_TBL:
123 		break;
124 	default:
125 		if (n->tok < ROFF_MAX) {
126 			switch (n->tok) {
127 			case ROFF_br:
128 			case ROFF_sp:
129 				post_vs(man, n);
130 				break;
131 			default:
132 				roff_validate(man);
133 				break;
134 			}
135 			break;
136 		}
137 		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
138 		cp = man_valids + n->tok;
139 		if (*cp)
140 			(*cp)(man, n);
141 		if (man->last == n)
142 			man_state(man, n);
143 		break;
144 	}
145 }
146 
147 static void
148 check_root(CHKARGS)
149 {
150 
151 	assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
152 
153 	if (NULL == man->first->child)
154 		mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
155 		    n->line, n->pos, NULL);
156 	else
157 		man->meta.hasbody = 1;
158 
159 	if (NULL == man->meta.title) {
160 		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
161 		    n->line, n->pos, NULL);
162 
163 		/*
164 		 * If a title hasn't been set, do so now (by
165 		 * implication, date and section also aren't set).
166 		 */
167 
168 		man->meta.title = mandoc_strdup("");
169 		man->meta.msec = mandoc_strdup("");
170 		man->meta.date = man->quick ? mandoc_strdup("") :
171 		    mandoc_normdate(man, NULL, n->line, n->pos);
172 	}
173 
174 	if (man->meta.os_e &&
175 	    (man->meta.rcsids & (1 << man->meta.os_e)) == 0)
176 		mandoc_msg(MANDOCERR_RCS_MISSING, man->parse, 0, 0,
177 		    man->meta.os_e == MANDOC_OS_OPENBSD ?
178 		    "(OpenBSD)" : "(NetBSD)");
179 }
180 
181 static void
182 check_text(CHKARGS)
183 {
184 	char		*cp, *p;
185 
186 	if (MAN_LITERAL & man->flags)
187 		return;
188 
189 	cp = n->string;
190 	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
191 		mandoc_msg(MANDOCERR_FI_TAB, man->parse,
192 		    n->line, n->pos + (p - cp), NULL);
193 }
194 
195 static void
196 post_OP(CHKARGS)
197 {
198 
199 	if (n->child == NULL)
200 		mandoc_msg(MANDOCERR_OP_EMPTY, man->parse,
201 		    n->line, n->pos, "OP");
202 	else if (n->child->next != NULL && n->child->next->next != NULL) {
203 		n = n->child->next->next;
204 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
205 		    n->line, n->pos, "OP ... %s", n->string);
206 	}
207 }
208 
209 static void
210 post_UR(CHKARGS)
211 {
212 	if (n->type == ROFFT_HEAD && n->child == NULL)
213 		mandoc_msg(MANDOCERR_UR_NOHEAD, man->parse,
214 		    n->line, n->pos, roff_name[n->tok]);
215 	check_part(man, n);
216 }
217 
218 static void
219 check_part(CHKARGS)
220 {
221 
222 	if (n->type == ROFFT_BODY && n->child == NULL)
223 		mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse,
224 		    n->line, n->pos, roff_name[n->tok]);
225 }
226 
227 static void
228 check_par(CHKARGS)
229 {
230 
231 	switch (n->type) {
232 	case ROFFT_BLOCK:
233 		if (n->body->child == NULL)
234 			roff_node_delete(man, n);
235 		break;
236 	case ROFFT_BODY:
237 		if (n->child == NULL)
238 			mandoc_vmsg(MANDOCERR_PAR_SKIP,
239 			    man->parse, n->line, n->pos,
240 			    "%s empty", roff_name[n->tok]);
241 		break;
242 	case ROFFT_HEAD:
243 		if (n->child != NULL)
244 			mandoc_vmsg(MANDOCERR_ARG_SKIP,
245 			    man->parse, n->line, n->pos, "%s %s%s",
246 			    roff_name[n->tok], n->child->string,
247 			    n->child->next != NULL ? " ..." : "");
248 		break;
249 	default:
250 		break;
251 	}
252 }
253 
254 static void
255 post_IP(CHKARGS)
256 {
257 
258 	switch (n->type) {
259 	case ROFFT_BLOCK:
260 		if (n->head->child == NULL && n->body->child == NULL)
261 			roff_node_delete(man, n);
262 		break;
263 	case ROFFT_BODY:
264 		if (n->parent->head->child == NULL && n->child == NULL)
265 			mandoc_vmsg(MANDOCERR_PAR_SKIP,
266 			    man->parse, n->line, n->pos,
267 			    "%s empty", roff_name[n->tok]);
268 		break;
269 	default:
270 		break;
271 	}
272 }
273 
274 static void
275 post_TH(CHKARGS)
276 {
277 	struct roff_node *nb;
278 	const char	*p;
279 
280 	free(man->meta.title);
281 	free(man->meta.vol);
282 	free(man->meta.os);
283 	free(man->meta.msec);
284 	free(man->meta.date);
285 
286 	man->meta.title = man->meta.vol = man->meta.date =
287 	    man->meta.msec = man->meta.os = NULL;
288 
289 	nb = n;
290 
291 	/* ->TITLE<- MSEC DATE OS VOL */
292 
293 	n = n->child;
294 	if (n && n->string) {
295 		for (p = n->string; '\0' != *p; p++) {
296 			/* Only warn about this once... */
297 			if (isalpha((unsigned char)*p) &&
298 			    ! isupper((unsigned char)*p)) {
299 				mandoc_vmsg(MANDOCERR_TITLE_CASE,
300 				    man->parse, n->line,
301 				    n->pos + (p - n->string),
302 				    "TH %s", n->string);
303 				break;
304 			}
305 		}
306 		man->meta.title = mandoc_strdup(n->string);
307 	} else {
308 		man->meta.title = mandoc_strdup("");
309 		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
310 		    nb->line, nb->pos, "TH");
311 	}
312 
313 	/* TITLE ->MSEC<- DATE OS VOL */
314 
315 	if (n)
316 		n = n->next;
317 	if (n && n->string)
318 		man->meta.msec = mandoc_strdup(n->string);
319 	else {
320 		man->meta.msec = mandoc_strdup("");
321 		mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse,
322 		    nb->line, nb->pos, "TH %s", man->meta.title);
323 	}
324 
325 	/* TITLE MSEC ->DATE<- OS VOL */
326 
327 	if (n)
328 		n = n->next;
329 	if (n && n->string && '\0' != n->string[0]) {
330 		man->meta.date = man->quick ?
331 		    mandoc_strdup(n->string) :
332 		    mandoc_normdate(man, n->string, n->line, n->pos);
333 	} else {
334 		man->meta.date = mandoc_strdup("");
335 		mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
336 		    n ? n->line : nb->line,
337 		    n ? n->pos : nb->pos, "TH");
338 	}
339 
340 	/* TITLE MSEC DATE ->OS<- VOL */
341 
342 	if (n && (n = n->next))
343 		man->meta.os = mandoc_strdup(n->string);
344 	else if (man->os_s != NULL)
345 		man->meta.os = mandoc_strdup(man->os_s);
346 	if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) {
347 		if (strstr(man->meta.os, "OpenBSD") != NULL)
348 			man->meta.os_e = MANDOC_OS_OPENBSD;
349 		else if (strstr(man->meta.os, "NetBSD") != NULL)
350 			man->meta.os_e = MANDOC_OS_NETBSD;
351 	}
352 
353 	/* TITLE MSEC DATE OS ->VOL<- */
354 	/* If missing, use the default VOL name for MSEC. */
355 
356 	if (n && (n = n->next))
357 		man->meta.vol = mandoc_strdup(n->string);
358 	else if ('\0' != man->meta.msec[0] &&
359 	    (NULL != (p = mandoc_a2msec(man->meta.msec))))
360 		man->meta.vol = mandoc_strdup(p);
361 
362 	if (n != NULL && (n = n->next) != NULL)
363 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
364 		    n->line, n->pos, "TH ... %s", n->string);
365 
366 	/*
367 	 * Remove the `TH' node after we've processed it for our
368 	 * meta-data.
369 	 */
370 	roff_node_delete(man, man->last);
371 }
372 
373 static void
374 post_UC(CHKARGS)
375 {
376 	static const char * const bsd_versions[] = {
377 	    "3rd Berkeley Distribution",
378 	    "4th Berkeley Distribution",
379 	    "4.2 Berkeley Distribution",
380 	    "4.3 Berkeley Distribution",
381 	    "4.4 Berkeley Distribution",
382 	};
383 
384 	const char	*p, *s;
385 
386 	n = n->child;
387 
388 	if (n == NULL || n->type != ROFFT_TEXT)
389 		p = bsd_versions[0];
390 	else {
391 		s = n->string;
392 		if (0 == strcmp(s, "3"))
393 			p = bsd_versions[0];
394 		else if (0 == strcmp(s, "4"))
395 			p = bsd_versions[1];
396 		else if (0 == strcmp(s, "5"))
397 			p = bsd_versions[2];
398 		else if (0 == strcmp(s, "6"))
399 			p = bsd_versions[3];
400 		else if (0 == strcmp(s, "7"))
401 			p = bsd_versions[4];
402 		else
403 			p = bsd_versions[0];
404 	}
405 
406 	free(man->meta.os);
407 	man->meta.os = mandoc_strdup(p);
408 }
409 
410 static void
411 post_AT(CHKARGS)
412 {
413 	static const char * const unix_versions[] = {
414 	    "7th Edition",
415 	    "System III",
416 	    "System V",
417 	    "System V Release 2",
418 	};
419 
420 	struct roff_node *nn;
421 	const char	*p, *s;
422 
423 	n = n->child;
424 
425 	if (n == NULL || n->type != ROFFT_TEXT)
426 		p = unix_versions[0];
427 	else {
428 		s = n->string;
429 		if (0 == strcmp(s, "3"))
430 			p = unix_versions[0];
431 		else if (0 == strcmp(s, "4"))
432 			p = unix_versions[1];
433 		else if (0 == strcmp(s, "5")) {
434 			nn = n->next;
435 			if (nn != NULL &&
436 			    nn->type == ROFFT_TEXT &&
437 			    nn->string[0] != '\0')
438 				p = unix_versions[3];
439 			else
440 				p = unix_versions[2];
441 		} else
442 			p = unix_versions[0];
443 	}
444 
445 	free(man->meta.os);
446 	man->meta.os = mandoc_strdup(p);
447 }
448 
449 static void
450 post_in(CHKARGS)
451 {
452 	char	*s;
453 
454 	if (n->parent->tok != MAN_TP ||
455 	    n->parent->type != ROFFT_HEAD ||
456 	    n->child == NULL ||
457 	    *n->child->string == '+' ||
458 	    *n->child->string == '-')
459 		return;
460 	mandoc_asprintf(&s, "+%s", n->child->string);
461 	free(n->child->string);
462 	n->child->string = s;
463 }
464 
465 static void
466 post_vs(CHKARGS)
467 {
468 
469 	if (NULL != n->prev)
470 		return;
471 
472 	switch (n->parent->tok) {
473 	case MAN_SH:
474 	case MAN_SS:
475 	case MAN_PP:
476 	case MAN_LP:
477 	case MAN_P:
478 		mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos,
479 		    "%s after %s", roff_name[n->tok],
480 		    roff_name[n->parent->tok]);
481 		/* FALLTHROUGH */
482 	case TOKEN_NONE:
483 		/*
484 		 * Don't warn about this because it occurs in pod2man
485 		 * and would cause considerable (unfixable) warnage.
486 		 */
487 		roff_node_delete(man, n);
488 		break;
489 	default:
490 		break;
491 	}
492 }
493