xref: /openbsd-src/usr.bin/mandoc/man_validate.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: man_validate.c,v 1.120 2020/01/19 16:16:32 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <time.h>
29 
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "man.h"
34 #include "libmandoc.h"
35 #include "roff_int.h"
36 #include "libman.h"
37 
/*
 * Parameter list shared by all validation handlers in this file:
 * the parser state and the node to be validated.
 */
#define	CHKARGS	  struct roff_man *man, struct roff_node *n

/* Type of a validation handler as stored in man_valids[]. */
typedef	void	(*v_check)(CHKARGS);

/*
 * All handlers below are invoked from man_validate(), directly or
 * through man_valids[], after the children of the node have already
 * been validated.
 */
static	void	  check_abort(CHKARGS) __attribute__((__noreturn__));
static	void	  check_par(CHKARGS);
static	void	  check_part(CHKARGS);
static	void	  check_root(CHKARGS);
static	void	  check_text(CHKARGS);

static	void	  post_AT(CHKARGS);
static	void	  post_EE(CHKARGS);
static	void	  post_EX(CHKARGS);
static	void	  post_IP(CHKARGS);
static	void	  post_OP(CHKARGS);
static	void	  post_SH(CHKARGS);
static	void	  post_TH(CHKARGS);
static	void	  post_UC(CHKARGS);
static	void	  post_UR(CHKARGS);
static	void	  post_in(CHKARGS);
58 
/*
 * Validation handlers for the man(7) macros.  man_validate()
 * indexes this table with (tok - MAN_TH), so the order of the
 * entries has to follow the order of the macro tokens.
 * A NULL entry means that no macro-specific check is run.
 * LP and P point to check_abort() because man_validate() rewrites
 * both tokens to PP before dispatching, so these two slots
 * should never be reached.
 */
static	const v_check man_valids[MAN_MAX - MAN_TH] = {
	post_TH,    /* TH */
	post_SH,    /* SH */
	post_SH,    /* SS */
	NULL,       /* TP */
	NULL,       /* TQ */
	check_abort,/* LP */
	check_par,  /* PP */
	check_abort,/* P */
	post_IP,    /* IP */
	NULL,       /* HP */
	NULL,       /* SM */
	NULL,       /* SB */
	NULL,       /* BI */
	NULL,       /* IB */
	NULL,       /* BR */
	NULL,       /* RB */
	NULL,       /* R */
	NULL,       /* B */
	NULL,       /* I */
	NULL,       /* IR */
	NULL,       /* RI */
	NULL,       /* RE */
	check_part, /* RS */
	NULL,       /* DT */
	post_UC,    /* UC */
	NULL,       /* PD */
	post_AT,    /* AT */
	post_in,    /* in */
	NULL,       /* SY */
	NULL,       /* YS */
	post_OP,    /* OP */
	post_EX,    /* EX */
	post_EE,    /* EE */
	post_UR,    /* UR */
	NULL,       /* UE */
	post_UR,    /* MT */
	NULL,       /* ME */
};
98 
99 
100 /* Validate the subtree rooted at man->last. */
101 void
102 man_validate(struct roff_man *man)
103 {
104 	struct roff_node *n;
105 	const v_check	 *cp;
106 
107 	/*
108 	 * Translate obsolete macros such that later code
109 	 * does not need to look for them.
110 	 */
111 
112 	n = man->last;
113 	switch (n->tok) {
114 	case MAN_LP:
115 	case MAN_P:
116 		n->tok = MAN_PP;
117 		break;
118 	default:
119 		break;
120 	}
121 
122 	/*
123 	 * Iterate over all children, recursing into each one
124 	 * in turn, depth-first.
125 	 */
126 
127 	man->last = man->last->child;
128 	while (man->last != NULL) {
129 		man_validate(man);
130 		if (man->last == n)
131 			man->last = man->last->child;
132 		else
133 			man->last = man->last->next;
134 	}
135 
136 	/* Finally validate the macro itself. */
137 
138 	man->last = n;
139 	man->next = ROFF_NEXT_SIBLING;
140 	switch (n->type) {
141 	case ROFFT_TEXT:
142 		check_text(man, n);
143 		break;
144 	case ROFFT_ROOT:
145 		check_root(man, n);
146 		break;
147 	case ROFFT_COMMENT:
148 	case ROFFT_EQN:
149 	case ROFFT_TBL:
150 		break;
151 	default:
152 		if (n->tok < ROFF_MAX) {
153 			roff_validate(man);
154 			break;
155 		}
156 		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
157 		cp = man_valids + (n->tok - MAN_TH);
158 		if (*cp)
159 			(*cp)(man, n);
160 		if (man->last == n)
161 			n->flags |= NODE_VALID;
162 		break;
163 	}
164 }
165 
166 static void
167 check_root(CHKARGS)
168 {
169 	assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
170 
171 	if (n->last == NULL || n->last->type == ROFFT_COMMENT)
172 		mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL);
173 	else
174 		man->meta.hasbody = 1;
175 
176 	if (NULL == man->meta.title) {
177 		mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL);
178 
179 		/*
180 		 * If a title hasn't been set, do so now (by
181 		 * implication, date and section also aren't set).
182 		 */
183 
184 		man->meta.title = mandoc_strdup("");
185 		man->meta.msec = mandoc_strdup("");
186 		man->meta.date = mandoc_normdate(NULL, NULL);
187 	}
188 
189 	if (man->meta.os_e &&
190 	    (man->meta.rcsids & (1 << man->meta.os_e)) == 0)
191 		mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0,
192 		    man->meta.os_e == MANDOC_OS_OPENBSD ?
193 		    "(OpenBSD)" : "(NetBSD)");
194 }
195 
/*
 * Handler for the man_valids[] slots of LP and P.
 * man_validate() rewrites both tokens to PP before dispatching,
 * so getting here indicates a bug: terminate the program.
 * Neither argument is used.
 */
static void
check_abort(CHKARGS)
{
	abort();
}
201 
202 static void
203 check_text(CHKARGS)
204 {
205 	char		*cp, *p;
206 
207 	if (n->flags & NODE_NOFILL)
208 		return;
209 
210 	cp = n->string;
211 	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
212 		mandoc_msg(MANDOCERR_FI_TAB,
213 		    n->line, n->pos + (int)(p - cp), NULL);
214 }
215 
216 static void
217 post_EE(CHKARGS)
218 {
219 	if ((n->flags & NODE_NOFILL) == 0)
220 		mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE");
221 }
222 
223 static void
224 post_EX(CHKARGS)
225 {
226 	if (n->flags & NODE_NOFILL)
227 		mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX");
228 }
229 
230 static void
231 post_OP(CHKARGS)
232 {
233 
234 	if (n->child == NULL)
235 		mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP");
236 	else if (n->child->next != NULL && n->child->next->next != NULL) {
237 		n = n->child->next->next;
238 		mandoc_msg(MANDOCERR_ARG_EXCESS,
239 		    n->line, n->pos, "OP ... %s", n->string);
240 	}
241 }
242 
243 static void
244 post_SH(CHKARGS)
245 {
246 	struct roff_node	*nc;
247 
248 	if (n->type != ROFFT_BODY || (nc = n->child) == NULL)
249 		return;
250 
251 	if (nc->tok == MAN_PP && nc->body->child != NULL) {
252 		while (nc->body->last != NULL) {
253 			man->next = ROFF_NEXT_CHILD;
254 			roff_node_relink(man, nc->body->last);
255 			man->last = n;
256 		}
257 	}
258 
259 	if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) {
260 		mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos,
261 		    "%s after %s", roff_name[nc->tok], roff_name[n->tok]);
262 		roff_node_delete(man, nc);
263 	}
264 
265 	/*
266 	 * Trailing PP is empty, so it is deleted by check_par().
267 	 * Trailing sp is significant.
268 	 */
269 
270 	if ((nc = n->last) != NULL && nc->tok == ROFF_br) {
271 		mandoc_msg(MANDOCERR_PAR_SKIP,
272 		    nc->line, nc->pos, "%s at the end of %s",
273 		    roff_name[nc->tok], roff_name[n->tok]);
274 		roff_node_delete(man, nc);
275 	}
276 }
277 
278 static void
279 post_UR(CHKARGS)
280 {
281 	if (n->type == ROFFT_HEAD && n->child == NULL)
282 		mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos,
283 		    "%s", roff_name[n->tok]);
284 	check_part(man, n);
285 }
286 
287 static void
288 check_part(CHKARGS)
289 {
290 
291 	if (n->type == ROFFT_BODY && n->child == NULL)
292 		mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos,
293 		    "%s", roff_name[n->tok]);
294 }
295 
296 static void
297 check_par(CHKARGS)
298 {
299 
300 	switch (n->type) {
301 	case ROFFT_BLOCK:
302 		if (n->body->child == NULL)
303 			roff_node_delete(man, n);
304 		break;
305 	case ROFFT_BODY:
306 		if (n->child != NULL &&
307 		    (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) {
308 			mandoc_msg(MANDOCERR_PAR_SKIP,
309 			    n->child->line, n->child->pos,
310 			    "%s after %s", roff_name[n->child->tok],
311 			    roff_name[n->tok]);
312 			roff_node_delete(man, n->child);
313 		}
314 		if (n->child == NULL)
315 			mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos,
316 			    "%s empty", roff_name[n->tok]);
317 		break;
318 	case ROFFT_HEAD:
319 		if (n->child != NULL)
320 			mandoc_msg(MANDOCERR_ARG_SKIP,
321 			    n->line, n->pos, "%s %s%s",
322 			    roff_name[n->tok], n->child->string,
323 			    n->child->next != NULL ? " ..." : "");
324 		break;
325 	default:
326 		break;
327 	}
328 }
329 
330 static void
331 post_IP(CHKARGS)
332 {
333 
334 	switch (n->type) {
335 	case ROFFT_BLOCK:
336 		if (n->head->child == NULL && n->body->child == NULL)
337 			roff_node_delete(man, n);
338 		break;
339 	case ROFFT_BODY:
340 		if (n->parent->head->child == NULL && n->child == NULL)
341 			mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos,
342 			    "%s empty", roff_name[n->tok]);
343 		break;
344 	default:
345 		break;
346 	}
347 }
348 
349 static void
350 post_TH(CHKARGS)
351 {
352 	struct roff_node *nb;
353 	const char	*p;
354 
355 	free(man->meta.title);
356 	free(man->meta.vol);
357 	free(man->meta.os);
358 	free(man->meta.msec);
359 	free(man->meta.date);
360 
361 	man->meta.title = man->meta.vol = man->meta.date =
362 	    man->meta.msec = man->meta.os = NULL;
363 
364 	nb = n;
365 
366 	/* ->TITLE<- MSEC DATE OS VOL */
367 
368 	n = n->child;
369 	if (n != NULL && n->string != NULL) {
370 		for (p = n->string; *p != '\0'; p++) {
371 			/* Only warn about this once... */
372 			if (isalpha((unsigned char)*p) &&
373 			    ! isupper((unsigned char)*p)) {
374 				mandoc_msg(MANDOCERR_TITLE_CASE, n->line,
375 				    n->pos + (int)(p - n->string),
376 				    "TH %s", n->string);
377 				break;
378 			}
379 		}
380 		man->meta.title = mandoc_strdup(n->string);
381 	} else {
382 		man->meta.title = mandoc_strdup("");
383 		mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH");
384 	}
385 
386 	/* TITLE ->MSEC<- DATE OS VOL */
387 
388 	if (n != NULL)
389 		n = n->next;
390 	if (n != NULL && n->string != NULL)
391 		man->meta.msec = mandoc_strdup(n->string);
392 	else {
393 		man->meta.msec = mandoc_strdup("");
394 		mandoc_msg(MANDOCERR_MSEC_MISSING,
395 		    nb->line, nb->pos, "TH %s", man->meta.title);
396 	}
397 
398 	/* TITLE MSEC ->DATE<- OS VOL */
399 
400 	if (n != NULL)
401 		n = n->next;
402 	if (man->quick && n != NULL)
403 		man->meta.date = mandoc_strdup("");
404 	else
405 		man->meta.date = mandoc_normdate(n, nb);
406 
407 	/* TITLE MSEC DATE ->OS<- VOL */
408 
409 	if (n && (n = n->next))
410 		man->meta.os = mandoc_strdup(n->string);
411 	else if (man->os_s != NULL)
412 		man->meta.os = mandoc_strdup(man->os_s);
413 	if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) {
414 		if (strstr(man->meta.os, "OpenBSD") != NULL)
415 			man->meta.os_e = MANDOC_OS_OPENBSD;
416 		else if (strstr(man->meta.os, "NetBSD") != NULL)
417 			man->meta.os_e = MANDOC_OS_NETBSD;
418 	}
419 
420 	/* TITLE MSEC DATE OS ->VOL<- */
421 	/* If missing, use the default VOL name for MSEC. */
422 
423 	if (n && (n = n->next))
424 		man->meta.vol = mandoc_strdup(n->string);
425 	else if ('\0' != man->meta.msec[0] &&
426 	    (NULL != (p = mandoc_a2msec(man->meta.msec))))
427 		man->meta.vol = mandoc_strdup(p);
428 
429 	if (n != NULL && (n = n->next) != NULL)
430 		mandoc_msg(MANDOCERR_ARG_EXCESS,
431 		    n->line, n->pos, "TH ... %s", n->string);
432 
433 	/*
434 	 * Remove the `TH' node after we've processed it for our
435 	 * meta-data.
436 	 */
437 	roff_node_delete(man, man->last);
438 }
439 
440 static void
441 post_UC(CHKARGS)
442 {
443 	static const char * const bsd_versions[] = {
444 	    "3rd Berkeley Distribution",
445 	    "4th Berkeley Distribution",
446 	    "4.2 Berkeley Distribution",
447 	    "4.3 Berkeley Distribution",
448 	    "4.4 Berkeley Distribution",
449 	};
450 
451 	const char	*p, *s;
452 
453 	n = n->child;
454 
455 	if (n == NULL || n->type != ROFFT_TEXT)
456 		p = bsd_versions[0];
457 	else {
458 		s = n->string;
459 		if (0 == strcmp(s, "3"))
460 			p = bsd_versions[0];
461 		else if (0 == strcmp(s, "4"))
462 			p = bsd_versions[1];
463 		else if (0 == strcmp(s, "5"))
464 			p = bsd_versions[2];
465 		else if (0 == strcmp(s, "6"))
466 			p = bsd_versions[3];
467 		else if (0 == strcmp(s, "7"))
468 			p = bsd_versions[4];
469 		else
470 			p = bsd_versions[0];
471 	}
472 
473 	free(man->meta.os);
474 	man->meta.os = mandoc_strdup(p);
475 }
476 
477 static void
478 post_AT(CHKARGS)
479 {
480 	static const char * const unix_versions[] = {
481 	    "7th Edition",
482 	    "System III",
483 	    "System V",
484 	    "System V Release 2",
485 	};
486 
487 	struct roff_node *nn;
488 	const char	*p, *s;
489 
490 	n = n->child;
491 
492 	if (n == NULL || n->type != ROFFT_TEXT)
493 		p = unix_versions[0];
494 	else {
495 		s = n->string;
496 		if (0 == strcmp(s, "3"))
497 			p = unix_versions[0];
498 		else if (0 == strcmp(s, "4"))
499 			p = unix_versions[1];
500 		else if (0 == strcmp(s, "5")) {
501 			nn = n->next;
502 			if (nn != NULL &&
503 			    nn->type == ROFFT_TEXT &&
504 			    nn->string[0] != '\0')
505 				p = unix_versions[3];
506 			else
507 				p = unix_versions[2];
508 		} else
509 			p = unix_versions[0];
510 	}
511 
512 	free(man->meta.os);
513 	man->meta.os = mandoc_strdup(p);
514 }
515 
516 static void
517 post_in(CHKARGS)
518 {
519 	char	*s;
520 
521 	if (n->parent->tok != MAN_TP ||
522 	    n->parent->type != ROFFT_HEAD ||
523 	    n->child == NULL ||
524 	    *n->child->string == '+' ||
525 	    *n->child->string == '-')
526 		return;
527 	mandoc_asprintf(&s, "+%s", n->child->string);
528 	free(n->child->string);
529 	n->child->string = s;
530 }
531