xref: /openbsd-src/usr.bin/mandoc/man_validate.c (revision 5719c9cb982c64f3c0ad4d33b93e1830a06d9c68)
1 /*	$OpenBSD: man_validate.c,v 1.119 2019/06/27 15:05:14 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2012-2018 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <time.h>
29 
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "man.h"
34 #include "libmandoc.h"
35 #include "roff_int.h"
36 #include "libman.h"
37 
/* Parameter list shared by every validation handler in this file. */
#define	CHKARGS	  struct roff_man *man, struct roff_node *n

/* Type of the handlers stored in the man_valids[] dispatch table. */
typedef	void	(*v_check)(CHKARGS);

/* Generic checks, used for several macros or for non-macro nodes. */
static	void	  check_abort(CHKARGS) __attribute__((__noreturn__));
static	void	  check_par(CHKARGS);
static	void	  check_part(CHKARGS);
static	void	  check_root(CHKARGS);
static	void	  check_text(CHKARGS);

/* Macro-specific checks, run after the children have been validated. */
static	void	  post_AT(CHKARGS);
static	void	  post_EE(CHKARGS);
static	void	  post_EX(CHKARGS);
static	void	  post_IP(CHKARGS);
static	void	  post_OP(CHKARGS);
static	void	  post_SH(CHKARGS);
static	void	  post_TH(CHKARGS);
static	void	  post_UC(CHKARGS);
static	void	  post_UR(CHKARGS);
static	void	  post_in(CHKARGS);
58 
/*
 * Validation handlers, indexed by (tok - MAN_TH); see man_validate().
 * A NULL entry means that the macro needs no macro-specific check.
 * LP and P map to check_abort() because man_validate() rewrites
 * both tokens to PP before it consults this table, so these two
 * slots are never supposed to be reached.
 */
static	const v_check man_valids[MAN_MAX - MAN_TH] = {
	post_TH,    /* TH */
	post_SH,    /* SH */
	post_SH,    /* SS */
	NULL,       /* TP */
	NULL,       /* TQ */
	check_abort,/* LP */
	check_par,  /* PP */
	check_abort,/* P */
	post_IP,    /* IP */
	NULL,       /* HP */
	NULL,       /* SM */
	NULL,       /* SB */
	NULL,       /* BI */
	NULL,       /* IB */
	NULL,       /* BR */
	NULL,       /* RB */
	NULL,       /* R */
	NULL,       /* B */
	NULL,       /* I */
	NULL,       /* IR */
	NULL,       /* RI */
	NULL,       /* RE */
	check_part, /* RS */
	NULL,       /* DT */
	post_UC,    /* UC */
	NULL,       /* PD */
	post_AT,    /* AT */
	post_in,    /* in */
	NULL,       /* SY */
	NULL,       /* YS */
	post_OP,    /* OP */
	post_EX,    /* EX */
	post_EE,    /* EE */
	post_UR,    /* UR */
	NULL,       /* UE */
	post_UR,    /* MT */
	NULL,       /* ME */
};
98 
99 
100 /* Validate the subtree rooted at man->last. */
101 void
102 man_validate(struct roff_man *man)
103 {
104 	struct roff_node *n;
105 	const v_check	 *cp;
106 
107 	/*
108 	 * Translate obsolete macros such that later code
109 	 * does not need to look for them.
110 	 */
111 
112 	n = man->last;
113 	switch (n->tok) {
114 	case MAN_LP:
115 	case MAN_P:
116 		n->tok = MAN_PP;
117 		break;
118 	default:
119 		break;
120 	}
121 
122 	/*
123 	 * Iterate over all children, recursing into each one
124 	 * in turn, depth-first.
125 	 */
126 
127 	man->last = man->last->child;
128 	while (man->last != NULL) {
129 		man_validate(man);
130 		if (man->last == n)
131 			man->last = man->last->child;
132 		else
133 			man->last = man->last->next;
134 	}
135 
136 	/* Finally validate the macro itself. */
137 
138 	man->last = n;
139 	man->next = ROFF_NEXT_SIBLING;
140 	switch (n->type) {
141 	case ROFFT_TEXT:
142 		check_text(man, n);
143 		break;
144 	case ROFFT_ROOT:
145 		check_root(man, n);
146 		break;
147 	case ROFFT_COMMENT:
148 	case ROFFT_EQN:
149 	case ROFFT_TBL:
150 		break;
151 	default:
152 		if (n->tok < ROFF_MAX) {
153 			roff_validate(man);
154 			break;
155 		}
156 		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
157 		cp = man_valids + (n->tok - MAN_TH);
158 		if (*cp)
159 			(*cp)(man, n);
160 		if (man->last == n)
161 			n->flags |= NODE_VALID;
162 		break;
163 	}
164 }
165 
166 static void
167 check_root(CHKARGS)
168 {
169 	assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
170 
171 	if (n->last == NULL || n->last->type == ROFFT_COMMENT)
172 		mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL);
173 	else
174 		man->meta.hasbody = 1;
175 
176 	if (NULL == man->meta.title) {
177 		mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL);
178 
179 		/*
180 		 * If a title hasn't been set, do so now (by
181 		 * implication, date and section also aren't set).
182 		 */
183 
184 		man->meta.title = mandoc_strdup("");
185 		man->meta.msec = mandoc_strdup("");
186 		man->meta.date = mandoc_normdate(man, NULL, n->line, n->pos);
187 	}
188 
189 	if (man->meta.os_e &&
190 	    (man->meta.rcsids & (1 << man->meta.os_e)) == 0)
191 		mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0,
192 		    man->meta.os_e == MANDOC_OS_OPENBSD ?
193 		    "(OpenBSD)" : "(NetBSD)");
194 }
195 
/*
 * Handler for table slots that must never be dispatched to:
 * terminate the program immediately.  Both arguments are unused.
 */
static void
check_abort(CHKARGS)
{
	abort();
}
201 
202 static void
203 check_text(CHKARGS)
204 {
205 	char		*cp, *p;
206 
207 	if (n->flags & NODE_NOFILL)
208 		return;
209 
210 	cp = n->string;
211 	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
212 		mandoc_msg(MANDOCERR_FI_TAB,
213 		    n->line, n->pos + (int)(p - cp), NULL);
214 }
215 
216 static void
217 post_EE(CHKARGS)
218 {
219 	if ((n->flags & NODE_NOFILL) == 0)
220 		mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE");
221 }
222 
223 static void
224 post_EX(CHKARGS)
225 {
226 	if (n->flags & NODE_NOFILL)
227 		mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX");
228 }
229 
230 static void
231 post_OP(CHKARGS)
232 {
233 
234 	if (n->child == NULL)
235 		mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP");
236 	else if (n->child->next != NULL && n->child->next->next != NULL) {
237 		n = n->child->next->next;
238 		mandoc_msg(MANDOCERR_ARG_EXCESS,
239 		    n->line, n->pos, "OP ... %s", n->string);
240 	}
241 }
242 
243 static void
244 post_SH(CHKARGS)
245 {
246 	struct roff_node	*nc;
247 
248 	if (n->type != ROFFT_BODY || (nc = n->child) == NULL)
249 		return;
250 
251 	if (nc->tok == MAN_PP && nc->body->child != NULL) {
252 		while (nc->body->last != NULL) {
253 			man->next = ROFF_NEXT_CHILD;
254 			roff_node_relink(man, nc->body->last);
255 			man->last = n;
256 		}
257 	}
258 
259 	if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) {
260 		mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos,
261 		    "%s after %s", roff_name[nc->tok], roff_name[n->tok]);
262 		roff_node_delete(man, nc);
263 	}
264 
265 	/*
266 	 * Trailing PP is empty, so it is deleted by check_par().
267 	 * Trailing sp is significant.
268 	 */
269 
270 	if ((nc = n->last) != NULL && nc->tok == ROFF_br) {
271 		mandoc_msg(MANDOCERR_PAR_SKIP,
272 		    nc->line, nc->pos, "%s at the end of %s",
273 		    roff_name[nc->tok], roff_name[n->tok]);
274 		roff_node_delete(man, nc);
275 	}
276 }
277 
278 static void
279 post_UR(CHKARGS)
280 {
281 	if (n->type == ROFFT_HEAD && n->child == NULL)
282 		mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos,
283 		    "%s", roff_name[n->tok]);
284 	check_part(man, n);
285 }
286 
287 static void
288 check_part(CHKARGS)
289 {
290 
291 	if (n->type == ROFFT_BODY && n->child == NULL)
292 		mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos,
293 		    "%s", roff_name[n->tok]);
294 }
295 
296 static void
297 check_par(CHKARGS)
298 {
299 
300 	switch (n->type) {
301 	case ROFFT_BLOCK:
302 		if (n->body->child == NULL)
303 			roff_node_delete(man, n);
304 		break;
305 	case ROFFT_BODY:
306 		if (n->child != NULL &&
307 		    (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) {
308 			mandoc_msg(MANDOCERR_PAR_SKIP,
309 			    n->child->line, n->child->pos,
310 			    "%s after %s", roff_name[n->child->tok],
311 			    roff_name[n->tok]);
312 			roff_node_delete(man, n->child);
313 		}
314 		if (n->child == NULL)
315 			mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos,
316 			    "%s empty", roff_name[n->tok]);
317 		break;
318 	case ROFFT_HEAD:
319 		if (n->child != NULL)
320 			mandoc_msg(MANDOCERR_ARG_SKIP,
321 			    n->line, n->pos, "%s %s%s",
322 			    roff_name[n->tok], n->child->string,
323 			    n->child->next != NULL ? " ..." : "");
324 		break;
325 	default:
326 		break;
327 	}
328 }
329 
330 static void
331 post_IP(CHKARGS)
332 {
333 
334 	switch (n->type) {
335 	case ROFFT_BLOCK:
336 		if (n->head->child == NULL && n->body->child == NULL)
337 			roff_node_delete(man, n);
338 		break;
339 	case ROFFT_BODY:
340 		if (n->parent->head->child == NULL && n->child == NULL)
341 			mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos,
342 			    "%s empty", roff_name[n->tok]);
343 		break;
344 	default:
345 		break;
346 	}
347 }
348 
349 static void
350 post_TH(CHKARGS)
351 {
352 	struct roff_node *nb;
353 	const char	*p;
354 
355 	free(man->meta.title);
356 	free(man->meta.vol);
357 	free(man->meta.os);
358 	free(man->meta.msec);
359 	free(man->meta.date);
360 
361 	man->meta.title = man->meta.vol = man->meta.date =
362 	    man->meta.msec = man->meta.os = NULL;
363 
364 	nb = n;
365 
366 	/* ->TITLE<- MSEC DATE OS VOL */
367 
368 	n = n->child;
369 	if (n != NULL && n->string != NULL) {
370 		for (p = n->string; *p != '\0'; p++) {
371 			/* Only warn about this once... */
372 			if (isalpha((unsigned char)*p) &&
373 			    ! isupper((unsigned char)*p)) {
374 				mandoc_msg(MANDOCERR_TITLE_CASE, n->line,
375 				    n->pos + (int)(p - n->string),
376 				    "TH %s", n->string);
377 				break;
378 			}
379 		}
380 		man->meta.title = mandoc_strdup(n->string);
381 	} else {
382 		man->meta.title = mandoc_strdup("");
383 		mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH");
384 	}
385 
386 	/* TITLE ->MSEC<- DATE OS VOL */
387 
388 	if (n != NULL)
389 		n = n->next;
390 	if (n != NULL && n->string != NULL)
391 		man->meta.msec = mandoc_strdup(n->string);
392 	else {
393 		man->meta.msec = mandoc_strdup("");
394 		mandoc_msg(MANDOCERR_MSEC_MISSING,
395 		    nb->line, nb->pos, "TH %s", man->meta.title);
396 	}
397 
398 	/* TITLE MSEC ->DATE<- OS VOL */
399 
400 	if (n != NULL)
401 		n = n->next;
402 	if (n != NULL && n->string != NULL && n->string[0] != '\0')
403 		man->meta.date = mandoc_normdate(man,
404 		    n->string, n->line, n->pos);
405 	else {
406 		man->meta.date = mandoc_strdup("");
407 		mandoc_msg(MANDOCERR_DATE_MISSING,
408 		    n == NULL ? nb->line : n->line,
409 		    n == NULL ? nb->pos : n->pos, "TH");
410 	}
411 
412 	/* TITLE MSEC DATE ->OS<- VOL */
413 
414 	if (n && (n = n->next))
415 		man->meta.os = mandoc_strdup(n->string);
416 	else if (man->os_s != NULL)
417 		man->meta.os = mandoc_strdup(man->os_s);
418 	if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) {
419 		if (strstr(man->meta.os, "OpenBSD") != NULL)
420 			man->meta.os_e = MANDOC_OS_OPENBSD;
421 		else if (strstr(man->meta.os, "NetBSD") != NULL)
422 			man->meta.os_e = MANDOC_OS_NETBSD;
423 	}
424 
425 	/* TITLE MSEC DATE OS ->VOL<- */
426 	/* If missing, use the default VOL name for MSEC. */
427 
428 	if (n && (n = n->next))
429 		man->meta.vol = mandoc_strdup(n->string);
430 	else if ('\0' != man->meta.msec[0] &&
431 	    (NULL != (p = mandoc_a2msec(man->meta.msec))))
432 		man->meta.vol = mandoc_strdup(p);
433 
434 	if (n != NULL && (n = n->next) != NULL)
435 		mandoc_msg(MANDOCERR_ARG_EXCESS,
436 		    n->line, n->pos, "TH ... %s", n->string);
437 
438 	/*
439 	 * Remove the `TH' node after we've processed it for our
440 	 * meta-data.
441 	 */
442 	roff_node_delete(man, man->last);
443 }
444 
445 static void
446 post_UC(CHKARGS)
447 {
448 	static const char * const bsd_versions[] = {
449 	    "3rd Berkeley Distribution",
450 	    "4th Berkeley Distribution",
451 	    "4.2 Berkeley Distribution",
452 	    "4.3 Berkeley Distribution",
453 	    "4.4 Berkeley Distribution",
454 	};
455 
456 	const char	*p, *s;
457 
458 	n = n->child;
459 
460 	if (n == NULL || n->type != ROFFT_TEXT)
461 		p = bsd_versions[0];
462 	else {
463 		s = n->string;
464 		if (0 == strcmp(s, "3"))
465 			p = bsd_versions[0];
466 		else if (0 == strcmp(s, "4"))
467 			p = bsd_versions[1];
468 		else if (0 == strcmp(s, "5"))
469 			p = bsd_versions[2];
470 		else if (0 == strcmp(s, "6"))
471 			p = bsd_versions[3];
472 		else if (0 == strcmp(s, "7"))
473 			p = bsd_versions[4];
474 		else
475 			p = bsd_versions[0];
476 	}
477 
478 	free(man->meta.os);
479 	man->meta.os = mandoc_strdup(p);
480 }
481 
482 static void
483 post_AT(CHKARGS)
484 {
485 	static const char * const unix_versions[] = {
486 	    "7th Edition",
487 	    "System III",
488 	    "System V",
489 	    "System V Release 2",
490 	};
491 
492 	struct roff_node *nn;
493 	const char	*p, *s;
494 
495 	n = n->child;
496 
497 	if (n == NULL || n->type != ROFFT_TEXT)
498 		p = unix_versions[0];
499 	else {
500 		s = n->string;
501 		if (0 == strcmp(s, "3"))
502 			p = unix_versions[0];
503 		else if (0 == strcmp(s, "4"))
504 			p = unix_versions[1];
505 		else if (0 == strcmp(s, "5")) {
506 			nn = n->next;
507 			if (nn != NULL &&
508 			    nn->type == ROFFT_TEXT &&
509 			    nn->string[0] != '\0')
510 				p = unix_versions[3];
511 			else
512 				p = unix_versions[2];
513 		} else
514 			p = unix_versions[0];
515 	}
516 
517 	free(man->meta.os);
518 	man->meta.os = mandoc_strdup(p);
519 }
520 
521 static void
522 post_in(CHKARGS)
523 {
524 	char	*s;
525 
526 	if (n->parent->tok != MAN_TP ||
527 	    n->parent->type != ROFFT_HEAD ||
528 	    n->child == NULL ||
529 	    *n->child->string == '+' ||
530 	    *n->child->string == '-')
531 		return;
532 	mandoc_asprintf(&s, "+%s", n->child->string);
533 	free(n->child->string);
534 	n->child->string = s;
535 }
536