xref: /openbsd-src/usr.bin/mandoc/man_validate.c (revision 7d464165e831b6257b4befbd30d2fbb433d300de)
1 /*	$Id: man_validate.c,v 1.43 2011/04/21 22:59:54 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdarg.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28 
29 #include "mandoc.h"
30 #include "libman.h"
31 #include "libmandoc.h"
32 
/*
 * Parameter list shared by every validation callback in this file:
 * the parse state `m' and the node `n' under examination.
 */
#define	CHKARGS	  struct man *m, struct man_node *n

/*
 * A validation callback.  man_valid_pre() and man_valid_post() stop
 * and return 0 as soon as a callback returns 0.
 */
typedef	int	(*v_check)(CHKARGS);

/*
 * Validation hooks for one macro.  Each member is either NULL (no
 * checks) or a NULL-terminated array of callbacks.
 */
struct	man_valid {
	v_check	 *pres; /* walked by man_valid_pre() */
	v_check	 *posts; /* walked by man_valid_post() */
};
41 
/* Generic checks on node scope, argument count and content. */
static	int	  check_bline(CHKARGS);
static	int	  check_eq0(CHKARGS);
static	int	  check_ft(CHKARGS);
static	int	  check_le1(CHKARGS);
static	int	  check_ge2(CHKARGS);
static	int	  check_le5(CHKARGS);
static	int	  check_par(CHKARGS);
static	int	  check_part(CHKARGS);
static	int	  check_root(CHKARGS);
static	int	  check_sec(CHKARGS);
static	int	  check_text(CHKARGS);

/* Macro-specific handlers that also update parse state or meta-data. */
static	int	  post_AT(CHKARGS);
static	int	  post_fi(CHKARGS);
static	int	  post_nf(CHKARGS);
static	int	  post_TH(CHKARGS);
static	int	  post_UC(CHKARGS);
59 
/*
 * NULL-terminated callback lists referenced from man_valids[].
 * Callbacks run in array order and the first one returning 0 ends
 * the walk.
 */
static	v_check	  posts_at[] = { post_AT, NULL };
static	v_check	  posts_eq0[] = { check_eq0, NULL };
static	v_check	  posts_fi[] = { check_eq0, post_fi, NULL };
static	v_check	  posts_le1[] = { check_le1, NULL };
static	v_check	  posts_ft[] = { check_ft, NULL };
static	v_check	  posts_nf[] = { check_eq0, post_nf, NULL };
static	v_check	  posts_par[] = { check_par, NULL };
static	v_check	  posts_part[] = { check_part, NULL };
static	v_check	  posts_sec[] = { check_sec, NULL };
static	v_check	  posts_th[] = { check_ge2, check_le5, post_TH, NULL };
static	v_check	  posts_uc[] = { post_UC, NULL };
static	v_check	  pres_bline[] = { check_bline, NULL };
72 
73 
/*
 * Validation table, indexed by a node's macro token (n->tok).  The
 * first member of each row is the pre-validation list, the second the
 * post-validation list; NULL means "no checks".  The trailing comment
 * on each row names the macro the row is meant for.
 * NOTE(review): rows are positional, so their order has to match the
 * token enumeration, which is declared outside this file -- confirm
 * against that header before adding or reordering entries.
 */
static	const struct man_valid man_valids[MAN_MAX] = {
	{ NULL, posts_eq0 }, /* br */
	{ pres_bline, posts_th }, /* TH */
	{ pres_bline, posts_sec }, /* SH */
	{ pres_bline, posts_sec }, /* SS */
	{ pres_bline, NULL }, /* TP */
	{ pres_bline, posts_par }, /* LP */
	{ pres_bline, posts_par }, /* PP */
	{ pres_bline, posts_par }, /* P */
	{ pres_bline, NULL }, /* IP */
	{ pres_bline, NULL }, /* HP */
	{ NULL, NULL }, /* SM */
	{ NULL, NULL }, /* SB */
	{ NULL, NULL }, /* BI */
	{ NULL, NULL }, /* IB */
	{ NULL, NULL }, /* BR */
	{ NULL, NULL }, /* RB */
	{ NULL, NULL }, /* R */
	{ NULL, NULL }, /* B */
	{ NULL, NULL }, /* I */
	{ NULL, NULL }, /* IR */
	{ NULL, NULL }, /* RI */
	{ NULL, posts_eq0 }, /* na */ /* FIXME: should warn only. */
	{ NULL, posts_le1 }, /* sp */ /* FIXME: should warn only. */
	{ pres_bline, posts_nf }, /* nf */
	{ pres_bline, posts_fi }, /* fi */
	{ NULL, NULL }, /* RE */
	{ NULL, posts_part }, /* RS */
	{ NULL, NULL }, /* DT */
	{ NULL, posts_uc }, /* UC */
	{ NULL, NULL }, /* PD */
	{ NULL, posts_at }, /* AT */
	{ NULL, NULL }, /* in */
	{ NULL, posts_ft }, /* ft */
};
109 
110 
111 int
112 man_valid_pre(struct man *m, struct man_node *n)
113 {
114 	v_check		*cp;
115 
116 	switch (n->type) {
117 	case (MAN_TEXT):
118 		/* FALLTHROUGH */
119 	case (MAN_ROOT):
120 		/* FALLTHROUGH */
121 	case (MAN_EQN):
122 		/* FALLTHROUGH */
123 	case (MAN_TBL):
124 		return(1);
125 	default:
126 		break;
127 	}
128 
129 	if (NULL == (cp = man_valids[n->tok].pres))
130 		return(1);
131 	for ( ; *cp; cp++)
132 		if ( ! (*cp)(m, n))
133 			return(0);
134 	return(1);
135 }
136 
137 
138 int
139 man_valid_post(struct man *m)
140 {
141 	v_check		*cp;
142 
143 	if (MAN_VALID & m->last->flags)
144 		return(1);
145 	m->last->flags |= MAN_VALID;
146 
147 	switch (m->last->type) {
148 	case (MAN_TEXT):
149 		return(check_text(m, m->last));
150 	case (MAN_ROOT):
151 		return(check_root(m, m->last));
152 	case (MAN_EQN):
153 		/* FALLTHROUGH */
154 	case (MAN_TBL):
155 		return(1);
156 	default:
157 		break;
158 	}
159 
160 	if (NULL == (cp = man_valids[m->last->tok].posts))
161 		return(1);
162 	for ( ; *cp; cp++)
163 		if ( ! (*cp)(m, m->last))
164 			return(0);
165 
166 	return(1);
167 }
168 
169 
170 static int
171 check_root(CHKARGS)
172 {
173 
174 	if (MAN_BLINE & m->flags)
175 		man_nmsg(m, n, MANDOCERR_SCOPEEXIT);
176 	else if (MAN_ELINE & m->flags)
177 		man_nmsg(m, n, MANDOCERR_SCOPEEXIT);
178 
179 	m->flags &= ~MAN_BLINE;
180 	m->flags &= ~MAN_ELINE;
181 
182 	if (NULL == m->first->child) {
183 		man_nmsg(m, n, MANDOCERR_NODOCBODY);
184 		return(0);
185 	} else if (NULL == m->meta.title) {
186 		man_nmsg(m, n, MANDOCERR_NOTITLE);
187 
188 		/*
189 		 * If a title hasn't been set, do so now (by
190 		 * implication, date and section also aren't set).
191 		 */
192 
193 	        m->meta.title = mandoc_strdup("unknown");
194 		m->meta.msec = mandoc_strdup("1");
195 		m->meta.date = mandoc_normdate(NULL,
196 		    m->msg, m->data, n->line, n->pos);
197 	}
198 
199 	return(1);
200 }
201 
202 
203 static int
204 check_text(CHKARGS)
205 {
206 	char		*p;
207 	int		 pos, c;
208 	size_t		 sz;
209 
210 	for (p = n->string, pos = n->pos + 1; *p; p++, pos++) {
211 		sz = strcspn(p, "\t\\");
212 		p += (int)sz;
213 
214 		if ('\0' == *p)
215 			break;
216 
217 		pos += (int)sz;
218 
219 		if ('\t' == *p) {
220 			if (MAN_LITERAL & m->flags)
221 				continue;
222 			man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
223 			continue;
224 		}
225 
226 		/* Check the special character. */
227 
228 		c = mandoc_special(p);
229 		if (c) {
230 			p += c - 1;
231 			pos += c - 1;
232 		} else
233 			man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);
234 	}
235 
236 	return(1);
237 }
238 
239 
240 #define	INEQ_DEFINE(x, ineq, name) \
241 static int \
242 check_##name(CHKARGS) \
243 { \
244 	if (n->nchild ineq (x)) \
245 		return(1); \
246 	man_vmsg(m, MANDOCERR_ARGCOUNT, n->line, n->pos, \
247 			"line arguments %s %d (have %d)", \
248 			#ineq, (x), n->nchild); \
249 	return(1); \
250 }
251 
252 INEQ_DEFINE(0, ==, eq0)
253 INEQ_DEFINE(1, <=, le1)
254 INEQ_DEFINE(2, >=, ge2)
255 INEQ_DEFINE(5, <=, le5)
256 
257 static int
258 check_ft(CHKARGS)
259 {
260 	char	*cp;
261 	int	 ok;
262 
263 	if (0 == n->nchild)
264 		return(1);
265 
266 	ok = 0;
267 	cp = n->child->string;
268 	switch (*cp) {
269 	case ('1'):
270 		/* FALLTHROUGH */
271 	case ('2'):
272 		/* FALLTHROUGH */
273 	case ('3'):
274 		/* FALLTHROUGH */
275 	case ('4'):
276 		/* FALLTHROUGH */
277 	case ('I'):
278 		/* FALLTHROUGH */
279 	case ('P'):
280 		/* FALLTHROUGH */
281 	case ('R'):
282 		if ('\0' == cp[1])
283 			ok = 1;
284 		break;
285 	case ('B'):
286 		if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
287 			ok = 1;
288 		break;
289 	case ('C'):
290 		if ('W' == cp[1] && '\0' == cp[2])
291 			ok = 1;
292 		break;
293 	default:
294 		break;
295 	}
296 
297 	if (0 == ok) {
298 		man_vmsg(m, MANDOCERR_BADFONT,
299 				n->line, n->pos, "%s", cp);
300 		*cp = '\0';
301 	}
302 
303 	if (1 < n->nchild)
304 		man_vmsg(m, MANDOCERR_ARGCOUNT, n->line, n->pos,
305 				"want one child (have %d)", n->nchild);
306 
307 	return(1);
308 }
309 
310 static int
311 check_sec(CHKARGS)
312 {
313 
314 	if (MAN_HEAD == n->type && 0 == n->nchild) {
315 		man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT);
316 		return(0);
317 	} else if (MAN_BODY == n->type && 0 == n->nchild)
318 		man_nmsg(m, n, MANDOCERR_NOBODY);
319 
320 	return(1);
321 }
322 
323 
324 static int
325 check_part(CHKARGS)
326 {
327 
328 	if (MAN_BODY == n->type && 0 == n->nchild)
329 		man_nmsg(m, n, MANDOCERR_NOBODY);
330 
331 	return(1);
332 }
333 
334 
335 static int
336 check_par(CHKARGS)
337 {
338 
339 	switch (n->type) {
340 	case (MAN_BLOCK):
341 		if (0 == n->body->nchild)
342 			man_node_delete(m, n);
343 		break;
344 	case (MAN_BODY):
345 		if (0 == n->nchild)
346 			man_nmsg(m, n, MANDOCERR_IGNPAR);
347 		break;
348 	case (MAN_HEAD):
349 		if (n->nchild)
350 			man_nmsg(m, n, MANDOCERR_ARGSLOST);
351 		break;
352 	default:
353 		break;
354 	}
355 
356 	return(1);
357 }
358 
359 
360 static int
361 check_bline(CHKARGS)
362 {
363 
364 	assert( ! (MAN_ELINE & m->flags));
365 	if (MAN_BLINE & m->flags) {
366 		man_nmsg(m, n, MANDOCERR_SYNTLINESCOPE);
367 		return(0);
368 	}
369 
370 	return(1);
371 }
372 
373 static int
374 post_TH(CHKARGS)
375 {
376 	const char	*p;
377 	int		 line, pos;
378 
379 	if (m->meta.title)
380 		free(m->meta.title);
381 	if (m->meta.vol)
382 		free(m->meta.vol);
383 	if (m->meta.source)
384 		free(m->meta.source);
385 	if (m->meta.msec)
386 		free(m->meta.msec);
387 	if (m->meta.date)
388 		free(m->meta.date);
389 
390 	line = n->line;
391 	pos = n->pos;
392 	m->meta.title = m->meta.vol = m->meta.date =
393 		m->meta.msec = m->meta.source = NULL;
394 
395 	/* ->TITLE<- MSEC DATE SOURCE VOL */
396 
397 	n = n->child;
398 	if (n && n->string) {
399 		for (p = n->string; '\0' != *p; p++) {
400 			/* Only warn about this once... */
401 			if (isalpha((u_char)*p) && ! isupper((u_char)*p)) {
402 				man_nmsg(m, n, MANDOCERR_UPPERCASE);
403 				break;
404 			}
405 		}
406 		m->meta.title = mandoc_strdup(n->string);
407 	} else
408 		m->meta.title = mandoc_strdup("");
409 
410 	/* TITLE ->MSEC<- DATE SOURCE VOL */
411 
412 	if (n)
413 		n = n->next;
414 	if (n && n->string)
415 		m->meta.msec = mandoc_strdup(n->string);
416 	else
417 		m->meta.msec = mandoc_strdup("");
418 
419 	/* TITLE MSEC ->DATE<- SOURCE VOL */
420 
421 	if (n)
422 		n = n->next;
423 	if (n)
424 		pos = n->pos;
425 	m->meta.date = mandoc_normdate(n ? n->string : NULL,
426 	    m->msg, m->data, line, pos);
427 
428 	/* TITLE MSEC DATE ->SOURCE<- VOL */
429 
430 	if (n && (n = n->next))
431 		m->meta.source = mandoc_strdup(n->string);
432 
433 	/* TITLE MSEC DATE SOURCE ->VOL<- */
434 
435 	if (n && (n = n->next))
436 		m->meta.vol = mandoc_strdup(n->string);
437 
438 	/*
439 	 * Remove the `TH' node after we've processed it for our
440 	 * meta-data.
441 	 */
442 	man_node_delete(m, m->last);
443 	return(1);
444 }
445 
446 static int
447 post_nf(CHKARGS)
448 {
449 
450 	if (MAN_LITERAL & m->flags)
451 		man_nmsg(m, n, MANDOCERR_SCOPEREP);
452 
453 	m->flags |= MAN_LITERAL;
454 	return(1);
455 }
456 
457 static int
458 post_fi(CHKARGS)
459 {
460 
461 	if ( ! (MAN_LITERAL & m->flags))
462 		man_nmsg(m, n, MANDOCERR_WNOSCOPE);
463 
464 	m->flags &= ~MAN_LITERAL;
465 	return(1);
466 }
467 
468 static int
469 post_UC(CHKARGS)
470 {
471 	static const char * const bsd_versions[] = {
472 	    "3rd Berkeley Distribution",
473 	    "4th Berkeley Distribution",
474 	    "4.2 Berkeley Distribution",
475 	    "4.3 Berkeley Distribution",
476 	    "4.4 Berkeley Distribution",
477 	};
478 
479 	const char	*p, *s;
480 
481 	n = n->child;
482 	n = m->last->child;
483 
484 	if (NULL == n || MAN_TEXT != n->type)
485 		p = bsd_versions[0];
486 	else {
487 		s = n->string;
488 		if (0 == strcmp(s, "3"))
489 			p = bsd_versions[0];
490 		else if (0 == strcmp(s, "4"))
491 			p = bsd_versions[1];
492 		else if (0 == strcmp(s, "5"))
493 			p = bsd_versions[2];
494 		else if (0 == strcmp(s, "6"))
495 			p = bsd_versions[3];
496 		else if (0 == strcmp(s, "7"))
497 			p = bsd_versions[4];
498 		else
499 			p = bsd_versions[0];
500 	}
501 
502 	if (m->meta.source)
503 		free(m->meta.source);
504 
505 	m->meta.source = mandoc_strdup(p);
506 	return(1);
507 }
508 
509 static int
510 post_AT(CHKARGS)
511 {
512 	static const char * const unix_versions[] = {
513 	    "7th Edition",
514 	    "System III",
515 	    "System V",
516 	    "System V Release 2",
517 	};
518 
519 	const char	*p, *s;
520 	struct man_node	*nn;
521 
522 	n = n->child;
523 
524 	if (NULL == n || MAN_TEXT != n->type)
525 		p = unix_versions[0];
526 	else {
527 		s = n->string;
528 		if (0 == strcmp(s, "3"))
529 			p = unix_versions[0];
530 		else if (0 == strcmp(s, "4"))
531 			p = unix_versions[1];
532 		else if (0 == strcmp(s, "5")) {
533 			nn = n->next;
534 			if (nn && MAN_TEXT == nn->type && nn->string[0])
535 				p = unix_versions[3];
536 			else
537 				p = unix_versions[2];
538 		} else
539 			p = unix_versions[0];
540 	}
541 
542 	if (m->meta.source)
543 		free(m->meta.source);
544 
545 	m->meta.source = mandoc_strdup(p);
546 	return(1);
547 }
548