1 /* $Id: man_validate.c,v 1.56 2012/07/29 12:35:05 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <errno.h> 23 #include <limits.h> 24 #include <stdarg.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <time.h> 28 29 #include "man.h" 30 #include "mandoc.h" 31 #include "libman.h" 32 #include "libmandoc.h" 33 34 #define CHKARGS struct man *m, struct man_node *n 35 36 typedef int (*v_check)(CHKARGS); 37 38 struct man_valid { 39 v_check *pres; 40 v_check *posts; 41 }; 42 43 static int check_eq0(CHKARGS); 44 static int check_eq2(CHKARGS); 45 static int check_le1(CHKARGS); 46 static int check_ge2(CHKARGS); 47 static int check_le5(CHKARGS); 48 static int check_par(CHKARGS); 49 static int check_part(CHKARGS); 50 static int check_root(CHKARGS); 51 static void check_text(CHKARGS); 52 53 static int post_AT(CHKARGS); 54 static int post_IP(CHKARGS); 55 static int post_vs(CHKARGS); 56 static int post_fi(CHKARGS); 57 static int post_ft(CHKARGS); 58 static int post_nf(CHKARGS); 59 static int post_sec(CHKARGS); 60 static int post_TH(CHKARGS); 61 static int post_UC(CHKARGS); 62 static int pre_sec(CHKARGS); 63 64 static v_check posts_at[] = { post_AT, NULL }; 65 static v_check posts_br[] = { post_vs, check_eq0, NULL }; 66 static v_check posts_eq0[] = { check_eq0, NULL }; 67 static v_check posts_eq2[] = { check_eq2, NULL }; 68 static v_check posts_fi[] = { check_eq0, post_fi, NULL }; 69 static v_check posts_ft[] = { post_ft, NULL }; 70 static v_check posts_ip[] = { post_IP, NULL }; 71 static v_check posts_le1[] = { check_le1, NULL }; 72 static v_check posts_nf[] = { check_eq0, post_nf, NULL }; 73 static v_check posts_par[] = { check_par, NULL }; 74 static v_check posts_part[] = { check_part, NULL }; 75 static v_check posts_sec[] = { post_sec, NULL }; 76 static v_check posts_sp[] = { post_vs, check_le1, NULL }; 77 static v_check posts_th[] = { check_ge2, check_le5, post_TH, NULL }; 78 static v_check posts_uc[] = { post_UC, NULL }; 79 static v_check pres_sec[] = { pre_sec, NULL }; 80 81 static const struct man_valid man_valids[MAN_MAX] = { 82 { NULL, posts_br }, /* br */ 83 { NULL, posts_th }, /* TH */ 84 { pres_sec, posts_sec }, /* SH */ 85 { pres_sec, posts_sec }, /* SS */ 86 { NULL, NULL }, /* TP */ 87 { NULL, posts_par }, /* LP */ 88 { NULL, posts_par }, /* PP */ 89 { NULL, posts_par }, /* P */ 90 { NULL, posts_ip }, /* IP */ 91 { NULL, NULL }, /* HP */ 92 { NULL, NULL }, /* SM */ 93 { NULL, NULL }, /* SB */ 94 { NULL, NULL }, /* BI */ 95 { NULL, NULL }, /* IB */ 96 { NULL, NULL }, /* BR */ 97 { NULL, NULL }, /* RB */ 98 { NULL, NULL }, /* R */ 99 { NULL, NULL }, /* B */ 100 { NULL, NULL }, /* I */ 101 { NULL, NULL }, /* IR */ 102 { NULL, NULL }, /* RI */ 103 { NULL, posts_eq0 }, /* na */ 104 { NULL, posts_sp }, /* sp */ 105 { NULL, posts_nf }, /* nf */ 106 { NULL, posts_fi }, /* fi */ 107 { NULL, NULL }, /* RE */ 108 { NULL, posts_part }, /* RS */ 109 { NULL, NULL }, /* DT */ 110 { NULL, posts_uc }, /* UC */ 111 { NULL, posts_le1 }, /* PD */ 112 { NULL, posts_at }, /* AT */ 113 { NULL, NULL }, /* in */ 114 { NULL, posts_ft }, /* ft */ 115 { NULL, posts_eq2 }, /* OP */ 116 { NULL, posts_nf }, /* EX */ 117 { NULL, posts_fi }, /* EE */ 118 }; 119 120 121 int 122 man_valid_pre(struct man *m, struct man_node *n) 123 { 124 v_check *cp; 125 126 switch (n->type) { 127 case (MAN_TEXT): 128 /* FALLTHROUGH */ 129 case (MAN_ROOT): 130 /* FALLTHROUGH */ 131 case (MAN_EQN): 132 /* FALLTHROUGH */ 133 case (MAN_TBL): 134 return(1); 135 default: 136 break; 137 } 138 139 if (NULL == (cp = man_valids[n->tok].pres)) 140 return(1); 141 for ( ; *cp; cp++) 142 if ( ! (*cp)(m, n)) 143 return(0); 144 return(1); 145 } 146 147 148 int 149 man_valid_post(struct man *m) 150 { 151 v_check *cp; 152 153 if (MAN_VALID & m->last->flags) 154 return(1); 155 m->last->flags |= MAN_VALID; 156 157 switch (m->last->type) { 158 case (MAN_TEXT): 159 check_text(m, m->last); 160 return(1); 161 case (MAN_ROOT): 162 return(check_root(m, m->last)); 163 case (MAN_EQN): 164 /* FALLTHROUGH */ 165 case (MAN_TBL): 166 return(1); 167 default: 168 break; 169 } 170 171 if (NULL == (cp = man_valids[m->last->tok].posts)) 172 return(1); 173 for ( ; *cp; cp++) 174 if ( ! (*cp)(m, m->last)) 175 return(0); 176 177 return(1); 178 } 179 180 181 static int 182 check_root(CHKARGS) 183 { 184 185 if (MAN_BLINE & m->flags) 186 man_nmsg(m, n, MANDOCERR_SCOPEEXIT); 187 else if (MAN_ELINE & m->flags) 188 man_nmsg(m, n, MANDOCERR_SCOPEEXIT); 189 190 m->flags &= ~MAN_BLINE; 191 m->flags &= ~MAN_ELINE; 192 193 if (NULL == m->first->child) { 194 man_nmsg(m, n, MANDOCERR_NODOCBODY); 195 return(0); 196 } else if (NULL == m->meta.title) { 197 man_nmsg(m, n, MANDOCERR_NOTITLE); 198 199 /* 200 * If a title hasn't been set, do so now (by 201 * implication, date and section also aren't set). 202 */ 203 204 m->meta.title = mandoc_strdup("unknown"); 205 m->meta.msec = mandoc_strdup("1"); 206 m->meta.date = mandoc_normdate 207 (m->parse, NULL, n->line, n->pos); 208 } 209 210 return(1); 211 } 212 213 static void 214 check_text(CHKARGS) 215 { 216 char *cp, *p; 217 218 if (MAN_LITERAL & m->flags) 219 return; 220 221 cp = n->string; 222 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 223 man_pmsg(m, n->line, (int)(p - cp), MANDOCERR_BADTAB); 224 } 225 226 #define INEQ_DEFINE(x, ineq, name) \ 227 static int \ 228 check_##name(CHKARGS) \ 229 { \ 230 if (n->nchild ineq (x)) \ 231 return(1); \ 232 mandoc_vmsg(MANDOCERR_ARGCOUNT, m->parse, n->line, n->pos, \ 233 "line arguments %s %d (have %d)", \ 234 #ineq, (x), n->nchild); \ 235 return(1); \ 236 } 237 238 INEQ_DEFINE(0, ==, eq0) 239 INEQ_DEFINE(2, ==, eq2) 240 INEQ_DEFINE(1, <=, le1) 241 INEQ_DEFINE(2, >=, ge2) 242 INEQ_DEFINE(5, <=, le5) 243 244 static int 245 post_ft(CHKARGS) 246 { 247 char *cp; 248 int ok; 249 250 if (0 == n->nchild) 251 return(1); 252 253 ok = 0; 254 cp = n->child->string; 255 switch (*cp) { 256 case ('1'): 257 /* FALLTHROUGH */ 258 case ('2'): 259 /* FALLTHROUGH */ 260 case ('3'): 261 /* FALLTHROUGH */ 262 case ('4'): 263 /* FALLTHROUGH */ 264 case ('I'): 265 /* FALLTHROUGH */ 266 case ('P'): 267 /* FALLTHROUGH */ 268 case ('R'): 269 if ('\0' == cp[1]) 270 ok = 1; 271 break; 272 case ('B'): 273 if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2])) 274 ok = 1; 275 break; 276 case ('C'): 277 if ('W' == cp[1] && '\0' == cp[2]) 278 ok = 1; 279 break; 280 default: 281 break; 282 } 283 284 if (0 == ok) { 285 mandoc_vmsg 286 (MANDOCERR_BADFONT, m->parse, 287 n->line, n->pos, "%s", cp); 288 *cp = '\0'; 289 } 290 291 if (1 < n->nchild) 292 mandoc_vmsg 293 (MANDOCERR_ARGCOUNT, m->parse, n->line, 294 n->pos, "want one child (have %d)", 295 n->nchild); 296 297 return(1); 298 } 299 300 static int 301 pre_sec(CHKARGS) 302 { 303 304 if (MAN_BLOCK == n->type) 305 m->flags &= ~MAN_LITERAL; 306 return(1); 307 } 308 309 static int 310 post_sec(CHKARGS) 311 { 312 313 if ( ! (MAN_HEAD == n->type && 0 == n->nchild)) 314 return(1); 315 316 man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT); 317 return(0); 318 } 319 320 static int 321 check_part(CHKARGS) 322 { 323 324 if (MAN_BODY == n->type && 0 == n->nchild) 325 mandoc_msg(MANDOCERR_ARGCWARN, m->parse, n->line, 326 n->pos, "want children (have none)"); 327 328 return(1); 329 } 330 331 332 static int 333 check_par(CHKARGS) 334 { 335 336 switch (n->type) { 337 case (MAN_BLOCK): 338 if (0 == n->body->nchild) 339 man_node_delete(m, n); 340 break; 341 case (MAN_BODY): 342 if (0 == n->nchild) 343 man_nmsg(m, n, MANDOCERR_IGNPAR); 344 break; 345 case (MAN_HEAD): 346 if (n->nchild) 347 man_nmsg(m, n, MANDOCERR_ARGSLOST); 348 break; 349 default: 350 break; 351 } 352 353 return(1); 354 } 355 356 static int 357 post_IP(CHKARGS) 358 { 359 360 switch (n->type) { 361 case (MAN_BLOCK): 362 if (0 == n->head->nchild && 0 == n->body->nchild) 363 man_node_delete(m, n); 364 break; 365 case (MAN_BODY): 366 if (0 == n->parent->head->nchild && 0 == n->nchild) 367 man_nmsg(m, n, MANDOCERR_IGNPAR); 368 break; 369 default: 370 break; 371 } 372 return(1); 373 } 374 375 static int 376 post_TH(CHKARGS) 377 { 378 const char *p; 379 int line, pos; 380 381 if (m->meta.title) 382 free(m->meta.title); 383 if (m->meta.vol) 384 free(m->meta.vol); 385 if (m->meta.source) 386 free(m->meta.source); 387 if (m->meta.msec) 388 free(m->meta.msec); 389 if (m->meta.date) 390 free(m->meta.date); 391 392 line = n->line; 393 pos = n->pos; 394 m->meta.title = m->meta.vol = m->meta.date = 395 m->meta.msec = m->meta.source = NULL; 396 397 /* ->TITLE<- MSEC DATE SOURCE VOL */ 398 399 n = n->child; 400 if (n && n->string) { 401 for (p = n->string; '\0' != *p; p++) { 402 /* Only warn about this once... */ 403 if (isalpha((unsigned char)*p) && 404 ! isupper((unsigned char)*p)) { 405 man_nmsg(m, n, MANDOCERR_UPPERCASE); 406 break; 407 } 408 } 409 m->meta.title = mandoc_strdup(n->string); 410 } else 411 m->meta.title = mandoc_strdup(""); 412 413 /* TITLE ->MSEC<- DATE SOURCE VOL */ 414 415 if (n) 416 n = n->next; 417 if (n && n->string) 418 m->meta.msec = mandoc_strdup(n->string); 419 else 420 m->meta.msec = mandoc_strdup(""); 421 422 /* TITLE MSEC ->DATE<- SOURCE VOL */ 423 424 if (n) 425 n = n->next; 426 if (n && n->string && '\0' != n->string[0]) { 427 pos = n->pos; 428 m->meta.date = mandoc_normdate 429 (m->parse, n->string, line, pos); 430 } else 431 m->meta.date = mandoc_strdup(""); 432 433 /* TITLE MSEC DATE ->SOURCE<- VOL */ 434 435 if (n && (n = n->next)) 436 m->meta.source = mandoc_strdup(n->string); 437 438 /* TITLE MSEC DATE SOURCE ->VOL<- */ 439 /* If missing, use the default VOL name for MSEC. */ 440 441 if (n && (n = n->next)) 442 m->meta.vol = mandoc_strdup(n->string); 443 else if ('\0' != m->meta.msec[0] && 444 (NULL != (p = mandoc_a2msec(m->meta.msec)))) 445 m->meta.vol = mandoc_strdup(p); 446 447 /* 448 * Remove the `TH' node after we've processed it for our 449 * meta-data. 450 */ 451 man_node_delete(m, m->last); 452 return(1); 453 } 454 455 static int 456 post_nf(CHKARGS) 457 { 458 459 if (MAN_LITERAL & m->flags) 460 man_nmsg(m, n, MANDOCERR_SCOPEREP); 461 462 m->flags |= MAN_LITERAL; 463 return(1); 464 } 465 466 static int 467 post_fi(CHKARGS) 468 { 469 470 if ( ! (MAN_LITERAL & m->flags)) 471 man_nmsg(m, n, MANDOCERR_WNOSCOPE); 472 473 m->flags &= ~MAN_LITERAL; 474 return(1); 475 } 476 477 static int 478 post_UC(CHKARGS) 479 { 480 static const char * const bsd_versions[] = { 481 "3rd Berkeley Distribution", 482 "4th Berkeley Distribution", 483 "4.2 Berkeley Distribution", 484 "4.3 Berkeley Distribution", 485 "4.4 Berkeley Distribution", 486 }; 487 488 const char *p, *s; 489 490 n = n->child; 491 492 if (NULL == n || MAN_TEXT != n->type) 493 p = bsd_versions[0]; 494 else { 495 s = n->string; 496 if (0 == strcmp(s, "3")) 497 p = bsd_versions[0]; 498 else if (0 == strcmp(s, "4")) 499 p = bsd_versions[1]; 500 else if (0 == strcmp(s, "5")) 501 p = bsd_versions[2]; 502 else if (0 == strcmp(s, "6")) 503 p = bsd_versions[3]; 504 else if (0 == strcmp(s, "7")) 505 p = bsd_versions[4]; 506 else 507 p = bsd_versions[0]; 508 } 509 510 if (m->meta.source) 511 free(m->meta.source); 512 513 m->meta.source = mandoc_strdup(p); 514 return(1); 515 } 516 517 static int 518 post_AT(CHKARGS) 519 { 520 static const char * const unix_versions[] = { 521 "7th Edition", 522 "System III", 523 "System V", 524 "System V Release 2", 525 }; 526 527 const char *p, *s; 528 struct man_node *nn; 529 530 n = n->child; 531 532 if (NULL == n || MAN_TEXT != n->type) 533 p = unix_versions[0]; 534 else { 535 s = n->string; 536 if (0 == strcmp(s, "3")) 537 p = unix_versions[0]; 538 else if (0 == strcmp(s, "4")) 539 p = unix_versions[1]; 540 else if (0 == strcmp(s, "5")) { 541 nn = n->next; 542 if (nn && MAN_TEXT == nn->type && nn->string[0]) 543 p = unix_versions[3]; 544 else 545 p = unix_versions[2]; 546 } else 547 p = unix_versions[0]; 548 } 549 550 if (m->meta.source) 551 free(m->meta.source); 552 553 m->meta.source = mandoc_strdup(p); 554 return(1); 555 } 556 557 static int 558 post_vs(CHKARGS) 559 { 560 561 if (NULL != n->prev) 562 return(1); 563 564 switch (n->parent->tok) { 565 case (MAN_SH): 566 /* FALLTHROUGH */ 567 case (MAN_SS): 568 man_nmsg(m, n, MANDOCERR_IGNPAR); 569 /* FALLTHROUGH */ 570 case (MAN_MAX): 571 /* 572 * Don't warn about this because it occurs in pod2man 573 * and would cause considerable (unfixable) warnage. 574 */ 575 man_node_delete(m, n); 576 break; 577 default: 578 break; 579 } 580 581 return(1); 582 } 583