1 /* $Id: man_validate.c,v 1.43 2011/04/21 22:59:54 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <errno.h> 23 #include <limits.h> 24 #include <stdarg.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <time.h> 28 29 #include "mandoc.h" 30 #include "libman.h" 31 #include "libmandoc.h" 32 33 #define CHKARGS struct man *m, struct man_node *n 34 35 typedef int (*v_check)(CHKARGS); 36 37 struct man_valid { 38 v_check *pres; 39 v_check *posts; 40 }; 41 42 static int check_bline(CHKARGS); 43 static int check_eq0(CHKARGS); 44 static int check_ft(CHKARGS); 45 static int check_le1(CHKARGS); 46 static int check_ge2(CHKARGS); 47 static int check_le5(CHKARGS); 48 static int check_par(CHKARGS); 49 static int check_part(CHKARGS); 50 static int check_root(CHKARGS); 51 static int check_sec(CHKARGS); 52 static int check_text(CHKARGS); 53 54 static int post_AT(CHKARGS); 55 static int post_fi(CHKARGS); 56 static int post_nf(CHKARGS); 57 static int post_TH(CHKARGS); 58 static int post_UC(CHKARGS); 59 60 static v_check posts_at[] = { post_AT, NULL }; 61 static v_check posts_eq0[] = { check_eq0, NULL }; 62 static v_check posts_fi[] = { check_eq0, post_fi, NULL }; 63 static v_check posts_le1[] = { check_le1, NULL }; 64 static v_check posts_ft[] = { check_ft, NULL }; 65 static v_check posts_nf[] = { check_eq0, post_nf, NULL }; 66 static v_check posts_par[] = { check_par, NULL }; 67 static v_check posts_part[] = { check_part, NULL }; 68 static v_check posts_sec[] = { check_sec, NULL }; 69 static v_check posts_th[] = { check_ge2, check_le5, post_TH, NULL }; 70 static v_check posts_uc[] = { post_UC, NULL }; 71 static v_check pres_bline[] = { check_bline, NULL }; 72 73 74 static const struct man_valid man_valids[MAN_MAX] = { 75 { NULL, posts_eq0 }, /* br */ 76 { pres_bline, posts_th }, /* TH */ 77 { pres_bline, posts_sec }, /* SH */ 78 { pres_bline, posts_sec }, /* SS */ 79 { pres_bline, NULL }, /* TP */ 80 { pres_bline, posts_par }, /* LP */ 81 { pres_bline, posts_par }, /* PP */ 82 { pres_bline, posts_par }, /* P */ 83 { pres_bline, NULL }, /* IP */ 84 { pres_bline, NULL }, /* HP */ 85 { NULL, NULL }, /* SM */ 86 { NULL, NULL }, /* SB */ 87 { NULL, NULL }, /* BI */ 88 { NULL, NULL }, /* IB */ 89 { NULL, NULL }, /* BR */ 90 { NULL, NULL }, /* RB */ 91 { NULL, NULL }, /* R */ 92 { NULL, NULL }, /* B */ 93 { NULL, NULL }, /* I */ 94 { NULL, NULL }, /* IR */ 95 { NULL, NULL }, /* RI */ 96 { NULL, posts_eq0 }, /* na */ /* FIXME: should warn only. */ 97 { NULL, posts_le1 }, /* sp */ /* FIXME: should warn only. */ 98 { pres_bline, posts_nf }, /* nf */ 99 { pres_bline, posts_fi }, /* fi */ 100 { NULL, NULL }, /* RE */ 101 { NULL, posts_part }, /* RS */ 102 { NULL, NULL }, /* DT */ 103 { NULL, posts_uc }, /* UC */ 104 { NULL, NULL }, /* PD */ 105 { NULL, posts_at }, /* AT */ 106 { NULL, NULL }, /* in */ 107 { NULL, posts_ft }, /* ft */ 108 }; 109 110 111 int 112 man_valid_pre(struct man *m, struct man_node *n) 113 { 114 v_check *cp; 115 116 switch (n->type) { 117 case (MAN_TEXT): 118 /* FALLTHROUGH */ 119 case (MAN_ROOT): 120 /* FALLTHROUGH */ 121 case (MAN_EQN): 122 /* FALLTHROUGH */ 123 case (MAN_TBL): 124 return(1); 125 default: 126 break; 127 } 128 129 if (NULL == (cp = man_valids[n->tok].pres)) 130 return(1); 131 for ( ; *cp; cp++) 132 if ( ! (*cp)(m, n)) 133 return(0); 134 return(1); 135 } 136 137 138 int 139 man_valid_post(struct man *m) 140 { 141 v_check *cp; 142 143 if (MAN_VALID & m->last->flags) 144 return(1); 145 m->last->flags |= MAN_VALID; 146 147 switch (m->last->type) { 148 case (MAN_TEXT): 149 return(check_text(m, m->last)); 150 case (MAN_ROOT): 151 return(check_root(m, m->last)); 152 case (MAN_EQN): 153 /* FALLTHROUGH */ 154 case (MAN_TBL): 155 return(1); 156 default: 157 break; 158 } 159 160 if (NULL == (cp = man_valids[m->last->tok].posts)) 161 return(1); 162 for ( ; *cp; cp++) 163 if ( ! (*cp)(m, m->last)) 164 return(0); 165 166 return(1); 167 } 168 169 170 static int 171 check_root(CHKARGS) 172 { 173 174 if (MAN_BLINE & m->flags) 175 man_nmsg(m, n, MANDOCERR_SCOPEEXIT); 176 else if (MAN_ELINE & m->flags) 177 man_nmsg(m, n, MANDOCERR_SCOPEEXIT); 178 179 m->flags &= ~MAN_BLINE; 180 m->flags &= ~MAN_ELINE; 181 182 if (NULL == m->first->child) { 183 man_nmsg(m, n, MANDOCERR_NODOCBODY); 184 return(0); 185 } else if (NULL == m->meta.title) { 186 man_nmsg(m, n, MANDOCERR_NOTITLE); 187 188 /* 189 * If a title hasn't been set, do so now (by 190 * implication, date and section also aren't set). 191 */ 192 193 m->meta.title = mandoc_strdup("unknown"); 194 m->meta.msec = mandoc_strdup("1"); 195 m->meta.date = mandoc_normdate(NULL, 196 m->msg, m->data, n->line, n->pos); 197 } 198 199 return(1); 200 } 201 202 203 static int 204 check_text(CHKARGS) 205 { 206 char *p; 207 int pos, c; 208 size_t sz; 209 210 for (p = n->string, pos = n->pos + 1; *p; p++, pos++) { 211 sz = strcspn(p, "\t\\"); 212 p += (int)sz; 213 214 if ('\0' == *p) 215 break; 216 217 pos += (int)sz; 218 219 if ('\t' == *p) { 220 if (MAN_LITERAL & m->flags) 221 continue; 222 man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); 223 continue; 224 } 225 226 /* Check the special character. */ 227 228 c = mandoc_special(p); 229 if (c) { 230 p += c - 1; 231 pos += c - 1; 232 } else 233 man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE); 234 } 235 236 return(1); 237 } 238 239 240 #define INEQ_DEFINE(x, ineq, name) \ 241 static int \ 242 check_##name(CHKARGS) \ 243 { \ 244 if (n->nchild ineq (x)) \ 245 return(1); \ 246 man_vmsg(m, MANDOCERR_ARGCOUNT, n->line, n->pos, \ 247 "line arguments %s %d (have %d)", \ 248 #ineq, (x), n->nchild); \ 249 return(1); \ 250 } 251 252 INEQ_DEFINE(0, ==, eq0) 253 INEQ_DEFINE(1, <=, le1) 254 INEQ_DEFINE(2, >=, ge2) 255 INEQ_DEFINE(5, <=, le5) 256 257 static int 258 check_ft(CHKARGS) 259 { 260 char *cp; 261 int ok; 262 263 if (0 == n->nchild) 264 return(1); 265 266 ok = 0; 267 cp = n->child->string; 268 switch (*cp) { 269 case ('1'): 270 /* FALLTHROUGH */ 271 case ('2'): 272 /* FALLTHROUGH */ 273 case ('3'): 274 /* FALLTHROUGH */ 275 case ('4'): 276 /* FALLTHROUGH */ 277 case ('I'): 278 /* FALLTHROUGH */ 279 case ('P'): 280 /* FALLTHROUGH */ 281 case ('R'): 282 if ('\0' == cp[1]) 283 ok = 1; 284 break; 285 case ('B'): 286 if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2])) 287 ok = 1; 288 break; 289 case ('C'): 290 if ('W' == cp[1] && '\0' == cp[2]) 291 ok = 1; 292 break; 293 default: 294 break; 295 } 296 297 if (0 == ok) { 298 man_vmsg(m, MANDOCERR_BADFONT, 299 n->line, n->pos, "%s", cp); 300 *cp = '\0'; 301 } 302 303 if (1 < n->nchild) 304 man_vmsg(m, MANDOCERR_ARGCOUNT, n->line, n->pos, 305 "want one child (have %d)", n->nchild); 306 307 return(1); 308 } 309 310 static int 311 check_sec(CHKARGS) 312 { 313 314 if (MAN_HEAD == n->type && 0 == n->nchild) { 315 man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT); 316 return(0); 317 } else if (MAN_BODY == n->type && 0 == n->nchild) 318 man_nmsg(m, n, MANDOCERR_NOBODY); 319 320 return(1); 321 } 322 323 324 static int 325 check_part(CHKARGS) 326 { 327 328 if (MAN_BODY == n->type && 0 == n->nchild) 329 man_nmsg(m, n, MANDOCERR_NOBODY); 330 331 return(1); 332 } 333 334 335 static int 336 check_par(CHKARGS) 337 { 338 339 switch (n->type) { 340 case (MAN_BLOCK): 341 if (0 == n->body->nchild) 342 man_node_delete(m, n); 343 break; 344 case (MAN_BODY): 345 if (0 == n->nchild) 346 man_nmsg(m, n, MANDOCERR_IGNPAR); 347 break; 348 case (MAN_HEAD): 349 if (n->nchild) 350 man_nmsg(m, n, MANDOCERR_ARGSLOST); 351 break; 352 default: 353 break; 354 } 355 356 return(1); 357 } 358 359 360 static int 361 check_bline(CHKARGS) 362 { 363 364 assert( ! (MAN_ELINE & m->flags)); 365 if (MAN_BLINE & m->flags) { 366 man_nmsg(m, n, MANDOCERR_SYNTLINESCOPE); 367 return(0); 368 } 369 370 return(1); 371 } 372 373 static int 374 post_TH(CHKARGS) 375 { 376 const char *p; 377 int line, pos; 378 379 if (m->meta.title) 380 free(m->meta.title); 381 if (m->meta.vol) 382 free(m->meta.vol); 383 if (m->meta.source) 384 free(m->meta.source); 385 if (m->meta.msec) 386 free(m->meta.msec); 387 if (m->meta.date) 388 free(m->meta.date); 389 390 line = n->line; 391 pos = n->pos; 392 m->meta.title = m->meta.vol = m->meta.date = 393 m->meta.msec = m->meta.source = NULL; 394 395 /* ->TITLE<- MSEC DATE SOURCE VOL */ 396 397 n = n->child; 398 if (n && n->string) { 399 for (p = n->string; '\0' != *p; p++) { 400 /* Only warn about this once... */ 401 if (isalpha((u_char)*p) && ! isupper((u_char)*p)) { 402 man_nmsg(m, n, MANDOCERR_UPPERCASE); 403 break; 404 } 405 } 406 m->meta.title = mandoc_strdup(n->string); 407 } else 408 m->meta.title = mandoc_strdup(""); 409 410 /* TITLE ->MSEC<- DATE SOURCE VOL */ 411 412 if (n) 413 n = n->next; 414 if (n && n->string) 415 m->meta.msec = mandoc_strdup(n->string); 416 else 417 m->meta.msec = mandoc_strdup(""); 418 419 /* TITLE MSEC ->DATE<- SOURCE VOL */ 420 421 if (n) 422 n = n->next; 423 if (n) 424 pos = n->pos; 425 m->meta.date = mandoc_normdate(n ? n->string : NULL, 426 m->msg, m->data, line, pos); 427 428 /* TITLE MSEC DATE ->SOURCE<- VOL */ 429 430 if (n && (n = n->next)) 431 m->meta.source = mandoc_strdup(n->string); 432 433 /* TITLE MSEC DATE SOURCE ->VOL<- */ 434 435 if (n && (n = n->next)) 436 m->meta.vol = mandoc_strdup(n->string); 437 438 /* 439 * Remove the `TH' node after we've processed it for our 440 * meta-data. 441 */ 442 man_node_delete(m, m->last); 443 return(1); 444 } 445 446 static int 447 post_nf(CHKARGS) 448 { 449 450 if (MAN_LITERAL & m->flags) 451 man_nmsg(m, n, MANDOCERR_SCOPEREP); 452 453 m->flags |= MAN_LITERAL; 454 return(1); 455 } 456 457 static int 458 post_fi(CHKARGS) 459 { 460 461 if ( ! (MAN_LITERAL & m->flags)) 462 man_nmsg(m, n, MANDOCERR_WNOSCOPE); 463 464 m->flags &= ~MAN_LITERAL; 465 return(1); 466 } 467 468 static int 469 post_UC(CHKARGS) 470 { 471 static const char * const bsd_versions[] = { 472 "3rd Berkeley Distribution", 473 "4th Berkeley Distribution", 474 "4.2 Berkeley Distribution", 475 "4.3 Berkeley Distribution", 476 "4.4 Berkeley Distribution", 477 }; 478 479 const char *p, *s; 480 481 n = n->child; 482 n = m->last->child; 483 484 if (NULL == n || MAN_TEXT != n->type) 485 p = bsd_versions[0]; 486 else { 487 s = n->string; 488 if (0 == strcmp(s, "3")) 489 p = bsd_versions[0]; 490 else if (0 == strcmp(s, "4")) 491 p = bsd_versions[1]; 492 else if (0 == strcmp(s, "5")) 493 p = bsd_versions[2]; 494 else if (0 == strcmp(s, "6")) 495 p = bsd_versions[3]; 496 else if (0 == strcmp(s, "7")) 497 p = bsd_versions[4]; 498 else 499 p = bsd_versions[0]; 500 } 501 502 if (m->meta.source) 503 free(m->meta.source); 504 505 m->meta.source = mandoc_strdup(p); 506 return(1); 507 } 508 509 static int 510 post_AT(CHKARGS) 511 { 512 static const char * const unix_versions[] = { 513 "7th Edition", 514 "System III", 515 "System V", 516 "System V Release 2", 517 }; 518 519 const char *p, *s; 520 struct man_node *nn; 521 522 n = n->child; 523 524 if (NULL == n || MAN_TEXT != n->type) 525 p = unix_versions[0]; 526 else { 527 s = n->string; 528 if (0 == strcmp(s, "3")) 529 p = unix_versions[0]; 530 else if (0 == strcmp(s, "4")) 531 p = unix_versions[1]; 532 else if (0 == strcmp(s, "5")) { 533 nn = n->next; 534 if (nn && MAN_TEXT == nn->type && nn->string[0]) 535 p = unix_versions[3]; 536 else 537 p = unix_versions[2]; 538 } else 539 p = unix_versions[0]; 540 } 541 542 if (m->meta.source) 543 free(m->meta.source); 544 545 m->meta.source = mandoc_strdup(p); 546 return(1); 547 } 548