1 /* $Id: man_validate.c,v 1.77 2014/08/08 16:17:09 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <errno.h> 23 #include <limits.h> 24 #include <stdarg.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <time.h> 28 29 #include "man.h" 30 #include "mandoc.h" 31 #include "mandoc_aux.h" 32 #include "libman.h" 33 #include "libmandoc.h" 34 35 #define CHKARGS struct man *man, struct man_node *n 36 37 typedef int (*v_check)(CHKARGS); 38 39 static int check_eq0(CHKARGS); 40 static int check_eq2(CHKARGS); 41 static int check_le1(CHKARGS); 42 static int check_le5(CHKARGS); 43 static int check_par(CHKARGS); 44 static int check_part(CHKARGS); 45 static int check_root(CHKARGS); 46 static int check_text(CHKARGS); 47 48 static int post_AT(CHKARGS); 49 static int post_IP(CHKARGS); 50 static int post_vs(CHKARGS); 51 static int post_fi(CHKARGS); 52 static int post_ft(CHKARGS); 53 static int post_nf(CHKARGS); 54 static int post_TH(CHKARGS); 55 static int post_UC(CHKARGS); 56 static int post_UR(CHKARGS); 57 58 static v_check man_valids[MAN_MAX] = { 59 post_vs, /* br */ 60 post_TH, /* TH */ 61 NULL, /* SH */ 62 NULL, /* SS */ 63 NULL, /* TP */ 64 check_par, /* LP */ 65 check_par, /* PP */ 66 check_par, /* P */ 67 post_IP, /* IP */ 68 NULL, /* HP */ 69 NULL, /* SM */ 70 NULL, /* SB */ 71 NULL, /* BI */ 72 NULL, /* IB */ 73 NULL, /* BR */ 74 NULL, /* RB */ 75 NULL, /* R */ 76 NULL, /* B */ 77 NULL, /* I */ 78 NULL, /* IR */ 79 NULL, /* RI */ 80 check_eq0, /* na */ 81 post_vs, /* sp */ 82 post_nf, /* nf */ 83 post_fi, /* fi */ 84 NULL, /* RE */ 85 check_part, /* RS */ 86 NULL, /* DT */ 87 post_UC, /* UC */ 88 check_le1, /* PD */ 89 post_AT, /* AT */ 90 NULL, /* in */ 91 post_ft, /* ft */ 92 check_eq2, /* OP */ 93 post_nf, /* EX */ 94 post_fi, /* EE */ 95 post_UR, /* UR */ 96 NULL, /* UE */ 97 NULL, /* ll */ 98 }; 99 100 101 int 102 man_valid_post(struct man *man) 103 { 104 struct man_node *n; 105 v_check *cp; 106 107 n = man->last; 108 if (n->flags & MAN_VALID) 109 return(1); 110 n->flags |= MAN_VALID; 111 112 switch (n->type) { 113 case MAN_TEXT: 114 return(check_text(man, n)); 115 case MAN_ROOT: 116 return(check_root(man, n)); 117 case MAN_EQN: 118 /* FALLTHROUGH */ 119 case MAN_TBL: 120 return(1); 121 default: 122 cp = man_valids + n->tok; 123 return(*cp ? (*cp)(man, n) : 1); 124 } 125 } 126 127 static int 128 check_root(CHKARGS) 129 { 130 131 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 132 133 if (NULL == man->first->child) 134 mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse, 135 n->line, n->pos, NULL); 136 else 137 man->meta.hasbody = 1; 138 139 if (NULL == man->meta.title) { 140 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, 141 n->line, n->pos, NULL); 142 143 /* 144 * If a title hasn't been set, do so now (by 145 * implication, date and section also aren't set). 146 */ 147 148 man->meta.title = mandoc_strdup(""); 149 man->meta.msec = mandoc_strdup(""); 150 man->meta.date = man->quick ? mandoc_strdup("") : 151 mandoc_normdate(man->parse, NULL, n->line, n->pos); 152 } 153 154 return(1); 155 } 156 157 static int 158 check_text(CHKARGS) 159 { 160 char *cp, *p; 161 162 if (MAN_LITERAL & man->flags) 163 return(1); 164 165 cp = n->string; 166 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 167 mandoc_msg(MANDOCERR_FI_TAB, man->parse, 168 n->line, n->pos + (p - cp), NULL); 169 return(1); 170 } 171 172 #define INEQ_DEFINE(x, ineq, name) \ 173 static int \ 174 check_##name(CHKARGS) \ 175 { \ 176 if (n->nchild ineq (x)) \ 177 return(1); \ 178 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, n->pos, \ 179 "line arguments %s %d (have %d)", \ 180 #ineq, (x), n->nchild); \ 181 return(1); \ 182 } 183 184 INEQ_DEFINE(0, ==, eq0) 185 INEQ_DEFINE(2, ==, eq2) 186 INEQ_DEFINE(1, <=, le1) 187 INEQ_DEFINE(5, <=, le5) 188 189 static int 190 post_UR(CHKARGS) 191 { 192 193 if (MAN_HEAD == n->type && 1 != n->nchild) 194 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, 195 n->pos, "line arguments eq 1 (have %d)", n->nchild); 196 197 return(check_part(man, n)); 198 } 199 200 static int 201 post_ft(CHKARGS) 202 { 203 char *cp; 204 int ok; 205 206 if (0 == n->nchild) 207 return(1); 208 209 ok = 0; 210 cp = n->child->string; 211 switch (*cp) { 212 case '1': 213 /* FALLTHROUGH */ 214 case '2': 215 /* FALLTHROUGH */ 216 case '3': 217 /* FALLTHROUGH */ 218 case '4': 219 /* FALLTHROUGH */ 220 case 'I': 221 /* FALLTHROUGH */ 222 case 'P': 223 /* FALLTHROUGH */ 224 case 'R': 225 if ('\0' == cp[1]) 226 ok = 1; 227 break; 228 case 'B': 229 if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2])) 230 ok = 1; 231 break; 232 case 'C': 233 if ('W' == cp[1] && '\0' == cp[2]) 234 ok = 1; 235 break; 236 default: 237 break; 238 } 239 240 if (0 == ok) { 241 mandoc_vmsg(MANDOCERR_FT_BAD, man->parse, 242 n->line, n->pos, "ft %s", cp); 243 *cp = '\0'; 244 } 245 246 if (1 < n->nchild) 247 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, 248 n->pos, "want one child (have %d)", n->nchild); 249 250 return(1); 251 } 252 253 static int 254 check_part(CHKARGS) 255 { 256 257 if (MAN_BODY == n->type && 0 == n->nchild) 258 mandoc_msg(MANDOCERR_ARGCWARN, man->parse, n->line, 259 n->pos, "want children (have none)"); 260 261 return(1); 262 } 263 264 static int 265 check_par(CHKARGS) 266 { 267 268 switch (n->type) { 269 case MAN_BLOCK: 270 if (0 == n->body->nchild) 271 man_node_delete(man, n); 272 break; 273 case MAN_BODY: 274 if (0 == n->nchild) 275 mandoc_vmsg(MANDOCERR_PAR_SKIP, 276 man->parse, n->line, n->pos, 277 "%s empty", man_macronames[n->tok]); 278 break; 279 case MAN_HEAD: 280 if (n->nchild) 281 mandoc_vmsg(MANDOCERR_ARG_SKIP, 282 man->parse, n->line, n->pos, 283 "%s %s%s", man_macronames[n->tok], 284 n->child->string, 285 n->nchild > 1 ? " ..." : ""); 286 break; 287 default: 288 break; 289 } 290 291 return(1); 292 } 293 294 static int 295 post_IP(CHKARGS) 296 { 297 298 switch (n->type) { 299 case MAN_BLOCK: 300 if (0 == n->head->nchild && 0 == n->body->nchild) 301 man_node_delete(man, n); 302 break; 303 case MAN_BODY: 304 if (0 == n->parent->head->nchild && 0 == n->nchild) 305 mandoc_vmsg(MANDOCERR_PAR_SKIP, 306 man->parse, n->line, n->pos, 307 "%s empty", man_macronames[n->tok]); 308 break; 309 default: 310 break; 311 } 312 return(1); 313 } 314 315 static int 316 post_TH(CHKARGS) 317 { 318 struct man_node *nb; 319 const char *p; 320 321 check_le5(man, n); 322 323 free(man->meta.title); 324 free(man->meta.vol); 325 free(man->meta.source); 326 free(man->meta.msec); 327 free(man->meta.date); 328 329 man->meta.title = man->meta.vol = man->meta.date = 330 man->meta.msec = man->meta.source = NULL; 331 332 nb = n; 333 334 /* ->TITLE<- MSEC DATE SOURCE VOL */ 335 336 n = n->child; 337 if (n && n->string) { 338 for (p = n->string; '\0' != *p; p++) { 339 /* Only warn about this once... */ 340 if (isalpha((unsigned char)*p) && 341 ! isupper((unsigned char)*p)) { 342 mandoc_vmsg(MANDOCERR_TITLE_CASE, 343 man->parse, n->line, 344 n->pos + (p - n->string), 345 "TH %s", n->string); 346 break; 347 } 348 } 349 man->meta.title = mandoc_strdup(n->string); 350 } else { 351 man->meta.title = mandoc_strdup(""); 352 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, 353 nb->line, nb->pos, "TH"); 354 } 355 356 /* TITLE ->MSEC<- DATE SOURCE VOL */ 357 358 if (n) 359 n = n->next; 360 if (n && n->string) 361 man->meta.msec = mandoc_strdup(n->string); 362 else { 363 man->meta.msec = mandoc_strdup(""); 364 mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse, 365 nb->line, nb->pos, "TH %s", man->meta.title); 366 } 367 368 /* TITLE MSEC ->DATE<- SOURCE VOL */ 369 370 if (n) 371 n = n->next; 372 if (n && n->string && '\0' != n->string[0]) { 373 man->meta.date = man->quick ? 374 mandoc_strdup(n->string) : 375 mandoc_normdate(man->parse, n->string, 376 n->line, n->pos); 377 } else { 378 man->meta.date = mandoc_strdup(""); 379 mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, 380 n ? n->line : nb->line, 381 n ? n->pos : nb->pos, "TH"); 382 } 383 384 /* TITLE MSEC DATE ->SOURCE<- VOL */ 385 386 if (n && (n = n->next)) 387 man->meta.source = mandoc_strdup(n->string); 388 389 /* TITLE MSEC DATE SOURCE ->VOL<- */ 390 /* If missing, use the default VOL name for MSEC. */ 391 392 if (n && (n = n->next)) 393 man->meta.vol = mandoc_strdup(n->string); 394 else if ('\0' != man->meta.msec[0] && 395 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 396 man->meta.vol = mandoc_strdup(p); 397 398 /* 399 * Remove the `TH' node after we've processed it for our 400 * meta-data. 401 */ 402 man_node_delete(man, man->last); 403 return(1); 404 } 405 406 static int 407 post_nf(CHKARGS) 408 { 409 410 check_eq0(man, n); 411 412 if (MAN_LITERAL & man->flags) 413 mandoc_msg(MANDOCERR_NF_SKIP, man->parse, 414 n->line, n->pos, "nf"); 415 416 man->flags |= MAN_LITERAL; 417 return(1); 418 } 419 420 static int 421 post_fi(CHKARGS) 422 { 423 424 check_eq0(man, n); 425 426 if ( ! (MAN_LITERAL & man->flags)) 427 mandoc_msg(MANDOCERR_FI_SKIP, man->parse, 428 n->line, n->pos, "fi"); 429 430 man->flags &= ~MAN_LITERAL; 431 return(1); 432 } 433 434 static int 435 post_UC(CHKARGS) 436 { 437 static const char * const bsd_versions[] = { 438 "3rd Berkeley Distribution", 439 "4th Berkeley Distribution", 440 "4.2 Berkeley Distribution", 441 "4.3 Berkeley Distribution", 442 "4.4 Berkeley Distribution", 443 }; 444 445 const char *p, *s; 446 447 n = n->child; 448 449 if (NULL == n || MAN_TEXT != n->type) 450 p = bsd_versions[0]; 451 else { 452 s = n->string; 453 if (0 == strcmp(s, "3")) 454 p = bsd_versions[0]; 455 else if (0 == strcmp(s, "4")) 456 p = bsd_versions[1]; 457 else if (0 == strcmp(s, "5")) 458 p = bsd_versions[2]; 459 else if (0 == strcmp(s, "6")) 460 p = bsd_versions[3]; 461 else if (0 == strcmp(s, "7")) 462 p = bsd_versions[4]; 463 else 464 p = bsd_versions[0]; 465 } 466 467 free(man->meta.source); 468 man->meta.source = mandoc_strdup(p); 469 return(1); 470 } 471 472 static int 473 post_AT(CHKARGS) 474 { 475 static const char * const unix_versions[] = { 476 "7th Edition", 477 "System III", 478 "System V", 479 "System V Release 2", 480 }; 481 482 const char *p, *s; 483 struct man_node *nn; 484 485 n = n->child; 486 487 if (NULL == n || MAN_TEXT != n->type) 488 p = unix_versions[0]; 489 else { 490 s = n->string; 491 if (0 == strcmp(s, "3")) 492 p = unix_versions[0]; 493 else if (0 == strcmp(s, "4")) 494 p = unix_versions[1]; 495 else if (0 == strcmp(s, "5")) { 496 nn = n->next; 497 if (nn && MAN_TEXT == nn->type && nn->string[0]) 498 p = unix_versions[3]; 499 else 500 p = unix_versions[2]; 501 } else 502 p = unix_versions[0]; 503 } 504 505 free(man->meta.source); 506 man->meta.source = mandoc_strdup(p); 507 return(1); 508 } 509 510 static int 511 post_vs(CHKARGS) 512 { 513 514 if (n->tok == MAN_br) 515 check_eq0(man, n); 516 else 517 check_le1(man, n); 518 519 if (NULL != n->prev) 520 return(1); 521 522 switch (n->parent->tok) { 523 case MAN_SH: 524 /* FALLTHROUGH */ 525 case MAN_SS: 526 mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos, 527 "%s after %s", man_macronames[n->tok], 528 man_macronames[n->parent->tok]); 529 /* FALLTHROUGH */ 530 case MAN_MAX: 531 /* 532 * Don't warn about this because it occurs in pod2man 533 * and would cause considerable (unfixable) warnage. 534 */ 535 man_node_delete(man, n); 536 break; 537 default: 538 break; 539 } 540 541 return(1); 542 } 543