1 /* $OpenBSD: man_validate.c,v 1.93 2016/01/08 17:48:04 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2016 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <errno.h> 23 #include <limits.h> 24 #include <stdarg.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <time.h> 28 29 #include "mandoc_aux.h" 30 #include "mandoc.h" 31 #include "roff.h" 32 #include "man.h" 33 #include "libmandoc.h" 34 #include "roff_int.h" 35 #include "libman.h" 36 37 #define CHKARGS struct roff_man *man, struct roff_node *n 38 39 typedef void (*v_check)(CHKARGS); 40 41 static void check_par(CHKARGS); 42 static void check_part(CHKARGS); 43 static void check_root(CHKARGS); 44 static void check_text(CHKARGS); 45 46 static void post_AT(CHKARGS); 47 static void post_IP(CHKARGS); 48 static void post_vs(CHKARGS); 49 static void post_ft(CHKARGS); 50 static void post_OP(CHKARGS); 51 static void post_TH(CHKARGS); 52 static void post_UC(CHKARGS); 53 static void post_UR(CHKARGS); 54 55 static v_check man_valids[MAN_MAX] = { 56 post_vs, /* br */ 57 post_TH, /* TH */ 58 NULL, /* SH */ 59 NULL, /* SS */ 60 NULL, /* TP */ 61 check_par, /* LP */ 62 check_par, /* PP */ 63 check_par, /* P */ 64 post_IP, /* IP */ 65 NULL, /* HP */ 66 NULL, /* SM */ 67 NULL, /* SB */ 68 NULL, /* BI */ 69 NULL, /* IB */ 70 NULL, /* BR */ 71 NULL, /* RB */ 72 NULL, /* R */ 73 NULL, /* B */ 74 NULL, /* I */ 75 NULL, /* IR */ 76 NULL, /* RI */ 77 post_vs, /* sp */ 78 NULL, /* nf */ 79 NULL, /* fi */ 80 NULL, /* RE */ 81 check_part, /* RS */ 82 NULL, /* DT */ 83 post_UC, /* UC */ 84 NULL, /* PD */ 85 post_AT, /* AT */ 86 NULL, /* in */ 87 post_ft, /* ft */ 88 post_OP, /* OP */ 89 NULL, /* EX */ 90 NULL, /* EE */ 91 post_UR, /* UR */ 92 NULL, /* UE */ 93 NULL, /* ll */ 94 }; 95 96 97 void 98 man_node_validate(struct roff_man *man) 99 { 100 struct roff_node *n; 101 v_check *cp; 102 103 n = man->last; 104 man->last = man->last->child; 105 while (man->last != NULL) { 106 man_node_validate(man); 107 if (man->last == n) 108 man->last = man->last->child; 109 else 110 man->last = man->last->next; 111 } 112 113 man->last = n; 114 man->next = ROFF_NEXT_SIBLING; 115 switch (n->type) { 116 case ROFFT_TEXT: 117 check_text(man, n); 118 break; 119 case ROFFT_ROOT: 120 check_root(man, n); 121 break; 122 case ROFFT_EQN: 123 case ROFFT_TBL: 124 break; 125 default: 126 cp = man_valids + n->tok; 127 if (*cp) 128 (*cp)(man, n); 129 if (man->last == n) 130 man_state(man, n); 131 break; 132 } 133 } 134 135 static void 136 check_root(CHKARGS) 137 { 138 139 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 140 141 if (NULL == man->first->child) 142 mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse, 143 n->line, n->pos, NULL); 144 else 145 man->meta.hasbody = 1; 146 147 if (NULL == man->meta.title) { 148 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, 149 n->line, n->pos, NULL); 150 151 /* 152 * If a title hasn't been set, do so now (by 153 * implication, date and section also aren't set). 154 */ 155 156 man->meta.title = mandoc_strdup(""); 157 man->meta.msec = mandoc_strdup(""); 158 man->meta.date = man->quick ? mandoc_strdup("") : 159 mandoc_normdate(man->parse, NULL, n->line, n->pos); 160 } 161 } 162 163 static void 164 check_text(CHKARGS) 165 { 166 char *cp, *p; 167 168 if (MAN_LITERAL & man->flags) 169 return; 170 171 cp = n->string; 172 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 173 mandoc_msg(MANDOCERR_FI_TAB, man->parse, 174 n->line, n->pos + (p - cp), NULL); 175 } 176 177 static void 178 post_OP(CHKARGS) 179 { 180 181 if (n->child == NULL) 182 mandoc_msg(MANDOCERR_OP_EMPTY, man->parse, 183 n->line, n->pos, "OP"); 184 else if (n->child->next != NULL && n->child->next->next != NULL) { 185 n = n->child->next->next; 186 mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, 187 n->line, n->pos, "OP ... %s", n->string); 188 } 189 } 190 191 static void 192 post_UR(CHKARGS) 193 { 194 195 if (n->type == ROFFT_HEAD && n->child == NULL) 196 mandoc_vmsg(MANDOCERR_UR_NOHEAD, man->parse, 197 n->line, n->pos, "UR"); 198 check_part(man, n); 199 } 200 201 static void 202 post_ft(CHKARGS) 203 { 204 char *cp; 205 int ok; 206 207 if (n->child == NULL) 208 return; 209 210 ok = 0; 211 cp = n->child->string; 212 switch (*cp) { 213 case '1': 214 case '2': 215 case '3': 216 case '4': 217 case 'I': 218 case 'P': 219 case 'R': 220 if ('\0' == cp[1]) 221 ok = 1; 222 break; 223 case 'B': 224 if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2])) 225 ok = 1; 226 break; 227 case 'C': 228 if ('W' == cp[1] && '\0' == cp[2]) 229 ok = 1; 230 break; 231 default: 232 break; 233 } 234 235 if (0 == ok) { 236 mandoc_vmsg(MANDOCERR_FT_BAD, man->parse, 237 n->line, n->pos, "ft %s", cp); 238 *cp = '\0'; 239 } 240 } 241 242 static void 243 check_part(CHKARGS) 244 { 245 246 if (n->type == ROFFT_BODY && n->child == NULL) 247 mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse, 248 n->line, n->pos, man_macronames[n->tok]); 249 } 250 251 static void 252 check_par(CHKARGS) 253 { 254 255 switch (n->type) { 256 case ROFFT_BLOCK: 257 if (n->body->child == NULL) 258 roff_node_delete(man, n); 259 break; 260 case ROFFT_BODY: 261 if (n->child == NULL) 262 mandoc_vmsg(MANDOCERR_PAR_SKIP, 263 man->parse, n->line, n->pos, 264 "%s empty", man_macronames[n->tok]); 265 break; 266 case ROFFT_HEAD: 267 if (n->child != NULL) 268 mandoc_vmsg(MANDOCERR_ARG_SKIP, 269 man->parse, n->line, n->pos, 270 "%s %s%s", man_macronames[n->tok], 271 n->child->string, 272 n->child->next != NULL ? " ..." : ""); 273 break; 274 default: 275 break; 276 } 277 } 278 279 static void 280 post_IP(CHKARGS) 281 { 282 283 switch (n->type) { 284 case ROFFT_BLOCK: 285 if (n->head->child == NULL && n->body->child == NULL) 286 roff_node_delete(man, n); 287 break; 288 case ROFFT_BODY: 289 if (n->parent->head->child == NULL && n->child == NULL) 290 mandoc_vmsg(MANDOCERR_PAR_SKIP, 291 man->parse, n->line, n->pos, 292 "%s empty", man_macronames[n->tok]); 293 break; 294 default: 295 break; 296 } 297 } 298 299 static void 300 post_TH(CHKARGS) 301 { 302 struct roff_node *nb; 303 const char *p; 304 305 free(man->meta.title); 306 free(man->meta.vol); 307 free(man->meta.os); 308 free(man->meta.msec); 309 free(man->meta.date); 310 311 man->meta.title = man->meta.vol = man->meta.date = 312 man->meta.msec = man->meta.os = NULL; 313 314 nb = n; 315 316 /* ->TITLE<- MSEC DATE OS VOL */ 317 318 n = n->child; 319 if (n && n->string) { 320 for (p = n->string; '\0' != *p; p++) { 321 /* Only warn about this once... */ 322 if (isalpha((unsigned char)*p) && 323 ! isupper((unsigned char)*p)) { 324 mandoc_vmsg(MANDOCERR_TITLE_CASE, 325 man->parse, n->line, 326 n->pos + (p - n->string), 327 "TH %s", n->string); 328 break; 329 } 330 } 331 man->meta.title = mandoc_strdup(n->string); 332 } else { 333 man->meta.title = mandoc_strdup(""); 334 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, 335 nb->line, nb->pos, "TH"); 336 } 337 338 /* TITLE ->MSEC<- DATE OS VOL */ 339 340 if (n) 341 n = n->next; 342 if (n && n->string) 343 man->meta.msec = mandoc_strdup(n->string); 344 else { 345 man->meta.msec = mandoc_strdup(""); 346 mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse, 347 nb->line, nb->pos, "TH %s", man->meta.title); 348 } 349 350 /* TITLE MSEC ->DATE<- OS VOL */ 351 352 if (n) 353 n = n->next; 354 if (n && n->string && '\0' != n->string[0]) { 355 man->meta.date = man->quick ? 356 mandoc_strdup(n->string) : 357 mandoc_normdate(man->parse, n->string, 358 n->line, n->pos); 359 } else { 360 man->meta.date = mandoc_strdup(""); 361 mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, 362 n ? n->line : nb->line, 363 n ? n->pos : nb->pos, "TH"); 364 } 365 366 /* TITLE MSEC DATE ->OS<- VOL */ 367 368 if (n && (n = n->next)) 369 man->meta.os = mandoc_strdup(n->string); 370 else if (man->defos != NULL) 371 man->meta.os = mandoc_strdup(man->defos); 372 373 /* TITLE MSEC DATE OS ->VOL<- */ 374 /* If missing, use the default VOL name for MSEC. */ 375 376 if (n && (n = n->next)) 377 man->meta.vol = mandoc_strdup(n->string); 378 else if ('\0' != man->meta.msec[0] && 379 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 380 man->meta.vol = mandoc_strdup(p); 381 382 if (n != NULL && (n = n->next) != NULL) 383 mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, 384 n->line, n->pos, "TH ... %s", n->string); 385 386 /* 387 * Remove the `TH' node after we've processed it for our 388 * meta-data. 389 */ 390 roff_node_delete(man, man->last); 391 } 392 393 static void 394 post_UC(CHKARGS) 395 { 396 static const char * const bsd_versions[] = { 397 "3rd Berkeley Distribution", 398 "4th Berkeley Distribution", 399 "4.2 Berkeley Distribution", 400 "4.3 Berkeley Distribution", 401 "4.4 Berkeley Distribution", 402 }; 403 404 const char *p, *s; 405 406 n = n->child; 407 408 if (n == NULL || n->type != ROFFT_TEXT) 409 p = bsd_versions[0]; 410 else { 411 s = n->string; 412 if (0 == strcmp(s, "3")) 413 p = bsd_versions[0]; 414 else if (0 == strcmp(s, "4")) 415 p = bsd_versions[1]; 416 else if (0 == strcmp(s, "5")) 417 p = bsd_versions[2]; 418 else if (0 == strcmp(s, "6")) 419 p = bsd_versions[3]; 420 else if (0 == strcmp(s, "7")) 421 p = bsd_versions[4]; 422 else 423 p = bsd_versions[0]; 424 } 425 426 free(man->meta.os); 427 man->meta.os = mandoc_strdup(p); 428 } 429 430 static void 431 post_AT(CHKARGS) 432 { 433 static const char * const unix_versions[] = { 434 "7th Edition", 435 "System III", 436 "System V", 437 "System V Release 2", 438 }; 439 440 struct roff_node *nn; 441 const char *p, *s; 442 443 n = n->child; 444 445 if (n == NULL || n->type != ROFFT_TEXT) 446 p = unix_versions[0]; 447 else { 448 s = n->string; 449 if (0 == strcmp(s, "3")) 450 p = unix_versions[0]; 451 else if (0 == strcmp(s, "4")) 452 p = unix_versions[1]; 453 else if (0 == strcmp(s, "5")) { 454 nn = n->next; 455 if (nn != NULL && 456 nn->type == ROFFT_TEXT && 457 nn->string[0] != '\0') 458 p = unix_versions[3]; 459 else 460 p = unix_versions[2]; 461 } else 462 p = unix_versions[0]; 463 } 464 465 free(man->meta.os); 466 man->meta.os = mandoc_strdup(p); 467 } 468 469 static void 470 post_vs(CHKARGS) 471 { 472 473 if (NULL != n->prev) 474 return; 475 476 switch (n->parent->tok) { 477 case MAN_SH: 478 case MAN_SS: 479 mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos, 480 "%s after %s", man_macronames[n->tok], 481 man_macronames[n->parent->tok]); 482 /* FALLTHROUGH */ 483 case TOKEN_NONE: 484 /* 485 * Don't warn about this because it occurs in pod2man 486 * and would cause considerable (unfixable) warnage. 487 */ 488 roff_node_delete(man, n); 489 break; 490 default: 491 break; 492 } 493 } 494