1 /* $OpenBSD: man_validate.c,v 1.120 2020/01/19 16:16:32 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <errno.h> 23 #include <limits.h> 24 #include <stdarg.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <time.h> 29 30 #include "mandoc_aux.h" 31 #include "mandoc.h" 32 #include "roff.h" 33 #include "man.h" 34 #include "libmandoc.h" 35 #include "roff_int.h" 36 #include "libman.h" 37 38 #define CHKARGS struct roff_man *man, struct roff_node *n 39 40 typedef void (*v_check)(CHKARGS); 41 42 static void check_abort(CHKARGS) __attribute__((__noreturn__)); 43 static void check_par(CHKARGS); 44 static void check_part(CHKARGS); 45 static void check_root(CHKARGS); 46 static void check_text(CHKARGS); 47 48 static void post_AT(CHKARGS); 49 static void post_EE(CHKARGS); 50 static void post_EX(CHKARGS); 51 static void post_IP(CHKARGS); 52 static void post_OP(CHKARGS); 53 static void post_SH(CHKARGS); 54 static void post_TH(CHKARGS); 55 static void post_UC(CHKARGS); 56 static void post_UR(CHKARGS); 57 static void post_in(CHKARGS); 58 59 static const v_check man_valids[MAN_MAX - MAN_TH] = { 60 post_TH, /* TH */ 61 post_SH, /* SH */ 62 post_SH, /* SS */ 63 NULL, /* TP */ 64 NULL, /* TQ */ 65 check_abort,/* LP */ 66 check_par, /* PP */ 67 check_abort,/* P */ 68 post_IP, /* IP */ 69 NULL, /* HP */ 70 NULL, /* SM */ 71 NULL, /* SB */ 72 NULL, /* BI */ 73 NULL, /* IB */ 74 NULL, /* BR */ 75 NULL, /* RB */ 76 NULL, /* R */ 77 NULL, /* B */ 78 NULL, /* I */ 79 NULL, /* IR */ 80 NULL, /* RI */ 81 NULL, /* RE */ 82 check_part, /* RS */ 83 NULL, /* DT */ 84 post_UC, /* UC */ 85 NULL, /* PD */ 86 post_AT, /* AT */ 87 post_in, /* in */ 88 NULL, /* SY */ 89 NULL, /* YS */ 90 post_OP, /* OP */ 91 post_EX, /* EX */ 92 post_EE, /* EE */ 93 post_UR, /* UR */ 94 NULL, /* UE */ 95 post_UR, /* MT */ 96 NULL, /* ME */ 97 }; 98 99 100 /* Validate the subtree rooted at man->last. */ 101 void 102 man_validate(struct roff_man *man) 103 { 104 struct roff_node *n; 105 const v_check *cp; 106 107 /* 108 * Translate obsolete macros such that later code 109 * does not need to look for them. 110 */ 111 112 n = man->last; 113 switch (n->tok) { 114 case MAN_LP: 115 case MAN_P: 116 n->tok = MAN_PP; 117 break; 118 default: 119 break; 120 } 121 122 /* 123 * Iterate over all children, recursing into each one 124 * in turn, depth-first. 125 */ 126 127 man->last = man->last->child; 128 while (man->last != NULL) { 129 man_validate(man); 130 if (man->last == n) 131 man->last = man->last->child; 132 else 133 man->last = man->last->next; 134 } 135 136 /* Finally validate the macro itself. */ 137 138 man->last = n; 139 man->next = ROFF_NEXT_SIBLING; 140 switch (n->type) { 141 case ROFFT_TEXT: 142 check_text(man, n); 143 break; 144 case ROFFT_ROOT: 145 check_root(man, n); 146 break; 147 case ROFFT_COMMENT: 148 case ROFFT_EQN: 149 case ROFFT_TBL: 150 break; 151 default: 152 if (n->tok < ROFF_MAX) { 153 roff_validate(man); 154 break; 155 } 156 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 157 cp = man_valids + (n->tok - MAN_TH); 158 if (*cp) 159 (*cp)(man, n); 160 if (man->last == n) 161 n->flags |= NODE_VALID; 162 break; 163 } 164 } 165 166 static void 167 check_root(CHKARGS) 168 { 169 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 170 171 if (n->last == NULL || n->last->type == ROFFT_COMMENT) 172 mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL); 173 else 174 man->meta.hasbody = 1; 175 176 if (NULL == man->meta.title) { 177 mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL); 178 179 /* 180 * If a title hasn't been set, do so now (by 181 * implication, date and section also aren't set). 182 */ 183 184 man->meta.title = mandoc_strdup(""); 185 man->meta.msec = mandoc_strdup(""); 186 man->meta.date = mandoc_normdate(NULL, NULL); 187 } 188 189 if (man->meta.os_e && 190 (man->meta.rcsids & (1 << man->meta.os_e)) == 0) 191 mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, 192 man->meta.os_e == MANDOC_OS_OPENBSD ? 193 "(OpenBSD)" : "(NetBSD)"); 194 } 195 196 static void 197 check_abort(CHKARGS) 198 { 199 abort(); 200 } 201 202 static void 203 check_text(CHKARGS) 204 { 205 char *cp, *p; 206 207 if (n->flags & NODE_NOFILL) 208 return; 209 210 cp = n->string; 211 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 212 mandoc_msg(MANDOCERR_FI_TAB, 213 n->line, n->pos + (int)(p - cp), NULL); 214 } 215 216 static void 217 post_EE(CHKARGS) 218 { 219 if ((n->flags & NODE_NOFILL) == 0) 220 mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE"); 221 } 222 223 static void 224 post_EX(CHKARGS) 225 { 226 if (n->flags & NODE_NOFILL) 227 mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX"); 228 } 229 230 static void 231 post_OP(CHKARGS) 232 { 233 234 if (n->child == NULL) 235 mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP"); 236 else if (n->child->next != NULL && n->child->next->next != NULL) { 237 n = n->child->next->next; 238 mandoc_msg(MANDOCERR_ARG_EXCESS, 239 n->line, n->pos, "OP ... %s", n->string); 240 } 241 } 242 243 static void 244 post_SH(CHKARGS) 245 { 246 struct roff_node *nc; 247 248 if (n->type != ROFFT_BODY || (nc = n->child) == NULL) 249 return; 250 251 if (nc->tok == MAN_PP && nc->body->child != NULL) { 252 while (nc->body->last != NULL) { 253 man->next = ROFF_NEXT_CHILD; 254 roff_node_relink(man, nc->body->last); 255 man->last = n; 256 } 257 } 258 259 if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) { 260 mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, 261 "%s after %s", roff_name[nc->tok], roff_name[n->tok]); 262 roff_node_delete(man, nc); 263 } 264 265 /* 266 * Trailing PP is empty, so it is deleted by check_par(). 267 * Trailing sp is significant. 268 */ 269 270 if ((nc = n->last) != NULL && nc->tok == ROFF_br) { 271 mandoc_msg(MANDOCERR_PAR_SKIP, 272 nc->line, nc->pos, "%s at the end of %s", 273 roff_name[nc->tok], roff_name[n->tok]); 274 roff_node_delete(man, nc); 275 } 276 } 277 278 static void 279 post_UR(CHKARGS) 280 { 281 if (n->type == ROFFT_HEAD && n->child == NULL) 282 mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos, 283 "%s", roff_name[n->tok]); 284 check_part(man, n); 285 } 286 287 static void 288 check_part(CHKARGS) 289 { 290 291 if (n->type == ROFFT_BODY && n->child == NULL) 292 mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, 293 "%s", roff_name[n->tok]); 294 } 295 296 static void 297 check_par(CHKARGS) 298 { 299 300 switch (n->type) { 301 case ROFFT_BLOCK: 302 if (n->body->child == NULL) 303 roff_node_delete(man, n); 304 break; 305 case ROFFT_BODY: 306 if (n->child != NULL && 307 (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) { 308 mandoc_msg(MANDOCERR_PAR_SKIP, 309 n->child->line, n->child->pos, 310 "%s after %s", roff_name[n->child->tok], 311 roff_name[n->tok]); 312 roff_node_delete(man, n->child); 313 } 314 if (n->child == NULL) 315 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 316 "%s empty", roff_name[n->tok]); 317 break; 318 case ROFFT_HEAD: 319 if (n->child != NULL) 320 mandoc_msg(MANDOCERR_ARG_SKIP, 321 n->line, n->pos, "%s %s%s", 322 roff_name[n->tok], n->child->string, 323 n->child->next != NULL ? " ..." : ""); 324 break; 325 default: 326 break; 327 } 328 } 329 330 static void 331 post_IP(CHKARGS) 332 { 333 334 switch (n->type) { 335 case ROFFT_BLOCK: 336 if (n->head->child == NULL && n->body->child == NULL) 337 roff_node_delete(man, n); 338 break; 339 case ROFFT_BODY: 340 if (n->parent->head->child == NULL && n->child == NULL) 341 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 342 "%s empty", roff_name[n->tok]); 343 break; 344 default: 345 break; 346 } 347 } 348 349 static void 350 post_TH(CHKARGS) 351 { 352 struct roff_node *nb; 353 const char *p; 354 355 free(man->meta.title); 356 free(man->meta.vol); 357 free(man->meta.os); 358 free(man->meta.msec); 359 free(man->meta.date); 360 361 man->meta.title = man->meta.vol = man->meta.date = 362 man->meta.msec = man->meta.os = NULL; 363 364 nb = n; 365 366 /* ->TITLE<- MSEC DATE OS VOL */ 367 368 n = n->child; 369 if (n != NULL && n->string != NULL) { 370 for (p = n->string; *p != '\0'; p++) { 371 /* Only warn about this once... */ 372 if (isalpha((unsigned char)*p) && 373 ! isupper((unsigned char)*p)) { 374 mandoc_msg(MANDOCERR_TITLE_CASE, n->line, 375 n->pos + (int)(p - n->string), 376 "TH %s", n->string); 377 break; 378 } 379 } 380 man->meta.title = mandoc_strdup(n->string); 381 } else { 382 man->meta.title = mandoc_strdup(""); 383 mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH"); 384 } 385 386 /* TITLE ->MSEC<- DATE OS VOL */ 387 388 if (n != NULL) 389 n = n->next; 390 if (n != NULL && n->string != NULL) 391 man->meta.msec = mandoc_strdup(n->string); 392 else { 393 man->meta.msec = mandoc_strdup(""); 394 mandoc_msg(MANDOCERR_MSEC_MISSING, 395 nb->line, nb->pos, "TH %s", man->meta.title); 396 } 397 398 /* TITLE MSEC ->DATE<- OS VOL */ 399 400 if (n != NULL) 401 n = n->next; 402 if (man->quick && n != NULL) 403 man->meta.date = mandoc_strdup(""); 404 else 405 man->meta.date = mandoc_normdate(n, nb); 406 407 /* TITLE MSEC DATE ->OS<- VOL */ 408 409 if (n && (n = n->next)) 410 man->meta.os = mandoc_strdup(n->string); 411 else if (man->os_s != NULL) 412 man->meta.os = mandoc_strdup(man->os_s); 413 if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) { 414 if (strstr(man->meta.os, "OpenBSD") != NULL) 415 man->meta.os_e = MANDOC_OS_OPENBSD; 416 else if (strstr(man->meta.os, "NetBSD") != NULL) 417 man->meta.os_e = MANDOC_OS_NETBSD; 418 } 419 420 /* TITLE MSEC DATE OS ->VOL<- */ 421 /* If missing, use the default VOL name for MSEC. */ 422 423 if (n && (n = n->next)) 424 man->meta.vol = mandoc_strdup(n->string); 425 else if ('\0' != man->meta.msec[0] && 426 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 427 man->meta.vol = mandoc_strdup(p); 428 429 if (n != NULL && (n = n->next) != NULL) 430 mandoc_msg(MANDOCERR_ARG_EXCESS, 431 n->line, n->pos, "TH ... %s", n->string); 432 433 /* 434 * Remove the `TH' node after we've processed it for our 435 * meta-data. 436 */ 437 roff_node_delete(man, man->last); 438 } 439 440 static void 441 post_UC(CHKARGS) 442 { 443 static const char * const bsd_versions[] = { 444 "3rd Berkeley Distribution", 445 "4th Berkeley Distribution", 446 "4.2 Berkeley Distribution", 447 "4.3 Berkeley Distribution", 448 "4.4 Berkeley Distribution", 449 }; 450 451 const char *p, *s; 452 453 n = n->child; 454 455 if (n == NULL || n->type != ROFFT_TEXT) 456 p = bsd_versions[0]; 457 else { 458 s = n->string; 459 if (0 == strcmp(s, "3")) 460 p = bsd_versions[0]; 461 else if (0 == strcmp(s, "4")) 462 p = bsd_versions[1]; 463 else if (0 == strcmp(s, "5")) 464 p = bsd_versions[2]; 465 else if (0 == strcmp(s, "6")) 466 p = bsd_versions[3]; 467 else if (0 == strcmp(s, "7")) 468 p = bsd_versions[4]; 469 else 470 p = bsd_versions[0]; 471 } 472 473 free(man->meta.os); 474 man->meta.os = mandoc_strdup(p); 475 } 476 477 static void 478 post_AT(CHKARGS) 479 { 480 static const char * const unix_versions[] = { 481 "7th Edition", 482 "System III", 483 "System V", 484 "System V Release 2", 485 }; 486 487 struct roff_node *nn; 488 const char *p, *s; 489 490 n = n->child; 491 492 if (n == NULL || n->type != ROFFT_TEXT) 493 p = unix_versions[0]; 494 else { 495 s = n->string; 496 if (0 == strcmp(s, "3")) 497 p = unix_versions[0]; 498 else if (0 == strcmp(s, "4")) 499 p = unix_versions[1]; 500 else if (0 == strcmp(s, "5")) { 501 nn = n->next; 502 if (nn != NULL && 503 nn->type == ROFFT_TEXT && 504 nn->string[0] != '\0') 505 p = unix_versions[3]; 506 else 507 p = unix_versions[2]; 508 } else 509 p = unix_versions[0]; 510 } 511 512 free(man->meta.os); 513 man->meta.os = mandoc_strdup(p); 514 } 515 516 static void 517 post_in(CHKARGS) 518 { 519 char *s; 520 521 if (n->parent->tok != MAN_TP || 522 n->parent->type != ROFFT_HEAD || 523 n->child == NULL || 524 *n->child->string == '+' || 525 *n->child->string == '-') 526 return; 527 mandoc_asprintf(&s, "+%s", n->child->string); 528 free(n->child->string); 529 n->child->string = s; 530 } 531