1 /* $OpenBSD: man_validate.c,v 1.128 2023/04/28 20:14:19 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Validation module for man(7) syntax trees used by mandoc(1). 19 */ 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <errno.h> 25 #include <limits.h> 26 #include <stdarg.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <time.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "man.h" 36 #include "libmandoc.h" 37 #include "roff_int.h" 38 #include "libman.h" 39 #include "tag.h" 40 41 #define CHKARGS struct roff_man *man, struct roff_node *n 42 43 typedef void (*v_check)(CHKARGS); 44 45 static void check_par(CHKARGS); 46 static void check_part(CHKARGS); 47 static void check_root(CHKARGS); 48 static void check_tag(struct roff_node *, struct roff_node *); 49 static void check_text(CHKARGS); 50 51 static void post_AT(CHKARGS); 52 static void post_EE(CHKARGS); 53 static void post_EX(CHKARGS); 54 static void post_IP(CHKARGS); 55 static void post_OP(CHKARGS); 56 static void post_SH(CHKARGS); 57 static void post_TH(CHKARGS); 58 static void post_TP(CHKARGS); 59 static void post_UC(CHKARGS); 60 static void post_UR(CHKARGS); 61 static void post_in(CHKARGS); 62 63 static const v_check man_valids[MAN_MAX - MAN_TH] = { 64 post_TH, /* TH */ 65 post_SH, /* SH */ 66 post_SH, /* SS */ 67 post_TP, /* TP */ 68 post_TP, /* TQ */ 69 check_par, /* LP */ 70 check_par, /* PP */ 71 check_par, /* P */ 72 post_IP, /* IP */ 73 NULL, /* HP */ 74 NULL, /* SM */ 75 NULL, /* SB */ 76 NULL, /* BI */ 77 NULL, /* IB */ 78 NULL, /* BR */ 79 NULL, /* RB */ 80 NULL, /* R */ 81 NULL, /* B */ 82 NULL, /* I */ 83 NULL, /* IR */ 84 NULL, /* RI */ 85 NULL, /* RE */ 86 check_part, /* RS */ 87 NULL, /* DT */ 88 post_UC, /* UC */ 89 NULL, /* PD */ 90 post_AT, /* AT */ 91 post_in, /* in */ 92 NULL, /* SY */ 93 NULL, /* YS */ 94 post_OP, /* OP */ 95 post_EX, /* EX */ 96 post_EE, /* EE */ 97 post_UR, /* UR */ 98 NULL, /* UE */ 99 post_UR, /* MT */ 100 NULL, /* ME */ 101 }; 102 103 104 /* Validate the subtree rooted at man->last. */ 105 void 106 man_validate(struct roff_man *man) 107 { 108 struct roff_node *n; 109 const v_check *cp; 110 111 /* 112 * Iterate over all children, recursing into each one 113 * in turn, depth-first. 114 */ 115 116 n = man->last; 117 man->last = man->last->child; 118 while (man->last != NULL) { 119 man_validate(man); 120 if (man->last == n) 121 man->last = man->last->child; 122 else 123 man->last = man->last->next; 124 } 125 126 /* Finally validate the macro itself. */ 127 128 man->last = n; 129 man->next = ROFF_NEXT_SIBLING; 130 switch (n->type) { 131 case ROFFT_TEXT: 132 check_text(man, n); 133 break; 134 case ROFFT_ROOT: 135 check_root(man, n); 136 break; 137 case ROFFT_COMMENT: 138 case ROFFT_EQN: 139 case ROFFT_TBL: 140 break; 141 default: 142 if (n->tok < ROFF_MAX) { 143 roff_validate(man); 144 break; 145 } 146 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 147 cp = man_valids + (n->tok - MAN_TH); 148 if (*cp) 149 (*cp)(man, n); 150 if (man->last == n) 151 n->flags |= NODE_VALID; 152 break; 153 } 154 } 155 156 static void 157 check_root(CHKARGS) 158 { 159 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 160 161 if (n->last == NULL || n->last->type == ROFFT_COMMENT) 162 mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL); 163 else 164 man->meta.hasbody = 1; 165 166 if (NULL == man->meta.title) { 167 mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL); 168 169 /* 170 * If a title hasn't been set, do so now (by 171 * implication, date and section also aren't set). 172 */ 173 174 man->meta.title = mandoc_strdup(""); 175 man->meta.msec = mandoc_strdup(""); 176 man->meta.date = mandoc_normdate(NULL, NULL); 177 } 178 179 if (man->meta.os_e && 180 (man->meta.rcsids & (1 << man->meta.os_e)) == 0) 181 mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, 182 man->meta.os_e == MANDOC_OS_OPENBSD ? 183 "(OpenBSD)" : "(NetBSD)"); 184 } 185 186 /* 187 * Skip leading whitespace, dashes, backslashes, and font escapes, 188 * then create a tag if the first following byte is a letter. 189 * Priority is high unless whitespace is present. 190 */ 191 static void 192 check_tag(struct roff_node *n, struct roff_node *nt) 193 { 194 const char *cp, *arg; 195 int prio, sz; 196 197 if (nt == NULL || nt->type != ROFFT_TEXT) 198 return; 199 200 cp = nt->string; 201 prio = TAG_STRONG; 202 for (;;) { 203 switch (*cp) { 204 case ' ': 205 case '\t': 206 prio = TAG_WEAK; 207 /* FALLTHROUGH */ 208 case '-': 209 cp++; 210 break; 211 case '\\': 212 cp++; 213 switch (mandoc_escape(&cp, &arg, &sz)) { 214 case ESCAPE_FONT: 215 case ESCAPE_FONTBOLD: 216 case ESCAPE_FONTITALIC: 217 case ESCAPE_FONTBI: 218 case ESCAPE_FONTROMAN: 219 case ESCAPE_FONTCR: 220 case ESCAPE_FONTCB: 221 case ESCAPE_FONTCI: 222 case ESCAPE_FONTPREV: 223 case ESCAPE_IGNORE: 224 break; 225 case ESCAPE_SPECIAL: 226 if (sz != 1) 227 return; 228 switch (*arg) { 229 case '-': 230 case 'e': 231 break; 232 default: 233 return; 234 } 235 break; 236 default: 237 return; 238 } 239 break; 240 default: 241 if (isalpha((unsigned char)*cp)) 242 tag_put(cp, prio, n); 243 return; 244 } 245 } 246 } 247 248 static void 249 check_text(CHKARGS) 250 { 251 char *cp, *p; 252 253 if (n->flags & NODE_NOFILL) 254 return; 255 256 cp = n->string; 257 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 258 mandoc_msg(MANDOCERR_FI_TAB, 259 n->line, n->pos + (int)(p - cp), NULL); 260 } 261 262 static void 263 post_EE(CHKARGS) 264 { 265 if ((n->flags & NODE_NOFILL) == 0) 266 mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE"); 267 } 268 269 static void 270 post_EX(CHKARGS) 271 { 272 if (n->flags & NODE_NOFILL) 273 mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX"); 274 } 275 276 static void 277 post_OP(CHKARGS) 278 { 279 280 if (n->child == NULL) 281 mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP"); 282 else if (n->child->next != NULL && n->child->next->next != NULL) { 283 n = n->child->next->next; 284 mandoc_msg(MANDOCERR_ARG_EXCESS, 285 n->line, n->pos, "OP ... %s", n->string); 286 } 287 } 288 289 static void 290 post_SH(CHKARGS) 291 { 292 struct roff_node *nc; 293 char *cp, *tag; 294 295 nc = n->child; 296 switch (n->type) { 297 case ROFFT_HEAD: 298 tag = NULL; 299 deroff(&tag, n); 300 if (tag != NULL) { 301 for (cp = tag; *cp != '\0'; cp++) 302 if (*cp == ' ') 303 *cp = '_'; 304 if (nc != NULL && nc->type == ROFFT_TEXT && 305 strcmp(nc->string, tag) == 0) 306 tag_put(NULL, TAG_STRONG, n); 307 else 308 tag_put(tag, TAG_FALLBACK, n); 309 free(tag); 310 } 311 return; 312 case ROFFT_BODY: 313 if (nc != NULL) 314 break; 315 return; 316 default: 317 return; 318 } 319 320 if ((nc->tok == MAN_LP || nc->tok == MAN_PP || nc->tok == MAN_P) && 321 nc->body->child != NULL) { 322 while (nc->body->last != NULL) { 323 man->next = ROFF_NEXT_CHILD; 324 roff_node_relink(man, nc->body->last); 325 man->last = n; 326 } 327 } 328 329 if (nc->tok == MAN_LP || nc->tok == MAN_PP || nc->tok == MAN_P || 330 nc->tok == ROFF_sp || nc->tok == ROFF_br) { 331 mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, 332 "%s after %s", roff_name[nc->tok], roff_name[n->tok]); 333 roff_node_delete(man, nc); 334 } 335 336 /* 337 * Trailing PP is empty, so it is deleted by check_par(). 338 * Trailing sp is significant. 339 */ 340 341 if ((nc = n->last) != NULL && nc->tok == ROFF_br) { 342 mandoc_msg(MANDOCERR_PAR_SKIP, 343 nc->line, nc->pos, "%s at the end of %s", 344 roff_name[nc->tok], roff_name[n->tok]); 345 roff_node_delete(man, nc); 346 } 347 } 348 349 static void 350 post_UR(CHKARGS) 351 { 352 if (n->type == ROFFT_HEAD && n->child == NULL) 353 mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos, 354 "%s", roff_name[n->tok]); 355 } 356 357 static void 358 check_part(CHKARGS) 359 { 360 if (n->type == ROFFT_BODY && n->child == NULL) 361 mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, 362 "%s", roff_name[n->tok]); 363 } 364 365 static void 366 check_par(CHKARGS) 367 { 368 369 switch (n->type) { 370 case ROFFT_BLOCK: 371 if (n->body->child == NULL) 372 roff_node_delete(man, n); 373 break; 374 case ROFFT_BODY: 375 if (n->child != NULL && 376 (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) { 377 mandoc_msg(MANDOCERR_PAR_SKIP, 378 n->child->line, n->child->pos, 379 "%s after %s", roff_name[n->child->tok], 380 roff_name[n->tok]); 381 roff_node_delete(man, n->child); 382 } 383 if (n->child == NULL) 384 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 385 "%s empty", roff_name[n->tok]); 386 break; 387 case ROFFT_HEAD: 388 if (n->child != NULL) 389 mandoc_msg(MANDOCERR_ARG_SKIP, 390 n->line, n->pos, "%s %s%s", 391 roff_name[n->tok], n->child->string, 392 n->child->next != NULL ? " ..." : ""); 393 break; 394 default: 395 break; 396 } 397 } 398 399 static void 400 post_IP(CHKARGS) 401 { 402 switch (n->type) { 403 case ROFFT_BLOCK: 404 if (n->head->child == NULL && n->body->child == NULL) 405 roff_node_delete(man, n); 406 break; 407 case ROFFT_HEAD: 408 check_tag(n, n->child); 409 break; 410 case ROFFT_BODY: 411 if (n->parent->head->child == NULL && n->child == NULL) 412 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 413 "%s empty", roff_name[n->tok]); 414 break; 415 default: 416 break; 417 } 418 } 419 420 /* 421 * The first next-line element in the head is the tag. 422 * If that's a font macro, use its first child instead. 423 */ 424 static void 425 post_TP(CHKARGS) 426 { 427 struct roff_node *nt; 428 429 if (n->type != ROFFT_HEAD || (nt = n->child) == NULL) 430 return; 431 432 while ((nt->flags & NODE_LINE) == 0) 433 if ((nt = nt->next) == NULL) 434 return; 435 436 switch (nt->tok) { 437 case MAN_B: 438 case MAN_BI: 439 case MAN_BR: 440 case MAN_I: 441 case MAN_IB: 442 case MAN_IR: 443 nt = nt->child; 444 break; 445 default: 446 break; 447 } 448 check_tag(n, nt); 449 } 450 451 static void 452 post_TH(CHKARGS) 453 { 454 struct roff_node *nb; 455 const char *p; 456 457 free(man->meta.title); 458 free(man->meta.vol); 459 free(man->meta.os); 460 free(man->meta.msec); 461 free(man->meta.date); 462 463 man->meta.title = man->meta.vol = man->meta.date = 464 man->meta.msec = man->meta.os = NULL; 465 466 nb = n; 467 468 /* ->TITLE<- MSEC DATE OS VOL */ 469 470 n = n->child; 471 if (n != NULL && n->string != NULL) { 472 for (p = n->string; *p != '\0'; p++) { 473 /* Only warn about this once... */ 474 if (isalpha((unsigned char)*p) && 475 ! isupper((unsigned char)*p)) { 476 mandoc_msg(MANDOCERR_TITLE_CASE, n->line, 477 n->pos + (int)(p - n->string), 478 "TH %s", n->string); 479 break; 480 } 481 } 482 man->meta.title = mandoc_strdup(n->string); 483 } else { 484 man->meta.title = mandoc_strdup(""); 485 mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH"); 486 } 487 488 /* TITLE ->MSEC<- DATE OS VOL */ 489 490 if (n != NULL) 491 n = n->next; 492 if (n != NULL && n->string != NULL) { 493 man->meta.msec = mandoc_strdup(n->string); 494 if (man->filesec != '\0' && 495 man->filesec != *n->string && 496 *n->string >= '1' && *n->string <= '9') 497 mandoc_msg(MANDOCERR_MSEC_FILE, n->line, n->pos, 498 "*.%c vs TH ... %c", man->filesec, *n->string); 499 } else { 500 man->meta.msec = mandoc_strdup(""); 501 mandoc_msg(MANDOCERR_MSEC_MISSING, 502 nb->line, nb->pos, "TH %s", man->meta.title); 503 } 504 505 /* TITLE MSEC ->DATE<- OS VOL */ 506 507 if (n != NULL) 508 n = n->next; 509 if (man->quick && n != NULL) 510 man->meta.date = mandoc_strdup(""); 511 else 512 man->meta.date = mandoc_normdate(n, nb); 513 514 /* TITLE MSEC DATE ->OS<- VOL */ 515 516 if (n && (n = n->next)) 517 man->meta.os = mandoc_strdup(n->string); 518 else if (man->os_s != NULL) 519 man->meta.os = mandoc_strdup(man->os_s); 520 if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) { 521 if (strstr(man->meta.os, "OpenBSD") != NULL) 522 man->meta.os_e = MANDOC_OS_OPENBSD; 523 else if (strstr(man->meta.os, "NetBSD") != NULL) 524 man->meta.os_e = MANDOC_OS_NETBSD; 525 } 526 527 /* TITLE MSEC DATE OS ->VOL<- */ 528 /* If missing, use the default VOL name for MSEC. */ 529 530 if (n && (n = n->next)) 531 man->meta.vol = mandoc_strdup(n->string); 532 else if ('\0' != man->meta.msec[0] && 533 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 534 man->meta.vol = mandoc_strdup(p); 535 536 if (n != NULL && (n = n->next) != NULL) 537 mandoc_msg(MANDOCERR_ARG_EXCESS, 538 n->line, n->pos, "TH ... %s", n->string); 539 540 /* 541 * Remove the `TH' node after we've processed it for our 542 * meta-data. 543 */ 544 roff_node_delete(man, man->last); 545 } 546 547 static void 548 post_UC(CHKARGS) 549 { 550 static const char * const bsd_versions[] = { 551 "3rd Berkeley Distribution", 552 "4th Berkeley Distribution", 553 "4.2 Berkeley Distribution", 554 "4.3 Berkeley Distribution", 555 "4.4 Berkeley Distribution", 556 }; 557 558 const char *p, *s; 559 560 n = n->child; 561 562 if (n == NULL || n->type != ROFFT_TEXT) 563 p = bsd_versions[0]; 564 else { 565 s = n->string; 566 if (0 == strcmp(s, "3")) 567 p = bsd_versions[0]; 568 else if (0 == strcmp(s, "4")) 569 p = bsd_versions[1]; 570 else if (0 == strcmp(s, "5")) 571 p = bsd_versions[2]; 572 else if (0 == strcmp(s, "6")) 573 p = bsd_versions[3]; 574 else if (0 == strcmp(s, "7")) 575 p = bsd_versions[4]; 576 else 577 p = bsd_versions[0]; 578 } 579 580 free(man->meta.os); 581 man->meta.os = mandoc_strdup(p); 582 } 583 584 static void 585 post_AT(CHKARGS) 586 { 587 static const char * const unix_versions[] = { 588 "7th Edition", 589 "System III", 590 "System V", 591 "System V Release 2", 592 }; 593 594 struct roff_node *nn; 595 const char *p, *s; 596 597 n = n->child; 598 599 if (n == NULL || n->type != ROFFT_TEXT) 600 p = unix_versions[0]; 601 else { 602 s = n->string; 603 if (0 == strcmp(s, "3")) 604 p = unix_versions[0]; 605 else if (0 == strcmp(s, "4")) 606 p = unix_versions[1]; 607 else if (0 == strcmp(s, "5")) { 608 nn = n->next; 609 if (nn != NULL && 610 nn->type == ROFFT_TEXT && 611 nn->string[0] != '\0') 612 p = unix_versions[3]; 613 else 614 p = unix_versions[2]; 615 } else 616 p = unix_versions[0]; 617 } 618 619 free(man->meta.os); 620 man->meta.os = mandoc_strdup(p); 621 } 622 623 static void 624 post_in(CHKARGS) 625 { 626 char *s; 627 628 if (n->parent->tok != MAN_TP || 629 n->parent->type != ROFFT_HEAD || 630 n->child == NULL || 631 *n->child->string == '+' || 632 *n->child->string == '-') 633 return; 634 mandoc_asprintf(&s, "+%s", n->child->string); 635 free(n->child->string); 636 n->child->string = s; 637 } 638