1 /* $OpenBSD: man_validate.c,v 1.103 2017/06/25 07:23:53 bentley Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <errno.h> 23 #include <limits.h> 24 #include <stdarg.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <time.h> 28 29 #include "mandoc_aux.h" 30 #include "mandoc.h" 31 #include "roff.h" 32 #include "man.h" 33 #include "libmandoc.h" 34 #include "roff_int.h" 35 #include "libman.h" 36 37 #define CHKARGS struct roff_man *man, struct roff_node *n 38 39 typedef void (*v_check)(CHKARGS); 40 41 static void check_par(CHKARGS); 42 static void check_part(CHKARGS); 43 static void check_root(CHKARGS); 44 static void check_text(CHKARGS); 45 46 static void post_AT(CHKARGS); 47 static void post_IP(CHKARGS); 48 static void post_OP(CHKARGS); 49 static void post_TH(CHKARGS); 50 static void post_UC(CHKARGS); 51 static void post_UR(CHKARGS); 52 static void post_in(CHKARGS); 53 static void post_vs(CHKARGS); 54 55 static const v_check __man_valids[MAN_MAX - MAN_TH] = { 56 post_TH, /* TH */ 57 NULL, /* SH */ 58 NULL, /* SS */ 59 NULL, /* TP */ 60 check_par, /* LP */ 61 check_par, /* PP */ 62 check_par, /* P */ 63 post_IP, /* IP */ 64 NULL, /* HP */ 65 NULL, /* SM */ 66 NULL, /* SB */ 67 NULL, /* BI */ 68 NULL, /* IB */ 69 NULL, /* BR */ 70 NULL, /* RB */ 71 NULL, /* R */ 72 NULL, /* B */ 73 NULL, /* I */ 74 NULL, /* IR */ 75 NULL, /* RI */ 76 NULL, /* nf */ 77 NULL, /* fi */ 78 NULL, /* RE */ 79 check_part, /* RS */ 80 NULL, /* DT */ 81 post_UC, /* UC */ 82 NULL, /* PD */ 83 post_AT, /* AT */ 84 post_in, /* in */ 85 post_OP, /* OP */ 86 NULL, /* EX */ 87 NULL, /* EE */ 88 post_UR, /* UR */ 89 NULL, /* UE */ 90 post_UR, /* MT */ 91 NULL, /* ME */ 92 }; 93 static const v_check *man_valids = __man_valids - MAN_TH; 94 95 96 void 97 man_node_validate(struct roff_man *man) 98 { 99 struct roff_node *n; 100 const v_check *cp; 101 102 n = man->last; 103 man->last = man->last->child; 104 while (man->last != NULL) { 105 man_node_validate(man); 106 if (man->last == n) 107 man->last = man->last->child; 108 else 109 man->last = man->last->next; 110 } 111 112 man->last = n; 113 man->next = ROFF_NEXT_SIBLING; 114 switch (n->type) { 115 case ROFFT_TEXT: 116 check_text(man, n); 117 break; 118 case ROFFT_ROOT: 119 check_root(man, n); 120 break; 121 case ROFFT_EQN: 122 case ROFFT_TBL: 123 break; 124 default: 125 if (n->tok < ROFF_MAX) { 126 switch (n->tok) { 127 case ROFF_br: 128 case ROFF_sp: 129 post_vs(man, n); 130 break; 131 default: 132 roff_validate(man); 133 break; 134 } 135 break; 136 } 137 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 138 cp = man_valids + n->tok; 139 if (*cp) 140 (*cp)(man, n); 141 if (man->last == n) 142 man_state(man, n); 143 break; 144 } 145 } 146 147 static void 148 check_root(CHKARGS) 149 { 150 151 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 152 153 if (NULL == man->first->child) 154 mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse, 155 n->line, n->pos, NULL); 156 else 157 man->meta.hasbody = 1; 158 159 if (NULL == man->meta.title) { 160 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, 161 n->line, n->pos, NULL); 162 163 /* 164 * If a title hasn't been set, do so now (by 165 * implication, date and section also aren't set). 166 */ 167 168 man->meta.title = mandoc_strdup(""); 169 man->meta.msec = mandoc_strdup(""); 170 man->meta.date = man->quick ? mandoc_strdup("") : 171 mandoc_normdate(man, NULL, n->line, n->pos); 172 } 173 174 if (man->meta.os_e && 175 (man->meta.rcsids & (1 << man->meta.os_e)) == 0) 176 mandoc_msg(MANDOCERR_RCS_MISSING, man->parse, 0, 0, 177 man->meta.os_e == MANDOC_OS_OPENBSD ? 178 "(OpenBSD)" : "(NetBSD)"); 179 } 180 181 static void 182 check_text(CHKARGS) 183 { 184 char *cp, *p; 185 186 if (MAN_LITERAL & man->flags) 187 return; 188 189 cp = n->string; 190 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 191 mandoc_msg(MANDOCERR_FI_TAB, man->parse, 192 n->line, n->pos + (p - cp), NULL); 193 } 194 195 static void 196 post_OP(CHKARGS) 197 { 198 199 if (n->child == NULL) 200 mandoc_msg(MANDOCERR_OP_EMPTY, man->parse, 201 n->line, n->pos, "OP"); 202 else if (n->child->next != NULL && n->child->next->next != NULL) { 203 n = n->child->next->next; 204 mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, 205 n->line, n->pos, "OP ... %s", n->string); 206 } 207 } 208 209 static void 210 post_UR(CHKARGS) 211 { 212 213 if (n->type == ROFFT_HEAD && n->child == NULL) 214 mandoc_vmsg(MANDOCERR_UR_NOHEAD, man->parse, 215 n->line, n->pos, roff_name[n->tok]); 216 check_part(man, n); 217 } 218 219 static void 220 check_part(CHKARGS) 221 { 222 223 if (n->type == ROFFT_BODY && n->child == NULL) 224 mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse, 225 n->line, n->pos, roff_name[n->tok]); 226 } 227 228 static void 229 check_par(CHKARGS) 230 { 231 232 switch (n->type) { 233 case ROFFT_BLOCK: 234 if (n->body->child == NULL) 235 roff_node_delete(man, n); 236 break; 237 case ROFFT_BODY: 238 if (n->child == NULL) 239 mandoc_vmsg(MANDOCERR_PAR_SKIP, 240 man->parse, n->line, n->pos, 241 "%s empty", roff_name[n->tok]); 242 break; 243 case ROFFT_HEAD: 244 if (n->child != NULL) 245 mandoc_vmsg(MANDOCERR_ARG_SKIP, 246 man->parse, n->line, n->pos, "%s %s%s", 247 roff_name[n->tok], n->child->string, 248 n->child->next != NULL ? " ..." : ""); 249 break; 250 default: 251 break; 252 } 253 } 254 255 static void 256 post_IP(CHKARGS) 257 { 258 259 switch (n->type) { 260 case ROFFT_BLOCK: 261 if (n->head->child == NULL && n->body->child == NULL) 262 roff_node_delete(man, n); 263 break; 264 case ROFFT_BODY: 265 if (n->parent->head->child == NULL && n->child == NULL) 266 mandoc_vmsg(MANDOCERR_PAR_SKIP, 267 man->parse, n->line, n->pos, 268 "%s empty", roff_name[n->tok]); 269 break; 270 default: 271 break; 272 } 273 } 274 275 static void 276 post_TH(CHKARGS) 277 { 278 struct roff_node *nb; 279 const char *p; 280 281 free(man->meta.title); 282 free(man->meta.vol); 283 free(man->meta.os); 284 free(man->meta.msec); 285 free(man->meta.date); 286 287 man->meta.title = man->meta.vol = man->meta.date = 288 man->meta.msec = man->meta.os = NULL; 289 290 nb = n; 291 292 /* ->TITLE<- MSEC DATE OS VOL */ 293 294 n = n->child; 295 if (n && n->string) { 296 for (p = n->string; '\0' != *p; p++) { 297 /* Only warn about this once... */ 298 if (isalpha((unsigned char)*p) && 299 ! isupper((unsigned char)*p)) { 300 mandoc_vmsg(MANDOCERR_TITLE_CASE, 301 man->parse, n->line, 302 n->pos + (p - n->string), 303 "TH %s", n->string); 304 break; 305 } 306 } 307 man->meta.title = mandoc_strdup(n->string); 308 } else { 309 man->meta.title = mandoc_strdup(""); 310 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, 311 nb->line, nb->pos, "TH"); 312 } 313 314 /* TITLE ->MSEC<- DATE OS VOL */ 315 316 if (n) 317 n = n->next; 318 if (n && n->string) 319 man->meta.msec = mandoc_strdup(n->string); 320 else { 321 man->meta.msec = mandoc_strdup(""); 322 mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse, 323 nb->line, nb->pos, "TH %s", man->meta.title); 324 } 325 326 /* TITLE MSEC ->DATE<- OS VOL */ 327 328 if (n) 329 n = n->next; 330 if (n && n->string && '\0' != n->string[0]) { 331 man->meta.date = man->quick ? 332 mandoc_strdup(n->string) : 333 mandoc_normdate(man, n->string, n->line, n->pos); 334 } else { 335 man->meta.date = mandoc_strdup(""); 336 mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, 337 n ? n->line : nb->line, 338 n ? n->pos : nb->pos, "TH"); 339 } 340 341 /* TITLE MSEC DATE ->OS<- VOL */ 342 343 if (n && (n = n->next)) 344 man->meta.os = mandoc_strdup(n->string); 345 else if (man->os_s != NULL) 346 man->meta.os = mandoc_strdup(man->os_s); 347 if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) { 348 if (strstr(man->meta.os, "OpenBSD") != NULL) 349 man->meta.os_e = MANDOC_OS_OPENBSD; 350 else if (strstr(man->meta.os, "NetBSD") != NULL) 351 man->meta.os_e = MANDOC_OS_NETBSD; 352 } 353 354 /* TITLE MSEC DATE OS ->VOL<- */ 355 /* If missing, use the default VOL name for MSEC. */ 356 357 if (n && (n = n->next)) 358 man->meta.vol = mandoc_strdup(n->string); 359 else if ('\0' != man->meta.msec[0] && 360 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 361 man->meta.vol = mandoc_strdup(p); 362 363 if (n != NULL && (n = n->next) != NULL) 364 mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, 365 n->line, n->pos, "TH ... %s", n->string); 366 367 /* 368 * Remove the `TH' node after we've processed it for our 369 * meta-data. 370 */ 371 roff_node_delete(man, man->last); 372 } 373 374 static void 375 post_UC(CHKARGS) 376 { 377 static const char * const bsd_versions[] = { 378 "3rd Berkeley Distribution", 379 "4th Berkeley Distribution", 380 "4.2 Berkeley Distribution", 381 "4.3 Berkeley Distribution", 382 "4.4 Berkeley Distribution", 383 }; 384 385 const char *p, *s; 386 387 n = n->child; 388 389 if (n == NULL || n->type != ROFFT_TEXT) 390 p = bsd_versions[0]; 391 else { 392 s = n->string; 393 if (0 == strcmp(s, "3")) 394 p = bsd_versions[0]; 395 else if (0 == strcmp(s, "4")) 396 p = bsd_versions[1]; 397 else if (0 == strcmp(s, "5")) 398 p = bsd_versions[2]; 399 else if (0 == strcmp(s, "6")) 400 p = bsd_versions[3]; 401 else if (0 == strcmp(s, "7")) 402 p = bsd_versions[4]; 403 else 404 p = bsd_versions[0]; 405 } 406 407 free(man->meta.os); 408 man->meta.os = mandoc_strdup(p); 409 } 410 411 static void 412 post_AT(CHKARGS) 413 { 414 static const char * const unix_versions[] = { 415 "7th Edition", 416 "System III", 417 "System V", 418 "System V Release 2", 419 }; 420 421 struct roff_node *nn; 422 const char *p, *s; 423 424 n = n->child; 425 426 if (n == NULL || n->type != ROFFT_TEXT) 427 p = unix_versions[0]; 428 else { 429 s = n->string; 430 if (0 == strcmp(s, "3")) 431 p = unix_versions[0]; 432 else if (0 == strcmp(s, "4")) 433 p = unix_versions[1]; 434 else if (0 == strcmp(s, "5")) { 435 nn = n->next; 436 if (nn != NULL && 437 nn->type == ROFFT_TEXT && 438 nn->string[0] != '\0') 439 p = unix_versions[3]; 440 else 441 p = unix_versions[2]; 442 } else 443 p = unix_versions[0]; 444 } 445 446 free(man->meta.os); 447 man->meta.os = mandoc_strdup(p); 448 } 449 450 static void 451 post_in(CHKARGS) 452 { 453 char *s; 454 455 if (n->parent->tok != MAN_TP || 456 n->parent->type != ROFFT_HEAD || 457 n->child == NULL || 458 *n->child->string == '+' || 459 *n->child->string == '-') 460 return; 461 mandoc_asprintf(&s, "+%s", n->child->string); 462 free(n->child->string); 463 n->child->string = s; 464 } 465 466 static void 467 post_vs(CHKARGS) 468 { 469 470 if (NULL != n->prev) 471 return; 472 473 switch (n->parent->tok) { 474 case MAN_SH: 475 case MAN_SS: 476 case MAN_PP: 477 case MAN_LP: 478 case MAN_P: 479 mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos, 480 "%s after %s", roff_name[n->tok], 481 roff_name[n->parent->tok]); 482 /* FALLTHROUGH */ 483 case TOKEN_NONE: 484 /* 485 * Don't warn about this because it occurs in pod2man 486 * and would cause considerable (unfixable) warnage. 487 */ 488 roff_node_delete(man, n); 489 break; 490 default: 491 break; 492 } 493 } 494