1 /* $OpenBSD: man_validate.c,v 1.101 2017/06/17 22:40:27 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <errno.h> 23 #include <limits.h> 24 #include <stdarg.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <time.h> 28 29 #include "mandoc_aux.h" 30 #include "mandoc.h" 31 #include "roff.h" 32 #include "man.h" 33 #include "libmandoc.h" 34 #include "roff_int.h" 35 #include "libman.h" 36 37 #define CHKARGS struct roff_man *man, struct roff_node *n 38 39 typedef void (*v_check)(CHKARGS); 40 41 static void check_par(CHKARGS); 42 static void check_part(CHKARGS); 43 static void check_root(CHKARGS); 44 static void check_text(CHKARGS); 45 46 static void post_AT(CHKARGS); 47 static void post_IP(CHKARGS); 48 static void post_OP(CHKARGS); 49 static void post_TH(CHKARGS); 50 static void post_UC(CHKARGS); 51 static void post_UR(CHKARGS); 52 static void post_in(CHKARGS); 53 static void post_vs(CHKARGS); 54 55 static const v_check __man_valids[MAN_MAX - MAN_TH] = { 56 post_TH, /* TH */ 57 NULL, /* SH */ 58 NULL, /* SS */ 59 NULL, /* TP */ 60 check_par, /* LP */ 61 check_par, /* PP */ 62 check_par, /* P */ 63 post_IP, /* IP */ 64 NULL, /* HP */ 65 NULL, /* SM */ 66 NULL, /* SB */ 67 NULL, /* BI */ 68 NULL, /* IB */ 69 NULL, /* BR */ 70 NULL, /* RB */ 71 NULL, /* R */ 72 NULL, /* B */ 73 NULL, /* I */ 74 NULL, /* IR */ 75 NULL, /* RI */ 76 NULL, /* nf */ 77 NULL, /* fi */ 78 NULL, /* RE */ 79 check_part, /* RS */ 80 NULL, /* DT */ 81 post_UC, /* UC */ 82 NULL, /* PD */ 83 post_AT, /* AT */ 84 post_in, /* in */ 85 post_OP, /* OP */ 86 NULL, /* EX */ 87 NULL, /* EE */ 88 post_UR, /* UR */ 89 NULL, /* UE */ 90 }; 91 static const v_check *man_valids = __man_valids - MAN_TH; 92 93 94 void 95 man_node_validate(struct roff_man *man) 96 { 97 struct roff_node *n; 98 const v_check *cp; 99 100 n = man->last; 101 man->last = man->last->child; 102 while (man->last != NULL) { 103 man_node_validate(man); 104 if (man->last == n) 105 man->last = man->last->child; 106 else 107 man->last = man->last->next; 108 } 109 110 man->last = n; 111 man->next = ROFF_NEXT_SIBLING; 112 switch (n->type) { 113 case ROFFT_TEXT: 114 check_text(man, n); 115 break; 116 case ROFFT_ROOT: 117 check_root(man, n); 118 break; 119 case ROFFT_EQN: 120 case ROFFT_TBL: 121 break; 122 default: 123 if (n->tok < ROFF_MAX) { 124 switch (n->tok) { 125 case ROFF_br: 126 case ROFF_sp: 127 post_vs(man, n); 128 break; 129 default: 130 roff_validate(man); 131 break; 132 } 133 break; 134 } 135 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 136 cp = man_valids + n->tok; 137 if (*cp) 138 (*cp)(man, n); 139 if (man->last == n) 140 man_state(man, n); 141 break; 142 } 143 } 144 145 static void 146 check_root(CHKARGS) 147 { 148 149 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 150 151 if (NULL == man->first->child) 152 mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse, 153 n->line, n->pos, NULL); 154 else 155 man->meta.hasbody = 1; 156 157 if (NULL == man->meta.title) { 158 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, 159 n->line, n->pos, NULL); 160 161 /* 162 * If a title hasn't been set, do so now (by 163 * implication, date and section also aren't set). 164 */ 165 166 man->meta.title = mandoc_strdup(""); 167 man->meta.msec = mandoc_strdup(""); 168 man->meta.date = man->quick ? mandoc_strdup("") : 169 mandoc_normdate(man, NULL, n->line, n->pos); 170 } 171 172 if (man->meta.os_e && 173 (man->meta.rcsids & (1 << man->meta.os_e)) == 0) 174 mandoc_msg(MANDOCERR_RCS_MISSING, man->parse, 0, 0, NULL); 175 } 176 177 static void 178 check_text(CHKARGS) 179 { 180 char *cp, *p; 181 182 if (MAN_LITERAL & man->flags) 183 return; 184 185 cp = n->string; 186 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 187 mandoc_msg(MANDOCERR_FI_TAB, man->parse, 188 n->line, n->pos + (p - cp), NULL); 189 } 190 191 static void 192 post_OP(CHKARGS) 193 { 194 195 if (n->child == NULL) 196 mandoc_msg(MANDOCERR_OP_EMPTY, man->parse, 197 n->line, n->pos, "OP"); 198 else if (n->child->next != NULL && n->child->next->next != NULL) { 199 n = n->child->next->next; 200 mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, 201 n->line, n->pos, "OP ... %s", n->string); 202 } 203 } 204 205 static void 206 post_UR(CHKARGS) 207 { 208 209 if (n->type == ROFFT_HEAD && n->child == NULL) 210 mandoc_vmsg(MANDOCERR_UR_NOHEAD, man->parse, 211 n->line, n->pos, "UR"); 212 check_part(man, n); 213 } 214 215 static void 216 check_part(CHKARGS) 217 { 218 219 if (n->type == ROFFT_BODY && n->child == NULL) 220 mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse, 221 n->line, n->pos, roff_name[n->tok]); 222 } 223 224 static void 225 check_par(CHKARGS) 226 { 227 228 switch (n->type) { 229 case ROFFT_BLOCK: 230 if (n->body->child == NULL) 231 roff_node_delete(man, n); 232 break; 233 case ROFFT_BODY: 234 if (n->child == NULL) 235 mandoc_vmsg(MANDOCERR_PAR_SKIP, 236 man->parse, n->line, n->pos, 237 "%s empty", roff_name[n->tok]); 238 break; 239 case ROFFT_HEAD: 240 if (n->child != NULL) 241 mandoc_vmsg(MANDOCERR_ARG_SKIP, 242 man->parse, n->line, n->pos, "%s %s%s", 243 roff_name[n->tok], n->child->string, 244 n->child->next != NULL ? " ..." : ""); 245 break; 246 default: 247 break; 248 } 249 } 250 251 static void 252 post_IP(CHKARGS) 253 { 254 255 switch (n->type) { 256 case ROFFT_BLOCK: 257 if (n->head->child == NULL && n->body->child == NULL) 258 roff_node_delete(man, n); 259 break; 260 case ROFFT_BODY: 261 if (n->parent->head->child == NULL && n->child == NULL) 262 mandoc_vmsg(MANDOCERR_PAR_SKIP, 263 man->parse, n->line, n->pos, 264 "%s empty", roff_name[n->tok]); 265 break; 266 default: 267 break; 268 } 269 } 270 271 static void 272 post_TH(CHKARGS) 273 { 274 struct roff_node *nb; 275 const char *p; 276 277 free(man->meta.title); 278 free(man->meta.vol); 279 free(man->meta.os); 280 free(man->meta.msec); 281 free(man->meta.date); 282 283 man->meta.title = man->meta.vol = man->meta.date = 284 man->meta.msec = man->meta.os = NULL; 285 286 nb = n; 287 288 /* ->TITLE<- MSEC DATE OS VOL */ 289 290 n = n->child; 291 if (n && n->string) { 292 for (p = n->string; '\0' != *p; p++) { 293 /* Only warn about this once... */ 294 if (isalpha((unsigned char)*p) && 295 ! isupper((unsigned char)*p)) { 296 mandoc_vmsg(MANDOCERR_TITLE_CASE, 297 man->parse, n->line, 298 n->pos + (p - n->string), 299 "TH %s", n->string); 300 break; 301 } 302 } 303 man->meta.title = mandoc_strdup(n->string); 304 } else { 305 man->meta.title = mandoc_strdup(""); 306 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, 307 nb->line, nb->pos, "TH"); 308 } 309 310 /* TITLE ->MSEC<- DATE OS VOL */ 311 312 if (n) 313 n = n->next; 314 if (n && n->string) 315 man->meta.msec = mandoc_strdup(n->string); 316 else { 317 man->meta.msec = mandoc_strdup(""); 318 mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse, 319 nb->line, nb->pos, "TH %s", man->meta.title); 320 } 321 322 /* TITLE MSEC ->DATE<- OS VOL */ 323 324 if (n) 325 n = n->next; 326 if (n && n->string && '\0' != n->string[0]) { 327 man->meta.date = man->quick ? 328 mandoc_strdup(n->string) : 329 mandoc_normdate(man, n->string, n->line, n->pos); 330 } else { 331 man->meta.date = mandoc_strdup(""); 332 mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, 333 n ? n->line : nb->line, 334 n ? n->pos : nb->pos, "TH"); 335 } 336 337 /* TITLE MSEC DATE ->OS<- VOL */ 338 339 if (n && (n = n->next)) 340 man->meta.os = mandoc_strdup(n->string); 341 else if (man->defos != NULL) 342 man->meta.os = mandoc_strdup(man->defos); 343 man->meta.os_e = man->meta.os == NULL ? MDOC_OS_OTHER : 344 strstr(man->meta.os, "OpenBSD") != NULL ? MDOC_OS_OPENBSD : 345 strstr(man->meta.os, "NetBSD") != NULL ? MDOC_OS_NETBSD : 346 MDOC_OS_OTHER; 347 348 /* TITLE MSEC DATE OS ->VOL<- */ 349 /* If missing, use the default VOL name for MSEC. */ 350 351 if (n && (n = n->next)) 352 man->meta.vol = mandoc_strdup(n->string); 353 else if ('\0' != man->meta.msec[0] && 354 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 355 man->meta.vol = mandoc_strdup(p); 356 357 if (n != NULL && (n = n->next) != NULL) 358 mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, 359 n->line, n->pos, "TH ... %s", n->string); 360 361 /* 362 * Remove the `TH' node after we've processed it for our 363 * meta-data. 364 */ 365 roff_node_delete(man, man->last); 366 } 367 368 static void 369 post_UC(CHKARGS) 370 { 371 static const char * const bsd_versions[] = { 372 "3rd Berkeley Distribution", 373 "4th Berkeley Distribution", 374 "4.2 Berkeley Distribution", 375 "4.3 Berkeley Distribution", 376 "4.4 Berkeley Distribution", 377 }; 378 379 const char *p, *s; 380 381 n = n->child; 382 383 if (n == NULL || n->type != ROFFT_TEXT) 384 p = bsd_versions[0]; 385 else { 386 s = n->string; 387 if (0 == strcmp(s, "3")) 388 p = bsd_versions[0]; 389 else if (0 == strcmp(s, "4")) 390 p = bsd_versions[1]; 391 else if (0 == strcmp(s, "5")) 392 p = bsd_versions[2]; 393 else if (0 == strcmp(s, "6")) 394 p = bsd_versions[3]; 395 else if (0 == strcmp(s, "7")) 396 p = bsd_versions[4]; 397 else 398 p = bsd_versions[0]; 399 } 400 401 free(man->meta.os); 402 man->meta.os = mandoc_strdup(p); 403 } 404 405 static void 406 post_AT(CHKARGS) 407 { 408 static const char * const unix_versions[] = { 409 "7th Edition", 410 "System III", 411 "System V", 412 "System V Release 2", 413 }; 414 415 struct roff_node *nn; 416 const char *p, *s; 417 418 n = n->child; 419 420 if (n == NULL || n->type != ROFFT_TEXT) 421 p = unix_versions[0]; 422 else { 423 s = n->string; 424 if (0 == strcmp(s, "3")) 425 p = unix_versions[0]; 426 else if (0 == strcmp(s, "4")) 427 p = unix_versions[1]; 428 else if (0 == strcmp(s, "5")) { 429 nn = n->next; 430 if (nn != NULL && 431 nn->type == ROFFT_TEXT && 432 nn->string[0] != '\0') 433 p = unix_versions[3]; 434 else 435 p = unix_versions[2]; 436 } else 437 p = unix_versions[0]; 438 } 439 440 free(man->meta.os); 441 man->meta.os = mandoc_strdup(p); 442 } 443 444 static void 445 post_in(CHKARGS) 446 { 447 char *s; 448 449 if (n->parent->tok != MAN_TP || 450 n->parent->type != ROFFT_HEAD || 451 n->child == NULL || 452 *n->child->string == '+' || 453 *n->child->string == '-') 454 return; 455 mandoc_asprintf(&s, "+%s", n->child->string); 456 free(n->child->string); 457 n->child->string = s; 458 } 459 460 static void 461 post_vs(CHKARGS) 462 { 463 464 if (NULL != n->prev) 465 return; 466 467 switch (n->parent->tok) { 468 case MAN_SH: 469 case MAN_SS: 470 case MAN_PP: 471 case MAN_LP: 472 case MAN_P: 473 mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos, 474 "%s after %s", roff_name[n->tok], 475 roff_name[n->parent->tok]); 476 /* FALLTHROUGH */ 477 case TOKEN_NONE: 478 /* 479 * Don't warn about this because it occurs in pod2man 480 * and would cause considerable (unfixable) warnage. 481 */ 482 roff_node_delete(man, n); 483 break; 484 default: 485 break; 486 } 487 } 488