1 /* $OpenBSD: mdoc.c,v 1.150 2017/03/03 13:55:06 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdarg.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "mandoc_aux.h" 29 #include "mandoc.h" 30 #include "roff.h" 31 #include "mdoc.h" 32 #include "libmandoc.h" 33 #include "roff_int.h" 34 #include "libmdoc.h" 35 36 const char *const __mdoc_macronames[MDOC_MAX + 1] = { 37 "text", 38 "Ap", "Dd", "Dt", "Os", 39 "Sh", "Ss", "Pp", "D1", 40 "Dl", "Bd", "Ed", "Bl", 41 "El", "It", "Ad", "An", 42 "Ar", "Cd", "Cm", "Dv", 43 "Er", "Ev", "Ex", "Fa", 44 "Fd", "Fl", "Fn", "Ft", 45 "Ic", "In", "Li", "Nd", 46 "Nm", "Op", "Ot", "Pa", 47 "Rv", "St", "Va", "Vt", 48 "Xr", "%A", "%B", "%D", 49 "%I", "%J", "%N", "%O", 50 "%P", "%R", "%T", "%V", 51 "Ac", "Ao", "Aq", "At", 52 "Bc", "Bf", "Bo", "Bq", 53 "Bsx", "Bx", "Db", "Dc", 54 "Do", "Dq", "Ec", "Ef", 55 "Em", "Eo", "Fx", "Ms", 56 "No", "Ns", "Nx", "Ox", 57 "Pc", "Pf", "Po", "Pq", 58 "Qc", "Ql", "Qo", "Qq", 59 "Re", "Rs", "Sc", "So", 60 "Sq", "Sm", "Sx", "Sy", 61 "Tn", "Ux", "Xc", "Xo", 62 "Fo", "Fc", "Oo", "Oc", 63 "Bk", "Ek", "Bt", "Hf", 64 "Fr", "Ud", "Lb", "Lp", 65 "Lk", "Mt", "Brq", "Bro", 66 "Brc", "%C", "Es", "En", 67 "Dx", "%Q", "br", "sp", 68 "%U", "Ta", "ll", 69 }; 70 71 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 72 "split", "nosplit", "ragged", 73 "unfilled", "literal", "file", 74 "offset", "bullet", "dash", 75 "hyphen", "item", "enum", 76 "tag", "diag", "hang", 77 "ohang", "inset", "column", 78 "width", "compact", "std", 79 "filled", "words", "emphasis", 80 "symbolic", "nested", "centered" 81 }; 82 83 const char * const *mdoc_macronames = __mdoc_macronames + 1; 84 const char * const *mdoc_argnames = __mdoc_argnames; 85 86 static int mdoc_ptext(struct roff_man *, int, char *, int); 87 static int mdoc_pmacro(struct roff_man *, int, char *, int); 88 89 90 /* 91 * Main parse routine. Parses a single line -- really just hands off to 92 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 93 */ 94 int 95 mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 96 { 97 98 if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 99 mdoc->flags |= MDOC_NEWLINE; 100 101 /* 102 * Let the roff nS register switch SYNOPSIS mode early, 103 * such that the parser knows at all times 104 * whether this mode is on or off. 105 * Note that this mode is also switched by the Sh macro. 106 */ 107 if (roff_getreg(mdoc->roff, "nS")) 108 mdoc->flags |= MDOC_SYNOPSIS; 109 else 110 mdoc->flags &= ~MDOC_SYNOPSIS; 111 112 return roff_getcontrol(mdoc->roff, buf, &offs) ? 113 mdoc_pmacro(mdoc, ln, buf, offs) : 114 mdoc_ptext(mdoc, ln, buf, offs); 115 } 116 117 void 118 mdoc_macro(MACRO_PROT_ARGS) 119 { 120 assert(tok > TOKEN_NONE && tok < MDOC_MAX); 121 122 (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); 123 } 124 125 void 126 mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok) 127 { 128 struct roff_node *p; 129 130 p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 131 roff_node_append(mdoc, p); 132 mdoc->next = ROFF_NEXT_CHILD; 133 } 134 135 struct roff_node * 136 mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok, 137 struct roff_node *body) 138 { 139 struct roff_node *p; 140 141 body->flags |= NODE_ENDED; 142 body->parent->flags |= NODE_ENDED; 143 p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 144 p->body = body; 145 p->norm = body->norm; 146 p->end = ENDBODY_SPACE; 147 roff_node_append(mdoc, p); 148 mdoc->next = ROFF_NEXT_SIBLING; 149 return p; 150 } 151 152 struct roff_node * 153 mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 154 int tok, struct mdoc_arg *args) 155 { 156 struct roff_node *p; 157 158 p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 159 p->args = args; 160 if (p->args) 161 (args->refcnt)++; 162 163 switch (tok) { 164 case MDOC_Bd: 165 case MDOC_Bf: 166 case MDOC_Bl: 167 case MDOC_En: 168 case MDOC_Rs: 169 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 170 break; 171 default: 172 break; 173 } 174 roff_node_append(mdoc, p); 175 mdoc->next = ROFF_NEXT_CHILD; 176 return p; 177 } 178 179 void 180 mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 181 int tok, struct mdoc_arg *args) 182 { 183 struct roff_node *p; 184 185 p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 186 p->args = args; 187 if (p->args) 188 (args->refcnt)++; 189 190 switch (tok) { 191 case MDOC_An: 192 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 193 break; 194 default: 195 break; 196 } 197 roff_node_append(mdoc, p); 198 mdoc->next = ROFF_NEXT_CHILD; 199 } 200 201 void 202 mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) 203 { 204 205 roff_node_unlink(mdoc, p); 206 p->prev = p->next = NULL; 207 roff_node_append(mdoc, p); 208 } 209 210 /* 211 * Parse free-form text, that is, a line that does not begin with the 212 * control character. 213 */ 214 static int 215 mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 216 { 217 struct roff_node *n; 218 char *c, *ws, *end; 219 220 n = mdoc->last; 221 222 /* 223 * If a column list contains plain text, assume an implicit item 224 * macro. This can happen one or more times at the beginning 225 * of such a list, intermixed with non-It mdoc macros and with 226 * nodes generated on the roff level, for example by tbl. 227 */ 228 229 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 230 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 231 (n->parent != NULL && n->parent->tok == MDOC_Bl && 232 n->parent->norm->Bl.type == LIST_column)) { 233 mdoc->flags |= MDOC_FREECOL; 234 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 235 return 1; 236 } 237 238 /* 239 * Search for the beginning of unescaped trailing whitespace (ws) 240 * and for the first character not to be output (end). 241 */ 242 243 /* FIXME: replace with strcspn(). */ 244 ws = NULL; 245 for (c = end = buf + offs; *c; c++) { 246 switch (*c) { 247 case ' ': 248 if (NULL == ws) 249 ws = c; 250 continue; 251 case '\t': 252 /* 253 * Always warn about trailing tabs, 254 * even outside literal context, 255 * where they should be put on the next line. 256 */ 257 if (NULL == ws) 258 ws = c; 259 /* 260 * Strip trailing tabs in literal context only; 261 * outside, they affect the next line. 262 */ 263 if (MDOC_LITERAL & mdoc->flags) 264 continue; 265 break; 266 case '\\': 267 /* Skip the escaped character, too, if any. */ 268 if (c[1]) 269 c++; 270 /* FALLTHROUGH */ 271 default: 272 ws = NULL; 273 break; 274 } 275 end = c + 1; 276 } 277 *end = '\0'; 278 279 if (ws) 280 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 281 line, (int)(ws-buf), NULL); 282 283 if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { 284 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 285 line, (int)(c - buf), NULL); 286 287 /* 288 * Insert a `sp' in the case of a blank line. Technically, 289 * blank lines aren't allowed, but enough manuals assume this 290 * behaviour that we want to work around it. 291 */ 292 roff_elem_alloc(mdoc, line, offs, MDOC_sp); 293 mdoc->last->flags |= NODE_VALID | NODE_ENDED; 294 mdoc->next = ROFF_NEXT_SIBLING; 295 return 1; 296 } 297 298 roff_word_alloc(mdoc, line, offs, buf+offs); 299 300 if (mdoc->flags & MDOC_LITERAL) 301 return 1; 302 303 /* 304 * End-of-sentence check. If the last character is an unescaped 305 * EOS character, then flag the node as being the end of a 306 * sentence. The front-end will know how to interpret this. 307 */ 308 309 assert(buf < end); 310 311 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 312 mdoc->last->flags |= NODE_EOS; 313 314 for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) { 315 if (c - buf < offs + 2) 316 continue; 317 if (end - c < 4) 318 break; 319 if (isalpha((unsigned char)c[-2]) && 320 isalpha((unsigned char)c[-1]) && 321 c[1] == ' ' && 322 isupper((unsigned char)(c[2] == ' ' ? c[3] : c[2])) && 323 (c[-2] != 'n' || c[-1] != 'c') && 324 (c[-2] != 'v' || c[-1] != 's')) 325 mandoc_msg(MANDOCERR_EOS, mdoc->parse, 326 line, (int)(c - buf), NULL); 327 } 328 329 return 1; 330 } 331 332 /* 333 * Parse a macro line, that is, a line beginning with the control 334 * character. 335 */ 336 static int 337 mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 338 { 339 struct roff_node *n; 340 const char *cp; 341 int tok; 342 int i, sv; 343 char mac[5]; 344 345 sv = offs; 346 347 /* 348 * Copy the first word into a nil-terminated buffer. 349 * Stop when a space, tab, escape, or eoln is encountered. 350 */ 351 352 i = 0; 353 while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) 354 mac[i++] = buf[offs++]; 355 356 mac[i] = '\0'; 357 358 tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE; 359 360 if (tok == TOKEN_NONE) { 361 mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 362 ln, sv, buf + sv - 1); 363 return 1; 364 } 365 366 /* Skip a leading escape sequence or tab. */ 367 368 switch (buf[offs]) { 369 case '\\': 370 cp = buf + offs + 1; 371 mandoc_escape(&cp, NULL, NULL); 372 offs = cp - buf; 373 break; 374 case '\t': 375 offs++; 376 break; 377 default: 378 break; 379 } 380 381 /* Jump to the next non-whitespace word. */ 382 383 while (buf[offs] == ' ') 384 offs++; 385 386 /* 387 * Trailing whitespace. Note that tabs are allowed to be passed 388 * into the parser as "text", so we only warn about spaces here. 389 */ 390 391 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 392 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 393 ln, offs - 1, NULL); 394 395 /* 396 * If an initial macro or a list invocation, divert directly 397 * into macro processing. 398 */ 399 400 n = mdoc->last; 401 if (n == NULL || tok == MDOC_It || tok == MDOC_El) { 402 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 403 return 1; 404 } 405 406 /* 407 * If a column list contains a non-It macro, assume an implicit 408 * item macro. This can happen one or more times at the 409 * beginning of such a list, intermixed with text lines and 410 * with nodes generated on the roff level, for example by tbl. 411 */ 412 413 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 414 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 415 (n->parent != NULL && n->parent->tok == MDOC_Bl && 416 n->parent->norm->Bl.type == LIST_column)) { 417 mdoc->flags |= MDOC_FREECOL; 418 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 419 return 1; 420 } 421 422 /* Normal processing of a macro. */ 423 424 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 425 426 /* In quick mode (for mandocdb), abort after the NAME section. */ 427 428 if (mdoc->quick && MDOC_Sh == tok && 429 SEC_NAME != mdoc->last->sec) 430 return 2; 431 432 return 1; 433 } 434 435 enum mdelim 436 mdoc_isdelim(const char *p) 437 { 438 439 if ('\0' == p[0]) 440 return DELIM_NONE; 441 442 if ('\0' == p[1]) 443 switch (p[0]) { 444 case '(': 445 case '[': 446 return DELIM_OPEN; 447 case '|': 448 return DELIM_MIDDLE; 449 case '.': 450 case ',': 451 case ';': 452 case ':': 453 case '?': 454 case '!': 455 case ')': 456 case ']': 457 return DELIM_CLOSE; 458 default: 459 return DELIM_NONE; 460 } 461 462 if ('\\' != p[0]) 463 return DELIM_NONE; 464 465 if (0 == strcmp(p + 1, ".")) 466 return DELIM_CLOSE; 467 if (0 == strcmp(p + 1, "fR|\\fP")) 468 return DELIM_MIDDLE; 469 470 return DELIM_NONE; 471 } 472 473 void 474 mdoc_validate(struct roff_man *mdoc) 475 { 476 477 mdoc->last = mdoc->first; 478 mdoc_node_validate(mdoc); 479 mdoc_state_reset(mdoc); 480 } 481