1 /* $OpenBSD: mdoc.c,v 1.141 2015/04/23 16:17:04 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdarg.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "mandoc_aux.h" 29 #include "mandoc.h" 30 #include "roff.h" 31 #include "mdoc.h" 32 #include "libmandoc.h" 33 #include "roff_int.h" 34 #include "libmdoc.h" 35 36 const char *const __mdoc_macronames[MDOC_MAX + 1] = { 37 "text", 38 "Ap", "Dd", "Dt", "Os", 39 "Sh", "Ss", "Pp", "D1", 40 "Dl", "Bd", "Ed", "Bl", 41 "El", "It", "Ad", "An", 42 "Ar", "Cd", "Cm", "Dv", 43 "Er", "Ev", "Ex", "Fa", 44 "Fd", "Fl", "Fn", "Ft", 45 "Ic", "In", "Li", "Nd", 46 "Nm", "Op", "Ot", "Pa", 47 "Rv", "St", "Va", "Vt", 48 "Xr", "%A", "%B", "%D", 49 "%I", "%J", "%N", "%O", 50 "%P", "%R", "%T", "%V", 51 "Ac", "Ao", "Aq", "At", 52 "Bc", "Bf", "Bo", "Bq", 53 "Bsx", "Bx", "Db", "Dc", 54 "Do", "Dq", "Ec", "Ef", 55 "Em", "Eo", "Fx", "Ms", 56 "No", "Ns", "Nx", "Ox", 57 "Pc", "Pf", "Po", "Pq", 58 "Qc", "Ql", "Qo", "Qq", 59 "Re", "Rs", "Sc", "So", 60 "Sq", "Sm", "Sx", "Sy", 61 "Tn", "Ux", "Xc", "Xo", 62 "Fo", "Fc", "Oo", "Oc", 63 "Bk", "Ek", "Bt", "Hf", 64 "Fr", "Ud", "Lb", "Lp", 65 "Lk", "Mt", "Brq", "Bro", 66 "Brc", "%C", "Es", "En", 67 "Dx", "%Q", "br", "sp", 68 "%U", "Ta", "ll", 69 }; 70 71 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 72 "split", "nosplit", "ragged", 73 "unfilled", "literal", "file", 74 "offset", "bullet", "dash", 75 "hyphen", "item", "enum", 76 "tag", "diag", "hang", 77 "ohang", "inset", "column", 78 "width", "compact", "std", 79 "filled", "words", "emphasis", 80 "symbolic", "nested", "centered" 81 }; 82 83 const char * const *mdoc_macronames = __mdoc_macronames + 1; 84 const char * const *mdoc_argnames = __mdoc_argnames; 85 86 static int mdoc_ptext(struct roff_man *, int, char *, int); 87 static int mdoc_pmacro(struct roff_man *, int, char *, int); 88 89 90 /* 91 * Main parse routine. Parses a single line -- really just hands off to 92 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 93 */ 94 int 95 mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 96 { 97 98 if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 99 mdoc->flags |= MDOC_NEWLINE; 100 101 /* 102 * Let the roff nS register switch SYNOPSIS mode early, 103 * such that the parser knows at all times 104 * whether this mode is on or off. 105 * Note that this mode is also switched by the Sh macro. 106 */ 107 if (roff_getreg(mdoc->roff, "nS")) 108 mdoc->flags |= MDOC_SYNOPSIS; 109 else 110 mdoc->flags &= ~MDOC_SYNOPSIS; 111 112 return(roff_getcontrol(mdoc->roff, buf, &offs) ? 113 mdoc_pmacro(mdoc, ln, buf, offs) : 114 mdoc_ptext(mdoc, ln, buf, offs)); 115 } 116 117 void 118 mdoc_macro(MACRO_PROT_ARGS) 119 { 120 assert(tok > TOKEN_NONE && tok < MDOC_MAX); 121 122 if (mdoc->flags & MDOC_PBODY) { 123 if (tok == MDOC_Dt) { 124 mandoc_vmsg(MANDOCERR_DT_LATE, 125 mdoc->parse, line, ppos, 126 "Dt %s", buf + *pos); 127 return; 128 } 129 } else if ( ! (mdoc_macros[tok].flags & MDOC_PROLOGUE)) { 130 if (mdoc->meta.title == NULL) { 131 mandoc_vmsg(MANDOCERR_DT_NOTITLE, 132 mdoc->parse, line, ppos, "%s %s", 133 mdoc_macronames[tok], buf + *pos); 134 mdoc->meta.title = mandoc_strdup("UNTITLED"); 135 } 136 if (NULL == mdoc->meta.vol) 137 mdoc->meta.vol = mandoc_strdup("LOCAL"); 138 mdoc->flags |= MDOC_PBODY; 139 } 140 (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); 141 } 142 143 void 144 mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok) 145 { 146 struct roff_node *p; 147 148 p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 149 roff_node_append(mdoc, p); 150 mdoc->next = ROFF_NEXT_CHILD; 151 } 152 153 struct roff_node * 154 mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok, 155 struct roff_node *body, enum mdoc_endbody end) 156 { 157 struct roff_node *p; 158 159 body->flags |= MDOC_ENDED; 160 body->parent->flags |= MDOC_ENDED; 161 p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 162 p->body = body; 163 p->norm = body->norm; 164 p->end = end; 165 roff_node_append(mdoc, p); 166 mdoc->next = ROFF_NEXT_SIBLING; 167 return(p); 168 } 169 170 struct roff_node * 171 mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 172 int tok, struct mdoc_arg *args) 173 { 174 struct roff_node *p; 175 176 p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 177 p->args = args; 178 if (p->args) 179 (args->refcnt)++; 180 181 switch (tok) { 182 case MDOC_Bd: 183 /* FALLTHROUGH */ 184 case MDOC_Bf: 185 /* FALLTHROUGH */ 186 case MDOC_Bl: 187 /* FALLTHROUGH */ 188 case MDOC_En: 189 /* FALLTHROUGH */ 190 case MDOC_Rs: 191 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 192 break; 193 default: 194 break; 195 } 196 roff_node_append(mdoc, p); 197 mdoc->next = ROFF_NEXT_CHILD; 198 return(p); 199 } 200 201 void 202 mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 203 int tok, struct mdoc_arg *args) 204 { 205 struct roff_node *p; 206 207 p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 208 p->args = args; 209 if (p->args) 210 (args->refcnt)++; 211 212 switch (tok) { 213 case MDOC_An: 214 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 215 break; 216 default: 217 break; 218 } 219 roff_node_append(mdoc, p); 220 mdoc->next = ROFF_NEXT_CHILD; 221 } 222 223 void 224 mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) 225 { 226 227 roff_node_unlink(mdoc, p); 228 roff_node_append(mdoc, p); 229 } 230 231 /* 232 * Parse free-form text, that is, a line that does not begin with the 233 * control character. 234 */ 235 static int 236 mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 237 { 238 struct roff_node *n; 239 char *c, *ws, *end; 240 241 assert(mdoc->last); 242 n = mdoc->last; 243 244 /* 245 * Divert directly to list processing if we're encountering a 246 * columnar ROFFT_BLOCK with or without a prior ROFFT_BLOCK entry 247 * (a ROFFT_BODY means it's already open, in which case we should 248 * process within its context in the normal way). 249 */ 250 251 if (n->tok == MDOC_Bl && n->type == ROFFT_BODY && 252 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) { 253 /* `Bl' is open without any children. */ 254 mdoc->flags |= MDOC_FREECOL; 255 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 256 return(1); 257 } 258 259 if (n->tok == MDOC_It && n->type == ROFFT_BLOCK && 260 NULL != n->parent && 261 MDOC_Bl == n->parent->tok && 262 LIST_column == n->parent->norm->Bl.type) { 263 /* `Bl' has block-level `It' children. */ 264 mdoc->flags |= MDOC_FREECOL; 265 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 266 return(1); 267 } 268 269 /* 270 * Search for the beginning of unescaped trailing whitespace (ws) 271 * and for the first character not to be output (end). 272 */ 273 274 /* FIXME: replace with strcspn(). */ 275 ws = NULL; 276 for (c = end = buf + offs; *c; c++) { 277 switch (*c) { 278 case ' ': 279 if (NULL == ws) 280 ws = c; 281 continue; 282 case '\t': 283 /* 284 * Always warn about trailing tabs, 285 * even outside literal context, 286 * where they should be put on the next line. 287 */ 288 if (NULL == ws) 289 ws = c; 290 /* 291 * Strip trailing tabs in literal context only; 292 * outside, they affect the next line. 293 */ 294 if (MDOC_LITERAL & mdoc->flags) 295 continue; 296 break; 297 case '\\': 298 /* Skip the escaped character, too, if any. */ 299 if (c[1]) 300 c++; 301 /* FALLTHROUGH */ 302 default: 303 ws = NULL; 304 break; 305 } 306 end = c + 1; 307 } 308 *end = '\0'; 309 310 if (ws) 311 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 312 line, (int)(ws-buf), NULL); 313 314 if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { 315 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 316 line, (int)(c - buf), NULL); 317 318 /* 319 * Insert a `sp' in the case of a blank line. Technically, 320 * blank lines aren't allowed, but enough manuals assume this 321 * behaviour that we want to work around it. 322 */ 323 roff_elem_alloc(mdoc, line, offs, MDOC_sp); 324 mdoc->next = ROFF_NEXT_SIBLING; 325 mdoc_valid_post(mdoc); 326 return(1); 327 } 328 329 roff_word_alloc(mdoc, line, offs, buf+offs); 330 331 if (mdoc->flags & MDOC_LITERAL) 332 return(1); 333 334 /* 335 * End-of-sentence check. If the last character is an unescaped 336 * EOS character, then flag the node as being the end of a 337 * sentence. The front-end will know how to interpret this. 338 */ 339 340 assert(buf < end); 341 342 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 343 mdoc->last->flags |= MDOC_EOS; 344 return(1); 345 } 346 347 /* 348 * Parse a macro line, that is, a line beginning with the control 349 * character. 350 */ 351 static int 352 mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 353 { 354 struct roff_node *n; 355 const char *cp; 356 int tok; 357 int i, sv; 358 char mac[5]; 359 360 sv = offs; 361 362 /* 363 * Copy the first word into a nil-terminated buffer. 364 * Stop when a space, tab, escape, or eoln is encountered. 365 */ 366 367 i = 0; 368 while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) 369 mac[i++] = buf[offs++]; 370 371 mac[i] = '\0'; 372 373 tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE; 374 375 if (tok == TOKEN_NONE) { 376 mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 377 ln, sv, buf + sv - 1); 378 return(1); 379 } 380 381 /* Skip a leading escape sequence or tab. */ 382 383 switch (buf[offs]) { 384 case '\\': 385 cp = buf + offs + 1; 386 mandoc_escape(&cp, NULL, NULL); 387 offs = cp - buf; 388 break; 389 case '\t': 390 offs++; 391 break; 392 default: 393 break; 394 } 395 396 /* Jump to the next non-whitespace word. */ 397 398 while (buf[offs] && ' ' == buf[offs]) 399 offs++; 400 401 /* 402 * Trailing whitespace. Note that tabs are allowed to be passed 403 * into the parser as "text", so we only warn about spaces here. 404 */ 405 406 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 407 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 408 ln, offs - 1, NULL); 409 410 /* 411 * If an initial macro or a list invocation, divert directly 412 * into macro processing. 413 */ 414 415 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) { 416 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 417 return(1); 418 } 419 420 n = mdoc->last; 421 assert(mdoc->last); 422 423 /* 424 * If the first macro of a `Bl -column', open an `It' block 425 * context around the parsed macro. 426 */ 427 428 if (n->tok == MDOC_Bl && n->type == ROFFT_BODY && 429 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) { 430 mdoc->flags |= MDOC_FREECOL; 431 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 432 return(1); 433 } 434 435 /* 436 * If we're following a block-level `It' within a `Bl -column' 437 * context (perhaps opened in the above block or in ptext()), 438 * then open an `It' block context around the parsed macro. 439 */ 440 441 if (n->tok == MDOC_It && n->type == ROFFT_BLOCK && 442 NULL != n->parent && 443 MDOC_Bl == n->parent->tok && 444 LIST_column == n->parent->norm->Bl.type) { 445 mdoc->flags |= MDOC_FREECOL; 446 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 447 return(1); 448 } 449 450 /* Normal processing of a macro. */ 451 452 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 453 454 /* In quick mode (for mandocdb), abort after the NAME section. */ 455 456 if (mdoc->quick && MDOC_Sh == tok && 457 SEC_NAME != mdoc->last->sec) 458 return(2); 459 460 return(1); 461 } 462 463 enum mdelim 464 mdoc_isdelim(const char *p) 465 { 466 467 if ('\0' == p[0]) 468 return(DELIM_NONE); 469 470 if ('\0' == p[1]) 471 switch (p[0]) { 472 case '(': 473 /* FALLTHROUGH */ 474 case '[': 475 return(DELIM_OPEN); 476 case '|': 477 return(DELIM_MIDDLE); 478 case '.': 479 /* FALLTHROUGH */ 480 case ',': 481 /* FALLTHROUGH */ 482 case ';': 483 /* FALLTHROUGH */ 484 case ':': 485 /* FALLTHROUGH */ 486 case '?': 487 /* FALLTHROUGH */ 488 case '!': 489 /* FALLTHROUGH */ 490 case ')': 491 /* FALLTHROUGH */ 492 case ']': 493 return(DELIM_CLOSE); 494 default: 495 return(DELIM_NONE); 496 } 497 498 if ('\\' != p[0]) 499 return(DELIM_NONE); 500 501 if (0 == strcmp(p + 1, ".")) 502 return(DELIM_CLOSE); 503 if (0 == strcmp(p + 1, "fR|\\fP")) 504 return(DELIM_MIDDLE); 505 506 return(DELIM_NONE); 507 } 508