1 /* $OpenBSD: mdoc.c,v 1.145 2015/10/30 19:03:36 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdarg.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "mandoc_aux.h" 29 #include "mandoc.h" 30 #include "roff.h" 31 #include "mdoc.h" 32 #include "libmandoc.h" 33 #include "roff_int.h" 34 #include "libmdoc.h" 35 36 const char *const __mdoc_macronames[MDOC_MAX + 1] = { 37 "text", 38 "Ap", "Dd", "Dt", "Os", 39 "Sh", "Ss", "Pp", "D1", 40 "Dl", "Bd", "Ed", "Bl", 41 "El", "It", "Ad", "An", 42 "Ar", "Cd", "Cm", "Dv", 43 "Er", "Ev", "Ex", "Fa", 44 "Fd", "Fl", "Fn", "Ft", 45 "Ic", "In", "Li", "Nd", 46 "Nm", "Op", "Ot", "Pa", 47 "Rv", "St", "Va", "Vt", 48 "Xr", "%A", "%B", "%D", 49 "%I", "%J", "%N", "%O", 50 "%P", "%R", "%T", "%V", 51 "Ac", "Ao", "Aq", "At", 52 "Bc", "Bf", "Bo", "Bq", 53 "Bsx", "Bx", "Db", "Dc", 54 "Do", "Dq", "Ec", "Ef", 55 "Em", "Eo", "Fx", "Ms", 56 "No", "Ns", "Nx", "Ox", 57 "Pc", "Pf", "Po", "Pq", 58 "Qc", "Ql", "Qo", "Qq", 59 "Re", "Rs", "Sc", "So", 60 "Sq", "Sm", "Sx", "Sy", 61 "Tn", "Ux", "Xc", "Xo", 62 "Fo", "Fc", "Oo", "Oc", 63 "Bk", "Ek", "Bt", "Hf", 64 "Fr", "Ud", "Lb", "Lp", 65 "Lk", "Mt", "Brq", "Bro", 66 "Brc", "%C", "Es", "En", 67 "Dx", "%Q", "br", "sp", 68 "%U", "Ta", "ll", 69 }; 70 71 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 72 "split", "nosplit", "ragged", 73 "unfilled", "literal", "file", 74 "offset", "bullet", "dash", 75 "hyphen", "item", "enum", 76 "tag", "diag", "hang", 77 "ohang", "inset", "column", 78 "width", "compact", "std", 79 "filled", "words", "emphasis", 80 "symbolic", "nested", "centered" 81 }; 82 83 const char * const *mdoc_macronames = __mdoc_macronames + 1; 84 const char * const *mdoc_argnames = __mdoc_argnames; 85 86 static int mdoc_ptext(struct roff_man *, int, char *, int); 87 static int mdoc_pmacro(struct roff_man *, int, char *, int); 88 89 90 /* 91 * Main parse routine. Parses a single line -- really just hands off to 92 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 93 */ 94 int 95 mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 96 { 97 98 if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 99 mdoc->flags |= MDOC_NEWLINE; 100 101 /* 102 * Let the roff nS register switch SYNOPSIS mode early, 103 * such that the parser knows at all times 104 * whether this mode is on or off. 105 * Note that this mode is also switched by the Sh macro. 106 */ 107 if (roff_getreg(mdoc->roff, "nS")) 108 mdoc->flags |= MDOC_SYNOPSIS; 109 else 110 mdoc->flags &= ~MDOC_SYNOPSIS; 111 112 return roff_getcontrol(mdoc->roff, buf, &offs) ? 113 mdoc_pmacro(mdoc, ln, buf, offs) : 114 mdoc_ptext(mdoc, ln, buf, offs); 115 } 116 117 void 118 mdoc_macro(MACRO_PROT_ARGS) 119 { 120 assert(tok > TOKEN_NONE && tok < MDOC_MAX); 121 122 (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); 123 } 124 125 void 126 mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok) 127 { 128 struct roff_node *p; 129 130 p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 131 roff_node_append(mdoc, p); 132 mdoc->next = ROFF_NEXT_CHILD; 133 } 134 135 struct roff_node * 136 mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok, 137 struct roff_node *body, enum mdoc_endbody end) 138 { 139 struct roff_node *p; 140 141 body->flags |= MDOC_ENDED; 142 body->parent->flags |= MDOC_ENDED; 143 p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 144 p->body = body; 145 p->norm = body->norm; 146 p->end = end; 147 roff_node_append(mdoc, p); 148 mdoc->next = ROFF_NEXT_SIBLING; 149 return p; 150 } 151 152 struct roff_node * 153 mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 154 int tok, struct mdoc_arg *args) 155 { 156 struct roff_node *p; 157 158 p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 159 p->args = args; 160 if (p->args) 161 (args->refcnt)++; 162 163 switch (tok) { 164 case MDOC_Bd: 165 case MDOC_Bf: 166 case MDOC_Bl: 167 case MDOC_En: 168 case MDOC_Rs: 169 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 170 break; 171 default: 172 break; 173 } 174 roff_node_append(mdoc, p); 175 mdoc->next = ROFF_NEXT_CHILD; 176 return p; 177 } 178 179 void 180 mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 181 int tok, struct mdoc_arg *args) 182 { 183 struct roff_node *p; 184 185 p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 186 p->args = args; 187 if (p->args) 188 (args->refcnt)++; 189 190 switch (tok) { 191 case MDOC_An: 192 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 193 break; 194 default: 195 break; 196 } 197 roff_node_append(mdoc, p); 198 mdoc->next = ROFF_NEXT_CHILD; 199 } 200 201 void 202 mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) 203 { 204 205 roff_node_unlink(mdoc, p); 206 p->prev = p->next = NULL; 207 roff_node_append(mdoc, p); 208 } 209 210 /* 211 * Parse free-form text, that is, a line that does not begin with the 212 * control character. 213 */ 214 static int 215 mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 216 { 217 struct roff_node *n; 218 char *c, *ws, *end; 219 220 assert(mdoc->last); 221 n = mdoc->last; 222 223 /* 224 * Divert directly to list processing if we're encountering a 225 * columnar ROFFT_BLOCK with or without a prior ROFFT_BLOCK entry 226 * (a ROFFT_BODY means it's already open, in which case we should 227 * process within its context in the normal way). 228 */ 229 230 if (n->tok == MDOC_Bl && n->type == ROFFT_BODY && 231 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) { 232 /* `Bl' is open without any children. */ 233 mdoc->flags |= MDOC_FREECOL; 234 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 235 return 1; 236 } 237 238 if (n->tok == MDOC_It && n->type == ROFFT_BLOCK && 239 NULL != n->parent && 240 MDOC_Bl == n->parent->tok && 241 LIST_column == n->parent->norm->Bl.type) { 242 /* `Bl' has block-level `It' children. */ 243 mdoc->flags |= MDOC_FREECOL; 244 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 245 return 1; 246 } 247 248 /* 249 * Search for the beginning of unescaped trailing whitespace (ws) 250 * and for the first character not to be output (end). 251 */ 252 253 /* FIXME: replace with strcspn(). */ 254 ws = NULL; 255 for (c = end = buf + offs; *c; c++) { 256 switch (*c) { 257 case ' ': 258 if (NULL == ws) 259 ws = c; 260 continue; 261 case '\t': 262 /* 263 * Always warn about trailing tabs, 264 * even outside literal context, 265 * where they should be put on the next line. 266 */ 267 if (NULL == ws) 268 ws = c; 269 /* 270 * Strip trailing tabs in literal context only; 271 * outside, they affect the next line. 272 */ 273 if (MDOC_LITERAL & mdoc->flags) 274 continue; 275 break; 276 case '\\': 277 /* Skip the escaped character, too, if any. */ 278 if (c[1]) 279 c++; 280 /* FALLTHROUGH */ 281 default: 282 ws = NULL; 283 break; 284 } 285 end = c + 1; 286 } 287 *end = '\0'; 288 289 if (ws) 290 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 291 line, (int)(ws-buf), NULL); 292 293 if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { 294 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 295 line, (int)(c - buf), NULL); 296 297 /* 298 * Insert a `sp' in the case of a blank line. Technically, 299 * blank lines aren't allowed, but enough manuals assume this 300 * behaviour that we want to work around it. 301 */ 302 roff_elem_alloc(mdoc, line, offs, MDOC_sp); 303 mdoc->last->flags |= MDOC_VALID | MDOC_ENDED; 304 mdoc->next = ROFF_NEXT_SIBLING; 305 return 1; 306 } 307 308 roff_word_alloc(mdoc, line, offs, buf+offs); 309 310 if (mdoc->flags & MDOC_LITERAL) 311 return 1; 312 313 /* 314 * End-of-sentence check. If the last character is an unescaped 315 * EOS character, then flag the node as being the end of a 316 * sentence. The front-end will know how to interpret this. 317 */ 318 319 assert(buf < end); 320 321 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 322 mdoc->last->flags |= MDOC_EOS; 323 return 1; 324 } 325 326 /* 327 * Parse a macro line, that is, a line beginning with the control 328 * character. 329 */ 330 static int 331 mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 332 { 333 struct roff_node *n; 334 const char *cp; 335 int tok; 336 int i, sv; 337 char mac[5]; 338 339 sv = offs; 340 341 /* 342 * Copy the first word into a nil-terminated buffer. 343 * Stop when a space, tab, escape, or eoln is encountered. 344 */ 345 346 i = 0; 347 while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) 348 mac[i++] = buf[offs++]; 349 350 mac[i] = '\0'; 351 352 tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE; 353 354 if (tok == TOKEN_NONE) { 355 mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 356 ln, sv, buf + sv - 1); 357 return 1; 358 } 359 360 /* Skip a leading escape sequence or tab. */ 361 362 switch (buf[offs]) { 363 case '\\': 364 cp = buf + offs + 1; 365 mandoc_escape(&cp, NULL, NULL); 366 offs = cp - buf; 367 break; 368 case '\t': 369 offs++; 370 break; 371 default: 372 break; 373 } 374 375 /* Jump to the next non-whitespace word. */ 376 377 while (buf[offs] && ' ' == buf[offs]) 378 offs++; 379 380 /* 381 * Trailing whitespace. Note that tabs are allowed to be passed 382 * into the parser as "text", so we only warn about spaces here. 383 */ 384 385 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 386 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 387 ln, offs - 1, NULL); 388 389 /* 390 * If an initial macro or a list invocation, divert directly 391 * into macro processing. 392 */ 393 394 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) { 395 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 396 return 1; 397 } 398 399 n = mdoc->last; 400 assert(mdoc->last); 401 402 /* 403 * If the first macro of a `Bl -column', open an `It' block 404 * context around the parsed macro. 405 */ 406 407 if (n->tok == MDOC_Bl && n->type == ROFFT_BODY && 408 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) { 409 mdoc->flags |= MDOC_FREECOL; 410 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 411 return 1; 412 } 413 414 /* 415 * If we're following a block-level `It' within a `Bl -column' 416 * context (perhaps opened in the above block or in ptext()), 417 * then open an `It' block context around the parsed macro. 418 */ 419 420 if (n->tok == MDOC_It && n->type == ROFFT_BLOCK && 421 NULL != n->parent && 422 MDOC_Bl == n->parent->tok && 423 LIST_column == n->parent->norm->Bl.type) { 424 mdoc->flags |= MDOC_FREECOL; 425 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 426 return 1; 427 } 428 429 /* Normal processing of a macro. */ 430 431 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 432 433 /* In quick mode (for mandocdb), abort after the NAME section. */ 434 435 if (mdoc->quick && MDOC_Sh == tok && 436 SEC_NAME != mdoc->last->sec) 437 return 2; 438 439 return 1; 440 } 441 442 enum mdelim 443 mdoc_isdelim(const char *p) 444 { 445 446 if ('\0' == p[0]) 447 return DELIM_NONE; 448 449 if ('\0' == p[1]) 450 switch (p[0]) { 451 case '(': 452 case '[': 453 return DELIM_OPEN; 454 case '|': 455 return DELIM_MIDDLE; 456 case '.': 457 case ',': 458 case ';': 459 case ':': 460 case '?': 461 case '!': 462 case ')': 463 case ']': 464 return DELIM_CLOSE; 465 default: 466 return DELIM_NONE; 467 } 468 469 if ('\\' != p[0]) 470 return DELIM_NONE; 471 472 if (0 == strcmp(p + 1, ".")) 473 return DELIM_CLOSE; 474 if (0 == strcmp(p + 1, "fR|\\fP")) 475 return DELIM_MIDDLE; 476 477 return DELIM_NONE; 478 } 479 480 void 481 mdoc_validate(struct roff_man *mdoc) 482 { 483 484 mdoc->last = mdoc->first; 485 mdoc_node_validate(mdoc); 486 mdoc_state_reset(mdoc); 487 } 488