1 /* $OpenBSD: mdoc.c,v 1.158 2018/08/17 20:31:52 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2018 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdarg.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "mandoc_aux.h" 29 #include "mandoc.h" 30 #include "roff.h" 31 #include "mdoc.h" 32 #include "libmandoc.h" 33 #include "roff_int.h" 34 #include "libmdoc.h" 35 36 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 37 "split", "nosplit", "ragged", 38 "unfilled", "literal", "file", 39 "offset", "bullet", "dash", 40 "hyphen", "item", "enum", 41 "tag", "diag", "hang", 42 "ohang", "inset", "column", 43 "width", "compact", "std", 44 "filled", "words", "emphasis", 45 "symbolic", "nested", "centered" 46 }; 47 const char * const *mdoc_argnames = __mdoc_argnames; 48 49 static int mdoc_ptext(struct roff_man *, int, char *, int); 50 static int mdoc_pmacro(struct roff_man *, int, char *, int); 51 52 53 /* 54 * Main parse routine. Parses a single line -- really just hands off to 55 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 56 */ 57 int 58 mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 59 { 60 61 if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 62 mdoc->flags |= MDOC_NEWLINE; 63 64 /* 65 * Let the roff nS register switch SYNOPSIS mode early, 66 * such that the parser knows at all times 67 * whether this mode is on or off. 68 * Note that this mode is also switched by the Sh macro. 69 */ 70 if (roff_getreg(mdoc->roff, "nS")) 71 mdoc->flags |= MDOC_SYNOPSIS; 72 else 73 mdoc->flags &= ~MDOC_SYNOPSIS; 74 75 return roff_getcontrol(mdoc->roff, buf, &offs) ? 76 mdoc_pmacro(mdoc, ln, buf, offs) : 77 mdoc_ptext(mdoc, ln, buf, offs); 78 } 79 80 void 81 mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok) 82 { 83 struct roff_node *p; 84 85 p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 86 roff_node_append(mdoc, p); 87 mdoc->next = ROFF_NEXT_CHILD; 88 } 89 90 struct roff_node * 91 mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, 92 enum roff_tok tok, struct roff_node *body) 93 { 94 struct roff_node *p; 95 96 body->flags |= NODE_ENDED; 97 body->parent->flags |= NODE_ENDED; 98 p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 99 p->body = body; 100 p->norm = body->norm; 101 p->end = ENDBODY_SPACE; 102 roff_node_append(mdoc, p); 103 mdoc->next = ROFF_NEXT_SIBLING; 104 return p; 105 } 106 107 struct roff_node * 108 mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 109 enum roff_tok tok, struct mdoc_arg *args) 110 { 111 struct roff_node *p; 112 113 p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 114 p->args = args; 115 if (p->args) 116 (args->refcnt)++; 117 118 switch (tok) { 119 case MDOC_Bd: 120 case MDOC_Bf: 121 case MDOC_Bl: 122 case MDOC_En: 123 case MDOC_Rs: 124 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 125 break; 126 default: 127 break; 128 } 129 roff_node_append(mdoc, p); 130 mdoc->next = ROFF_NEXT_CHILD; 131 return p; 132 } 133 134 void 135 mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 136 enum roff_tok tok, struct mdoc_arg *args) 137 { 138 struct roff_node *p; 139 140 p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 141 p->args = args; 142 if (p->args) 143 (args->refcnt)++; 144 145 switch (tok) { 146 case MDOC_An: 147 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 148 break; 149 default: 150 break; 151 } 152 roff_node_append(mdoc, p); 153 mdoc->next = ROFF_NEXT_CHILD; 154 } 155 156 void 157 mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) 158 { 159 160 roff_node_unlink(mdoc, p); 161 p->prev = p->next = NULL; 162 roff_node_append(mdoc, p); 163 } 164 165 /* 166 * Parse free-form text, that is, a line that does not begin with the 167 * control character. 168 */ 169 static int 170 mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 171 { 172 struct roff_node *n; 173 const char *cp, *sp; 174 char *c, *ws, *end; 175 176 n = mdoc->last; 177 178 /* 179 * If a column list contains plain text, assume an implicit item 180 * macro. This can happen one or more times at the beginning 181 * of such a list, intermixed with non-It mdoc macros and with 182 * nodes generated on the roff level, for example by tbl. 183 */ 184 185 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 186 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 187 (n->parent != NULL && n->parent->tok == MDOC_Bl && 188 n->parent->norm->Bl.type == LIST_column)) { 189 mdoc->flags |= MDOC_FREECOL; 190 (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, 191 line, offs, &offs, buf); 192 return 1; 193 } 194 195 /* 196 * Search for the beginning of unescaped trailing whitespace (ws) 197 * and for the first character not to be output (end). 198 */ 199 200 /* FIXME: replace with strcspn(). */ 201 ws = NULL; 202 for (c = end = buf + offs; *c; c++) { 203 switch (*c) { 204 case ' ': 205 if (NULL == ws) 206 ws = c; 207 continue; 208 case '\t': 209 /* 210 * Always warn about trailing tabs, 211 * even outside literal context, 212 * where they should be put on the next line. 213 */ 214 if (NULL == ws) 215 ws = c; 216 /* 217 * Strip trailing tabs in literal context only; 218 * outside, they affect the next line. 219 */ 220 if (MDOC_LITERAL & mdoc->flags) 221 continue; 222 break; 223 case '\\': 224 /* Skip the escaped character, too, if any. */ 225 if (c[1]) 226 c++; 227 /* FALLTHROUGH */ 228 default: 229 ws = NULL; 230 break; 231 } 232 end = c + 1; 233 } 234 *end = '\0'; 235 236 if (ws) 237 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 238 line, (int)(ws-buf), NULL); 239 240 /* 241 * Blank lines are allowed in no-fill mode 242 * and cancel preceding \c, 243 * but add a single vertical space elsewhere. 244 */ 245 246 if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { 247 switch (mdoc->last->type) { 248 case ROFFT_TEXT: 249 sp = mdoc->last->string; 250 cp = end = strchr(sp, '\0') - 2; 251 if (cp < sp || cp[0] != '\\' || cp[1] != 'c') 252 break; 253 while (cp > sp && cp[-1] == '\\') 254 cp--; 255 if ((end - cp) % 2) 256 break; 257 *end = '\0'; 258 return 1; 259 default: 260 break; 261 } 262 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 263 line, (int)(c - buf), NULL); 264 roff_elem_alloc(mdoc, line, offs, ROFF_sp); 265 mdoc->last->flags |= NODE_VALID | NODE_ENDED; 266 mdoc->next = ROFF_NEXT_SIBLING; 267 return 1; 268 } 269 270 roff_word_alloc(mdoc, line, offs, buf+offs); 271 272 if (mdoc->flags & MDOC_LITERAL) 273 return 1; 274 275 /* 276 * End-of-sentence check. If the last character is an unescaped 277 * EOS character, then flag the node as being the end of a 278 * sentence. The front-end will know how to interpret this. 279 */ 280 281 assert(buf < end); 282 283 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 284 mdoc->last->flags |= NODE_EOS; 285 286 for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) { 287 if (c - buf < offs + 2) 288 continue; 289 if (end - c < 3) 290 break; 291 if (c[1] != ' ' || 292 isalnum((unsigned char)c[-2]) == 0 || 293 isalnum((unsigned char)c[-1]) == 0 || 294 (c[-2] == 'n' && c[-1] == 'c') || 295 (c[-2] == 'v' && c[-1] == 's')) 296 continue; 297 c += 2; 298 if (*c == ' ') 299 c++; 300 if (*c == ' ') 301 c++; 302 if (isupper((unsigned char)(*c))) 303 mandoc_msg(MANDOCERR_EOS, mdoc->parse, 304 line, (int)(c - buf), NULL); 305 } 306 307 return 1; 308 } 309 310 /* 311 * Parse a macro line, that is, a line beginning with the control 312 * character. 313 */ 314 static int 315 mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 316 { 317 struct roff_node *n; 318 const char *cp; 319 size_t sz; 320 enum roff_tok tok; 321 int sv; 322 323 /* Determine the line macro. */ 324 325 sv = offs; 326 tok = TOKEN_NONE; 327 for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++) 328 offs++; 329 if (sz == 2 || sz == 3) 330 tok = roffhash_find(mdoc->mdocmac, buf + sv, sz); 331 if (tok == TOKEN_NONE) { 332 mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 333 ln, sv, buf + sv - 1); 334 return 1; 335 } 336 337 /* Skip a leading escape sequence or tab. */ 338 339 switch (buf[offs]) { 340 case '\\': 341 cp = buf + offs + 1; 342 mandoc_escape(&cp, NULL, NULL); 343 offs = cp - buf; 344 break; 345 case '\t': 346 offs++; 347 break; 348 default: 349 break; 350 } 351 352 /* Jump to the next non-whitespace word. */ 353 354 while (buf[offs] == ' ') 355 offs++; 356 357 /* 358 * Trailing whitespace. Note that tabs are allowed to be passed 359 * into the parser as "text", so we only warn about spaces here. 360 */ 361 362 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 363 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 364 ln, offs - 1, NULL); 365 366 /* 367 * If an initial macro or a list invocation, divert directly 368 * into macro processing. 369 */ 370 371 n = mdoc->last; 372 if (n == NULL || tok == MDOC_It || tok == MDOC_El) { 373 (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf); 374 return 1; 375 } 376 377 /* 378 * If a column list contains a non-It macro, assume an implicit 379 * item macro. This can happen one or more times at the 380 * beginning of such a list, intermixed with text lines and 381 * with nodes generated on the roff level, for example by tbl. 382 */ 383 384 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 385 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 386 (n->parent != NULL && n->parent->tok == MDOC_Bl && 387 n->parent->norm->Bl.type == LIST_column)) { 388 mdoc->flags |= MDOC_FREECOL; 389 (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, ln, sv, &sv, buf); 390 return 1; 391 } 392 393 /* Normal processing of a macro. */ 394 395 (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf); 396 397 /* In quick mode (for mandocdb), abort after the NAME section. */ 398 399 if (mdoc->quick && MDOC_Sh == tok && 400 SEC_NAME != mdoc->last->sec) 401 return 2; 402 403 return 1; 404 } 405 406 enum mdelim 407 mdoc_isdelim(const char *p) 408 { 409 410 if ('\0' == p[0]) 411 return DELIM_NONE; 412 413 if ('\0' == p[1]) 414 switch (p[0]) { 415 case '(': 416 case '[': 417 return DELIM_OPEN; 418 case '|': 419 return DELIM_MIDDLE; 420 case '.': 421 case ',': 422 case ';': 423 case ':': 424 case '?': 425 case '!': 426 case ')': 427 case ']': 428 return DELIM_CLOSE; 429 default: 430 return DELIM_NONE; 431 } 432 433 if ('\\' != p[0]) 434 return DELIM_NONE; 435 436 if (0 == strcmp(p + 1, ".")) 437 return DELIM_CLOSE; 438 if (0 == strcmp(p + 1, "fR|\\fP")) 439 return DELIM_MIDDLE; 440 441 return DELIM_NONE; 442 } 443 444 void 445 mdoc_validate(struct roff_man *mdoc) 446 { 447 448 mdoc->last = mdoc->first; 449 mdoc_node_validate(mdoc); 450 mdoc_state_reset(mdoc); 451 } 452