1 /* $OpenBSD: mdoc.c,v 1.163 2018/12/31 07:45:42 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2018 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdarg.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "mandoc_aux.h" 29 #include "mandoc.h" 30 #include "roff.h" 31 #include "mdoc.h" 32 #include "libmandoc.h" 33 #include "roff_int.h" 34 #include "libmdoc.h" 35 36 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 37 "split", "nosplit", "ragged", 38 "unfilled", "literal", "file", 39 "offset", "bullet", "dash", 40 "hyphen", "item", "enum", 41 "tag", "diag", "hang", 42 "ohang", "inset", "column", 43 "width", "compact", "std", 44 "filled", "words", "emphasis", 45 "symbolic", "nested", "centered" 46 }; 47 const char * const *mdoc_argnames = __mdoc_argnames; 48 49 static int mdoc_ptext(struct roff_man *, int, char *, int); 50 static int mdoc_pmacro(struct roff_man *, int, char *, int); 51 52 53 /* 54 * Main parse routine. Parses a single line -- really just hands off to 55 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 56 */ 57 int 58 mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 59 { 60 61 if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 62 mdoc->flags |= MDOC_NEWLINE; 63 64 /* 65 * Let the roff nS register switch SYNOPSIS mode early, 66 * such that the parser knows at all times 67 * whether this mode is on or off. 68 * Note that this mode is also switched by the Sh macro. 69 */ 70 if (roff_getreg(mdoc->roff, "nS")) 71 mdoc->flags |= MDOC_SYNOPSIS; 72 else 73 mdoc->flags &= ~MDOC_SYNOPSIS; 74 75 return roff_getcontrol(mdoc->roff, buf, &offs) ? 76 mdoc_pmacro(mdoc, ln, buf, offs) : 77 mdoc_ptext(mdoc, ln, buf, offs); 78 } 79 80 void 81 mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok) 82 { 83 struct roff_node *p; 84 85 p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 86 roff_node_append(mdoc, p); 87 mdoc->next = ROFF_NEXT_CHILD; 88 } 89 90 struct roff_node * 91 mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, 92 enum roff_tok tok, struct roff_node *body) 93 { 94 struct roff_node *p; 95 96 body->flags |= NODE_ENDED; 97 body->parent->flags |= NODE_ENDED; 98 p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 99 p->body = body; 100 p->norm = body->norm; 101 p->end = ENDBODY_SPACE; 102 roff_node_append(mdoc, p); 103 mdoc->next = ROFF_NEXT_SIBLING; 104 return p; 105 } 106 107 struct roff_node * 108 mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 109 enum roff_tok tok, struct mdoc_arg *args) 110 { 111 struct roff_node *p; 112 113 p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 114 p->args = args; 115 if (p->args) 116 (args->refcnt)++; 117 118 switch (tok) { 119 case MDOC_Bd: 120 case MDOC_Bf: 121 case MDOC_Bl: 122 case MDOC_En: 123 case MDOC_Rs: 124 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 125 break; 126 default: 127 break; 128 } 129 roff_node_append(mdoc, p); 130 mdoc->next = ROFF_NEXT_CHILD; 131 return p; 132 } 133 134 void 135 mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 136 enum roff_tok tok, struct mdoc_arg *args) 137 { 138 struct roff_node *p; 139 140 p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 141 p->args = args; 142 if (p->args) 143 (args->refcnt)++; 144 145 switch (tok) { 146 case MDOC_An: 147 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 148 break; 149 default: 150 break; 151 } 152 roff_node_append(mdoc, p); 153 mdoc->next = ROFF_NEXT_CHILD; 154 } 155 156 /* 157 * Parse free-form text, that is, a line that does not begin with the 158 * control character. 159 */ 160 static int 161 mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 162 { 163 struct roff_node *n; 164 const char *cp, *sp; 165 char *c, *ws, *end; 166 167 n = mdoc->last; 168 169 /* 170 * If a column list contains plain text, assume an implicit item 171 * macro. This can happen one or more times at the beginning 172 * of such a list, intermixed with non-It mdoc macros and with 173 * nodes generated on the roff level, for example by tbl. 174 */ 175 176 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 177 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 178 (n->parent != NULL && n->parent->tok == MDOC_Bl && 179 n->parent->norm->Bl.type == LIST_column)) { 180 mdoc->flags |= MDOC_FREECOL; 181 (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, 182 line, offs, &offs, buf); 183 return 1; 184 } 185 186 /* 187 * Search for the beginning of unescaped trailing whitespace (ws) 188 * and for the first character not to be output (end). 189 */ 190 191 /* FIXME: replace with strcspn(). */ 192 ws = NULL; 193 for (c = end = buf + offs; *c; c++) { 194 switch (*c) { 195 case ' ': 196 if (NULL == ws) 197 ws = c; 198 continue; 199 case '\t': 200 /* 201 * Always warn about trailing tabs, 202 * even outside literal context, 203 * where they should be put on the next line. 204 */ 205 if (NULL == ws) 206 ws = c; 207 /* 208 * Strip trailing tabs in literal context only; 209 * outside, they affect the next line. 210 */ 211 if (mdoc->flags & ROFF_NOFILL) 212 continue; 213 break; 214 case '\\': 215 /* Skip the escaped character, too, if any. */ 216 if (c[1]) 217 c++; 218 /* FALLTHROUGH */ 219 default: 220 ws = NULL; 221 break; 222 } 223 end = c + 1; 224 } 225 *end = '\0'; 226 227 if (ws) 228 mandoc_msg(MANDOCERR_SPACE_EOL, line, (int)(ws - buf), NULL); 229 230 /* 231 * Blank lines are allowed in no-fill mode 232 * and cancel preceding \c, 233 * but add a single vertical space elsewhere. 234 */ 235 236 if (buf[offs] == '\0' && (mdoc->flags & ROFF_NOFILL) == 0) { 237 switch (mdoc->last->type) { 238 case ROFFT_TEXT: 239 sp = mdoc->last->string; 240 cp = end = strchr(sp, '\0') - 2; 241 if (cp < sp || cp[0] != '\\' || cp[1] != 'c') 242 break; 243 while (cp > sp && cp[-1] == '\\') 244 cp--; 245 if ((end - cp) % 2) 246 break; 247 *end = '\0'; 248 return 1; 249 default: 250 break; 251 } 252 mandoc_msg(MANDOCERR_FI_BLANK, line, (int)(c - buf), NULL); 253 roff_elem_alloc(mdoc, line, offs, ROFF_sp); 254 mdoc->last->flags |= NODE_VALID | NODE_ENDED; 255 mdoc->next = ROFF_NEXT_SIBLING; 256 return 1; 257 } 258 259 roff_word_alloc(mdoc, line, offs, buf+offs); 260 261 if (mdoc->flags & ROFF_NOFILL) 262 return 1; 263 264 /* 265 * End-of-sentence check. If the last character is an unescaped 266 * EOS character, then flag the node as being the end of a 267 * sentence. The front-end will know how to interpret this. 268 */ 269 270 assert(buf < end); 271 272 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 273 mdoc->last->flags |= NODE_EOS; 274 275 for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) { 276 if (c - buf < offs + 2) 277 continue; 278 if (end - c < 3) 279 break; 280 if (c[1] != ' ' || 281 isalnum((unsigned char)c[-2]) == 0 || 282 isalnum((unsigned char)c[-1]) == 0 || 283 (c[-2] == 'n' && c[-1] == 'c') || 284 (c[-2] == 'v' && c[-1] == 's')) 285 continue; 286 c += 2; 287 if (*c == ' ') 288 c++; 289 if (*c == ' ') 290 c++; 291 if (isupper((unsigned char)(*c))) 292 mandoc_msg(MANDOCERR_EOS, line, (int)(c - buf), NULL); 293 } 294 295 return 1; 296 } 297 298 /* 299 * Parse a macro line, that is, a line beginning with the control 300 * character. 301 */ 302 static int 303 mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 304 { 305 struct roff_node *n; 306 const char *cp; 307 size_t sz; 308 enum roff_tok tok; 309 int sv; 310 311 /* Determine the line macro. */ 312 313 sv = offs; 314 tok = TOKEN_NONE; 315 for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++) 316 offs++; 317 if (sz == 2 || sz == 3) 318 tok = roffhash_find(mdoc->mdocmac, buf + sv, sz); 319 if (tok == TOKEN_NONE) { 320 mandoc_msg(MANDOCERR_MACRO, ln, sv, "%s", buf + sv - 1); 321 return 1; 322 } 323 324 /* Skip a leading escape sequence or tab. */ 325 326 switch (buf[offs]) { 327 case '\\': 328 cp = buf + offs + 1; 329 mandoc_escape(&cp, NULL, NULL); 330 offs = cp - buf; 331 break; 332 case '\t': 333 offs++; 334 break; 335 default: 336 break; 337 } 338 339 /* Jump to the next non-whitespace word. */ 340 341 while (buf[offs] == ' ') 342 offs++; 343 344 /* 345 * Trailing whitespace. Note that tabs are allowed to be passed 346 * into the parser as "text", so we only warn about spaces here. 347 */ 348 349 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 350 mandoc_msg(MANDOCERR_SPACE_EOL, ln, offs - 1, NULL); 351 352 /* 353 * If an initial macro or a list invocation, divert directly 354 * into macro processing. 355 */ 356 357 n = mdoc->last; 358 if (n == NULL || tok == MDOC_It || tok == MDOC_El) { 359 (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf); 360 return 1; 361 } 362 363 /* 364 * If a column list contains a non-It macro, assume an implicit 365 * item macro. This can happen one or more times at the 366 * beginning of such a list, intermixed with text lines and 367 * with nodes generated on the roff level, for example by tbl. 368 */ 369 370 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 371 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 372 (n->parent != NULL && n->parent->tok == MDOC_Bl && 373 n->parent->norm->Bl.type == LIST_column)) { 374 mdoc->flags |= MDOC_FREECOL; 375 (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, ln, sv, &sv, buf); 376 return 1; 377 } 378 379 /* Normal processing of a macro. */ 380 381 (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf); 382 383 /* In quick mode (for mandocdb), abort after the NAME section. */ 384 385 if (mdoc->quick && MDOC_Sh == tok && 386 SEC_NAME != mdoc->last->sec) 387 return 2; 388 389 return 1; 390 } 391 392 enum mdelim 393 mdoc_isdelim(const char *p) 394 { 395 396 if ('\0' == p[0]) 397 return DELIM_NONE; 398 399 if ('\0' == p[1]) 400 switch (p[0]) { 401 case '(': 402 case '[': 403 return DELIM_OPEN; 404 case '|': 405 return DELIM_MIDDLE; 406 case '.': 407 case ',': 408 case ';': 409 case ':': 410 case '?': 411 case '!': 412 case ')': 413 case ']': 414 return DELIM_CLOSE; 415 default: 416 return DELIM_NONE; 417 } 418 419 if ('\\' != p[0]) 420 return DELIM_NONE; 421 422 if (0 == strcmp(p + 1, ".")) 423 return DELIM_CLOSE; 424 if (0 == strcmp(p + 1, "fR|\\fP")) 425 return DELIM_MIDDLE; 426 427 return DELIM_NONE; 428 } 429