1 /* $Id: man_macro.c,v 1.37 2012/07/07 17:39:05 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2012 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <assert.h> 19 #include <ctype.h> 20 #include <stdlib.h> 21 #include <string.h> 22 23 #include "man.h" 24 #include "mandoc.h" 25 #include "libmandoc.h" 26 #include "libman.h" 27 28 enum rew { 29 REW_REWIND, 30 REW_NOHALT, 31 REW_HALT 32 }; 33 34 static int blk_close(MACRO_PROT_ARGS); 35 static int blk_exp(MACRO_PROT_ARGS); 36 static int blk_imp(MACRO_PROT_ARGS); 37 static int in_line_eoln(MACRO_PROT_ARGS); 38 static int man_args(struct man *, int, 39 int *, char *, char **); 40 41 static int rew_scope(enum man_type, 42 struct man *, enum mant); 43 static enum rew rew_dohalt(enum mant, enum man_type, 44 const struct man_node *); 45 static enum rew rew_block(enum mant, enum man_type, 46 const struct man_node *); 47 static void rew_warn(struct man *, 48 struct man_node *, enum mandocerr); 49 50 const struct man_macro __man_macros[MAN_MAX] = { 51 { in_line_eoln, MAN_NSCOPED }, /* br */ 52 { in_line_eoln, MAN_BSCOPE }, /* TH */ 53 { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SH */ 54 { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SS */ 55 { blk_imp, MAN_BSCOPE | MAN_SCOPED | MAN_FSCOPED }, /* TP */ 56 { blk_imp, MAN_BSCOPE }, /* LP */ 57 { blk_imp, MAN_BSCOPE }, /* PP */ 58 { blk_imp, MAN_BSCOPE }, /* P */ 59 { blk_imp, MAN_BSCOPE }, /* IP */ 60 { blk_imp, MAN_BSCOPE }, /* HP */ 61 { in_line_eoln, MAN_SCOPED }, /* SM */ 62 { in_line_eoln, MAN_SCOPED }, /* SB */ 63 { in_line_eoln, 0 }, /* BI */ 64 { in_line_eoln, 0 }, /* IB */ 65 { in_line_eoln, 0 }, /* BR */ 66 { in_line_eoln, 0 }, /* RB */ 67 { in_line_eoln, MAN_SCOPED }, /* R */ 68 { in_line_eoln, MAN_SCOPED }, /* B */ 69 { in_line_eoln, MAN_SCOPED }, /* I */ 70 { in_line_eoln, 0 }, /* IR */ 71 { in_line_eoln, 0 }, /* RI */ 72 { in_line_eoln, MAN_NSCOPED }, /* na */ 73 { in_line_eoln, MAN_NSCOPED }, /* sp */ 74 { in_line_eoln, MAN_BSCOPE }, /* nf */ 75 { in_line_eoln, MAN_BSCOPE }, /* fi */ 76 { blk_close, 0 }, /* RE */ 77 { blk_exp, MAN_BSCOPE | MAN_EXPLICIT }, /* RS */ 78 { in_line_eoln, 0 }, /* DT */ 79 { in_line_eoln, 0 }, /* UC */ 80 { in_line_eoln, 0 }, /* PD */ 81 { in_line_eoln, 0 }, /* AT */ 82 { in_line_eoln, 0 }, /* in */ 83 { in_line_eoln, 0 }, /* ft */ 84 { in_line_eoln, 0 }, /* OP */ 85 { in_line_eoln, MAN_BSCOPE }, /* EX */ 86 { in_line_eoln, MAN_BSCOPE }, /* EE */ 87 }; 88 89 const struct man_macro * const man_macros = __man_macros; 90 91 92 /* 93 * Warn when "n" is an explicit non-roff macro. 94 */ 95 static void 96 rew_warn(struct man *m, struct man_node *n, enum mandocerr er) 97 { 98 99 if (er == MANDOCERR_MAX || MAN_BLOCK != n->type) 100 return; 101 if (MAN_VALID & n->flags) 102 return; 103 if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags)) 104 return; 105 106 assert(er < MANDOCERR_FATAL); 107 man_nmsg(m, n, er); 108 } 109 110 111 /* 112 * Rewind scope. If a code "er" != MANDOCERR_MAX has been provided, it 113 * will be used if an explicit block scope is being closed out. 114 */ 115 int 116 man_unscope(struct man *m, const struct man_node *to, 117 enum mandocerr er) 118 { 119 struct man_node *n; 120 121 assert(to); 122 123 m->next = MAN_NEXT_SIBLING; 124 125 /* LINTED */ 126 while (m->last != to) { 127 /* 128 * Save the parent here, because we may delete the 129 * m->last node in the post-validation phase and reset 130 * it to m->last->parent, causing a step in the closing 131 * out to be lost. 132 */ 133 n = m->last->parent; 134 rew_warn(m, m->last, er); 135 if ( ! man_valid_post(m)) 136 return(0); 137 m->last = n; 138 assert(m->last); 139 } 140 141 rew_warn(m, m->last, er); 142 if ( ! man_valid_post(m)) 143 return(0); 144 145 return(1); 146 } 147 148 149 static enum rew 150 rew_block(enum mant ntok, enum man_type type, const struct man_node *n) 151 { 152 153 if (MAN_BLOCK == type && ntok == n->parent->tok && 154 MAN_BODY == n->parent->type) 155 return(REW_REWIND); 156 return(ntok == n->tok ? REW_HALT : REW_NOHALT); 157 } 158 159 160 /* 161 * There are three scope levels: scoped to the root (all), scoped to the 162 * section (all less sections), and scoped to subsections (all less 163 * sections and subsections). 164 */ 165 static enum rew 166 rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n) 167 { 168 enum rew c; 169 170 /* We cannot progress beyond the root ever. */ 171 if (MAN_ROOT == n->type) 172 return(REW_HALT); 173 174 assert(n->parent); 175 176 /* Normal nodes shouldn't go to the level of the root. */ 177 if (MAN_ROOT == n->parent->type) 178 return(REW_REWIND); 179 180 /* Already-validated nodes should be closed out. */ 181 if (MAN_VALID & n->flags) 182 return(REW_NOHALT); 183 184 /* First: rewind to ourselves. */ 185 if (type == n->type && tok == n->tok) { 186 if (MAN_EXPLICIT & man_macros[n->tok].flags) 187 return(REW_HALT); 188 else 189 return(REW_REWIND); 190 } 191 192 /* 193 * Next follow the implicit scope-smashings as defined by man.7: 194 * section, sub-section, etc. 195 */ 196 197 switch (tok) { 198 case (MAN_SH): 199 break; 200 case (MAN_SS): 201 /* Rewind to a section, if a block. */ 202 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) 203 return(c); 204 break; 205 case (MAN_RS): 206 /* Preserve empty paragraphs before RS. */ 207 if (0 == n->nchild && (MAN_P == n->tok || 208 MAN_PP == n->tok || MAN_LP == n->tok)) 209 return(REW_HALT); 210 /* Rewind to a subsection, if a block. */ 211 if (REW_NOHALT != (c = rew_block(MAN_SS, type, n))) 212 return(c); 213 /* Rewind to a section, if a block. */ 214 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) 215 return(c); 216 break; 217 default: 218 /* Rewind to an offsetter, if a block. */ 219 if (REW_NOHALT != (c = rew_block(MAN_RS, type, n))) 220 return(c); 221 /* Rewind to a subsection, if a block. */ 222 if (REW_NOHALT != (c = rew_block(MAN_SS, type, n))) 223 return(c); 224 /* Rewind to a section, if a block. */ 225 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) 226 return(c); 227 break; 228 } 229 230 return(REW_NOHALT); 231 } 232 233 234 /* 235 * Rewinding entails ascending the parse tree until a coherent point, 236 * for example, the `SH' macro will close out any intervening `SS' 237 * scopes. When a scope is closed, it must be validated and actioned. 238 */ 239 static int 240 rew_scope(enum man_type type, struct man *m, enum mant tok) 241 { 242 struct man_node *n; 243 enum rew c; 244 245 /* LINTED */ 246 for (n = m->last; n; n = n->parent) { 247 /* 248 * Whether we should stop immediately (REW_HALT), stop 249 * and rewind until this point (REW_REWIND), or keep 250 * rewinding (REW_NOHALT). 251 */ 252 c = rew_dohalt(tok, type, n); 253 if (REW_HALT == c) 254 return(1); 255 if (REW_REWIND == c) 256 break; 257 } 258 259 /* 260 * Rewind until the current point. Warn if we're a roff 261 * instruction that's mowing over explicit scopes. 262 */ 263 assert(n); 264 265 return(man_unscope(m, n, MANDOCERR_MAX)); 266 } 267 268 269 /* 270 * Close out a generic explicit macro. 271 */ 272 /* ARGSUSED */ 273 int 274 blk_close(MACRO_PROT_ARGS) 275 { 276 enum mant ntok; 277 const struct man_node *nn; 278 279 switch (tok) { 280 case (MAN_RE): 281 ntok = MAN_RS; 282 break; 283 default: 284 abort(); 285 /* NOTREACHED */ 286 } 287 288 for (nn = m->last->parent; nn; nn = nn->parent) 289 if (ntok == nn->tok && MAN_BLOCK == nn->type) 290 break; 291 292 if (NULL != nn) 293 man_unscope(m, nn, MANDOCERR_MAX); 294 else 295 man_pmsg(m, line, ppos, MANDOCERR_NOSCOPE); 296 297 return(1); 298 } 299 300 301 /* ARGSUSED */ 302 int 303 blk_exp(MACRO_PROT_ARGS) 304 { 305 struct man_node *n; 306 int la; 307 char *p; 308 309 /* Close out prior implicit scopes. */ 310 311 if ( ! rew_scope(MAN_BLOCK, m, tok)) 312 return(0); 313 314 if ( ! man_block_alloc(m, line, ppos, tok)) 315 return(0); 316 if ( ! man_head_alloc(m, line, ppos, tok)) 317 return(0); 318 319 for (;;) { 320 la = *pos; 321 if ( ! man_args(m, line, pos, buf, &p)) 322 break; 323 if ( ! man_word_alloc(m, line, la, p)) 324 return(0); 325 } 326 327 assert(m); 328 assert(tok != MAN_MAX); 329 330 for (n = m->last; n; n = n->parent) { 331 if (n->tok != tok) 332 continue; 333 assert(MAN_HEAD == n->type); 334 man_unscope(m, n, MANDOCERR_MAX); 335 break; 336 } 337 338 return(man_body_alloc(m, line, ppos, tok)); 339 } 340 341 342 343 /* 344 * Parse an implicit-block macro. These contain a MAN_HEAD and a 345 * MAN_BODY contained within a MAN_BLOCK. Rules for closing out other 346 * scopes, such as `SH' closing out an `SS', are defined in the rew 347 * routines. 348 */ 349 /* ARGSUSED */ 350 int 351 blk_imp(MACRO_PROT_ARGS) 352 { 353 int la; 354 char *p; 355 struct man_node *n; 356 357 /* Close out prior scopes. */ 358 359 if ( ! rew_scope(MAN_BODY, m, tok)) 360 return(0); 361 if ( ! rew_scope(MAN_BLOCK, m, tok)) 362 return(0); 363 364 /* Allocate new block & head scope. */ 365 366 if ( ! man_block_alloc(m, line, ppos, tok)) 367 return(0); 368 if ( ! man_head_alloc(m, line, ppos, tok)) 369 return(0); 370 371 n = m->last; 372 373 /* Add line arguments. */ 374 375 for (;;) { 376 la = *pos; 377 if ( ! man_args(m, line, pos, buf, &p)) 378 break; 379 if ( ! man_word_alloc(m, line, la, p)) 380 return(0); 381 } 382 383 /* Close out head and open body (unless MAN_SCOPE). */ 384 385 if (MAN_SCOPED & man_macros[tok].flags) { 386 /* If we're forcing scope (`TP'), keep it open. */ 387 if (MAN_FSCOPED & man_macros[tok].flags) { 388 m->flags |= MAN_BLINE; 389 return(1); 390 } else if (n == m->last) { 391 m->flags |= MAN_BLINE; 392 return(1); 393 } 394 } 395 396 if ( ! rew_scope(MAN_HEAD, m, tok)) 397 return(0); 398 return(man_body_alloc(m, line, ppos, tok)); 399 } 400 401 402 /* ARGSUSED */ 403 int 404 in_line_eoln(MACRO_PROT_ARGS) 405 { 406 int la; 407 char *p; 408 struct man_node *n; 409 410 if ( ! man_elem_alloc(m, line, ppos, tok)) 411 return(0); 412 413 n = m->last; 414 415 for (;;) { 416 la = *pos; 417 if ( ! man_args(m, line, pos, buf, &p)) 418 break; 419 if ( ! man_word_alloc(m, line, la, p)) 420 return(0); 421 } 422 423 /* 424 * If no arguments are specified and this is MAN_SCOPED (i.e., 425 * next-line scoped), then set our mode to indicate that we're 426 * waiting for terms to load into our context. 427 */ 428 429 if (n == m->last && MAN_SCOPED & man_macros[tok].flags) { 430 assert( ! (MAN_NSCOPED & man_macros[tok].flags)); 431 m->flags |= MAN_ELINE; 432 return(1); 433 } 434 435 /* Set ignorable context, if applicable. */ 436 437 if (MAN_NSCOPED & man_macros[tok].flags) { 438 assert( ! (MAN_SCOPED & man_macros[tok].flags)); 439 m->flags |= MAN_ILINE; 440 } 441 442 assert(MAN_ROOT != m->last->type); 443 m->next = MAN_NEXT_SIBLING; 444 445 /* 446 * Rewind our element scope. Note that when TH is pruned, we'll 447 * be back at the root, so make sure that we don't clobber as 448 * its sibling. 449 */ 450 451 for ( ; m->last; m->last = m->last->parent) { 452 if (m->last == n) 453 break; 454 if (m->last->type == MAN_ROOT) 455 break; 456 if ( ! man_valid_post(m)) 457 return(0); 458 } 459 460 assert(m->last); 461 462 /* 463 * Same here regarding whether we're back at the root. 464 */ 465 466 if (m->last->type != MAN_ROOT && ! man_valid_post(m)) 467 return(0); 468 469 return(1); 470 } 471 472 473 int 474 man_macroend(struct man *m) 475 { 476 477 return(man_unscope(m, m->first, MANDOCERR_SCOPEEXIT)); 478 } 479 480 static int 481 man_args(struct man *m, int line, int *pos, char *buf, char **v) 482 { 483 char *start; 484 485 assert(*pos); 486 *v = start = buf + *pos; 487 assert(' ' != *start); 488 489 if ('\0' == *start) 490 return(0); 491 492 *v = mandoc_getarg(m->parse, v, line, pos); 493 return(1); 494 } 495