1 /* $Id: mdoc_macro.c,v 1.99 2010/12/15 23:39:40 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdlib.h> 25 #include <stdio.h> 26 #include <string.h> 27 #include <time.h> 28 29 #include "mandoc.h" 30 #include "libmdoc.h" 31 #include "libmandoc.h" 32 33 enum rew { /* see rew_dohalt() */ 34 REWIND_NONE, 35 REWIND_THIS, 36 REWIND_MORE, 37 REWIND_FORCE, 38 REWIND_LATER, 39 REWIND_ERROR 40 }; 41 42 static int blk_full(MACRO_PROT_ARGS); 43 static int blk_exp_close(MACRO_PROT_ARGS); 44 static int blk_part_exp(MACRO_PROT_ARGS); 45 static int blk_part_imp(MACRO_PROT_ARGS); 46 static int ctx_synopsis(MACRO_PROT_ARGS); 47 static int in_line_eoln(MACRO_PROT_ARGS); 48 static int in_line_argn(MACRO_PROT_ARGS); 49 static int in_line(MACRO_PROT_ARGS); 50 static int obsolete(MACRO_PROT_ARGS); 51 static int phrase_ta(MACRO_PROT_ARGS); 52 53 static int append_delims(struct mdoc *, 54 int, int *, char *); 55 static enum mdoct lookup(enum mdoct, const char *); 56 static enum mdoct lookup_raw(const char *); 57 static int make_pending(struct mdoc_node *, enum mdoct, 58 struct mdoc *, int, int); 59 static int phrase(struct mdoc *, int, int, char *); 60 static enum mdoct rew_alt(enum mdoct); 61 static enum rew rew_dohalt(enum mdoct, enum mdoc_type, 62 const struct mdoc_node *); 63 static int rew_elem(struct mdoc *, enum mdoct); 64 static int rew_last(struct mdoc *, 65 const struct mdoc_node *); 66 static int rew_sub(enum mdoc_type, struct mdoc *, 67 enum mdoct, int, int); 68 69 const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { 70 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ap */ 71 { in_line_eoln, MDOC_PROLOGUE }, /* Dd */ 72 { in_line_eoln, MDOC_PROLOGUE }, /* Dt */ 73 { in_line_eoln, MDOC_PROLOGUE }, /* Os */ 74 { blk_full, 0 }, /* Sh */ 75 { blk_full, 0 }, /* Ss */ 76 { in_line_eoln, 0 }, /* Pp */ 77 { blk_part_imp, MDOC_PARSED }, /* D1 */ 78 { blk_part_imp, MDOC_PARSED }, /* Dl */ 79 { blk_full, MDOC_EXPLICIT }, /* Bd */ 80 { blk_exp_close, MDOC_EXPLICIT }, /* Ed */ 81 { blk_full, MDOC_EXPLICIT }, /* Bl */ 82 { blk_exp_close, MDOC_EXPLICIT }, /* El */ 83 { blk_full, MDOC_PARSED }, /* It */ 84 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ad */ 85 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* An */ 86 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ar */ 87 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Cd */ 88 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Cm */ 89 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Dv */ 90 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Er */ 91 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ev */ 92 { in_line_eoln, 0 }, /* Ex */ 93 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fa */ 94 { in_line_eoln, 0 }, /* Fd */ 95 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fl */ 96 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fn */ 97 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ft */ 98 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ic */ 99 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* In */ 100 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Li */ 101 { blk_full, 0 }, /* Nd */ 102 { ctx_synopsis, MDOC_CALLABLE | MDOC_PARSED }, /* Nm */ 103 { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Op */ 104 { obsolete, 0 }, /* Ot */ 105 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Pa */ 106 { in_line_eoln, 0 }, /* Rv */ 107 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* St */ 108 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Va */ 109 { ctx_synopsis, MDOC_CALLABLE | MDOC_PARSED }, /* Vt */ 110 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Xr */ 111 { in_line_eoln, 0 }, /* %A */ 112 { in_line_eoln, 0 }, /* %B */ 113 { in_line_eoln, 0 }, /* %D */ 114 { in_line_eoln, 0 }, /* %I */ 115 { in_line_eoln, 0 }, /* %J */ 116 { in_line_eoln, 0 }, /* %N */ 117 { in_line_eoln, 0 }, /* %O */ 118 { in_line_eoln, 0 }, /* %P */ 119 { in_line_eoln, 0 }, /* %R */ 120 { in_line_eoln, 0 }, /* %T */ 121 { in_line_eoln, 0 }, /* %V */ 122 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Ac */ 123 { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Ao */ 124 { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Aq */ 125 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* At */ 126 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Bc */ 127 { blk_full, MDOC_EXPLICIT }, /* Bf */ 128 { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Bo */ 129 { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Bq */ 130 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Bsx */ 131 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Bx */ 132 { in_line_eoln, 0 }, /* Db */ 133 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Dc */ 134 { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Do */ 135 { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Dq */ 136 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Ec */ 137 { blk_exp_close, MDOC_EXPLICIT }, /* Ef */ 138 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Em */ 139 { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Eo */ 140 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Fx */ 141 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ms */ 142 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_IGNDELIM }, /* No */ 143 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_IGNDELIM }, /* Ns */ 144 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Nx */ 145 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ox */ 146 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Pc */ 147 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_IGNDELIM }, /* Pf */ 148 { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Po */ 149 { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Pq */ 150 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Qc */ 151 { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Ql */ 152 { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Qo */ 153 { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Qq */ 154 { blk_exp_close, MDOC_EXPLICIT }, /* Re */ 155 { blk_full, MDOC_EXPLICIT }, /* Rs */ 156 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Sc */ 157 { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* So */ 158 { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Sq */ 159 { in_line_eoln, 0 }, /* Sm */ 160 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Sx */ 161 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Sy */ 162 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Tn */ 163 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ux */ 164 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Xc */ 165 { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Xo */ 166 { blk_full, MDOC_EXPLICIT | MDOC_CALLABLE }, /* Fo */ 167 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Fc */ 168 { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Oo */ 169 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Oc */ 170 { blk_full, MDOC_EXPLICIT }, /* Bk */ 171 { blk_exp_close, MDOC_EXPLICIT }, /* Ek */ 172 { in_line_eoln, 0 }, /* Bt */ 173 { in_line_eoln, 0 }, /* Hf */ 174 { obsolete, 0 }, /* Fr */ 175 { in_line_eoln, 0 }, /* Ud */ 176 { in_line, 0 }, /* Lb */ 177 { in_line_eoln, 0 }, /* Lp */ 178 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Lk */ 179 { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Mt */ 180 { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Brq */ 181 { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Bro */ 182 { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Brc */ 183 { in_line_eoln, 0 }, /* %C */ 184 { obsolete, 0 }, /* Es */ 185 { obsolete, 0 }, /* En */ 186 { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Dx */ 187 { in_line_eoln, 0 }, /* %Q */ 188 { in_line_eoln, 0 }, /* br */ 189 { in_line_eoln, 0 }, /* sp */ 190 { in_line_eoln, 0 }, /* %U */ 191 { phrase_ta, MDOC_CALLABLE | MDOC_PARSED }, /* Ta */ 192 }; 193 194 const struct mdoc_macro * const mdoc_macros = __mdoc_macros; 195 196 197 /* 198 * This is called at the end of parsing. It must traverse up the tree, 199 * closing out open [implicit] scopes. Obviously, open explicit scopes 200 * are errors. 201 */ 202 int 203 mdoc_macroend(struct mdoc *m) 204 { 205 struct mdoc_node *n; 206 207 /* Scan for open explicit scopes. */ 208 209 n = MDOC_VALID & m->last->flags ? m->last->parent : m->last; 210 211 for ( ; n; n = n->parent) 212 if (MDOC_BLOCK == n->type && 213 MDOC_EXPLICIT & mdoc_macros[n->tok].flags) 214 mdoc_nmsg(m, n, MANDOCERR_SCOPEEXIT); 215 216 /* Rewind to the first. */ 217 218 return(rew_last(m, m->first)); 219 } 220 221 222 /* 223 * Look up a macro from within a subsequent context. 224 */ 225 static enum mdoct 226 lookup(enum mdoct from, const char *p) 227 { 228 /* FIXME: make -diag lists be un-PARSED. */ 229 230 if ( ! (MDOC_PARSED & mdoc_macros[from].flags)) 231 return(MDOC_MAX); 232 return(lookup_raw(p)); 233 } 234 235 236 /* 237 * Lookup a macro following the initial line macro. 238 */ 239 static enum mdoct 240 lookup_raw(const char *p) 241 { 242 enum mdoct res; 243 244 if (MDOC_MAX == (res = mdoc_hash_find(p))) 245 return(MDOC_MAX); 246 if (MDOC_CALLABLE & mdoc_macros[res].flags) 247 return(res); 248 return(MDOC_MAX); 249 } 250 251 252 static int 253 rew_last(struct mdoc *mdoc, const struct mdoc_node *to) 254 { 255 struct mdoc_node *n; 256 257 assert(to); 258 mdoc->next = MDOC_NEXT_SIBLING; 259 260 /* LINTED */ 261 while (mdoc->last != to) { 262 if ( ! mdoc_valid_post(mdoc)) 263 return(0); 264 n = mdoc->last; 265 mdoc->last = mdoc->last->parent; 266 assert(mdoc->last); 267 mdoc->last->last = n; 268 } 269 270 return(mdoc_valid_post(mdoc)); 271 } 272 273 274 /* 275 * For a block closing macro, return the corresponding opening one. 276 * Otherwise, return the macro itself. 277 */ 278 static enum mdoct 279 rew_alt(enum mdoct tok) 280 { 281 switch (tok) { 282 case (MDOC_Ac): 283 return(MDOC_Ao); 284 case (MDOC_Bc): 285 return(MDOC_Bo); 286 case (MDOC_Brc): 287 return(MDOC_Bro); 288 case (MDOC_Dc): 289 return(MDOC_Do); 290 case (MDOC_Ec): 291 return(MDOC_Eo); 292 case (MDOC_Ed): 293 return(MDOC_Bd); 294 case (MDOC_Ef): 295 return(MDOC_Bf); 296 case (MDOC_Ek): 297 return(MDOC_Bk); 298 case (MDOC_El): 299 return(MDOC_Bl); 300 case (MDOC_Fc): 301 return(MDOC_Fo); 302 case (MDOC_Oc): 303 return(MDOC_Oo); 304 case (MDOC_Pc): 305 return(MDOC_Po); 306 case (MDOC_Qc): 307 return(MDOC_Qo); 308 case (MDOC_Re): 309 return(MDOC_Rs); 310 case (MDOC_Sc): 311 return(MDOC_So); 312 case (MDOC_Xc): 313 return(MDOC_Xo); 314 default: 315 return(tok); 316 } 317 /* NOTREACHED */ 318 } 319 320 321 /* 322 * Rewinding to tok, how do we have to handle *p? 323 * REWIND_NONE: *p would delimit tok, but no tok scope is open 324 * inside *p, so there is no need to rewind anything at all. 325 * REWIND_THIS: *p matches tok, so rewind *p and nothing else. 326 * REWIND_MORE: *p is implicit, rewind it and keep searching for tok. 327 * REWIND_FORCE: *p is explicit, but tok is full, force rewinding *p. 328 * REWIND_LATER: *p is explicit and still open, postpone rewinding. 329 * REWIND_ERROR: No tok block is open at all. 330 */ 331 static enum rew 332 rew_dohalt(enum mdoct tok, enum mdoc_type type, 333 const struct mdoc_node *p) 334 { 335 336 /* 337 * No matching token, no delimiting block, no broken block. 338 * This can happen when full implicit macros are called for 339 * the first time but try to rewind their previous 340 * instance anyway. 341 */ 342 if (MDOC_ROOT == p->type) 343 return(MDOC_BLOCK == type && 344 MDOC_EXPLICIT & mdoc_macros[tok].flags ? 345 REWIND_ERROR : REWIND_NONE); 346 347 /* 348 * When starting to rewind, skip plain text 349 * and nodes that have already been rewound. 350 */ 351 if (MDOC_TEXT == p->type || MDOC_VALID & p->flags) 352 return(REWIND_MORE); 353 354 /* 355 * The easiest case: Found a matching token. 356 * This applies to both blocks and elements. 357 */ 358 tok = rew_alt(tok); 359 if (tok == p->tok) 360 return(p->end ? REWIND_NONE : 361 type == p->type ? REWIND_THIS : REWIND_MORE); 362 363 /* 364 * While elements do require rewinding for themselves, 365 * they never affect rewinding of other nodes. 366 */ 367 if (MDOC_ELEM == p->type) 368 return(REWIND_MORE); 369 370 /* 371 * Blocks delimited by our target token get REWIND_MORE. 372 * Blocks delimiting our target token get REWIND_NONE. 373 */ 374 switch (tok) { 375 case (MDOC_Bl): 376 if (MDOC_It == p->tok) 377 return(REWIND_MORE); 378 break; 379 case (MDOC_It): 380 if (MDOC_BODY == p->type && MDOC_Bl == p->tok) 381 return(REWIND_NONE); 382 break; 383 /* 384 * XXX Badly nested block handling still fails badly 385 * when one block is breaking two blocks of the same type. 386 * This is an incomplete and extremely ugly workaround, 387 * required to let the OpenBSD tree build. 388 */ 389 case (MDOC_Oo): 390 if (MDOC_Op == p->tok) 391 return(REWIND_MORE); 392 break; 393 case (MDOC_Nm): 394 return(REWIND_NONE); 395 case (MDOC_Nd): 396 /* FALLTHROUGH */ 397 case (MDOC_Ss): 398 if (MDOC_BODY == p->type && MDOC_Sh == p->tok) 399 return(REWIND_NONE); 400 /* FALLTHROUGH */ 401 case (MDOC_Sh): 402 if (MDOC_Nd == p->tok || MDOC_Ss == p->tok || 403 MDOC_Sh == p->tok) 404 return(REWIND_MORE); 405 break; 406 default: 407 break; 408 } 409 410 /* 411 * Default block rewinding rules. 412 * In particular, always skip block end markers, 413 * and let all blocks rewind Nm children. 414 */ 415 if (ENDBODY_NOT != p->end || MDOC_Nm == p->tok || 416 (MDOC_BLOCK == p->type && 417 ! (MDOC_EXPLICIT & mdoc_macros[tok].flags))) 418 return(REWIND_MORE); 419 420 /* 421 * By default, closing out full blocks 422 * forces closing of broken explicit blocks, 423 * while closing out partial blocks 424 * allows delayed rewinding by default. 425 */ 426 return (&blk_full == mdoc_macros[tok].fp ? 427 REWIND_FORCE : REWIND_LATER); 428 } 429 430 431 static int 432 rew_elem(struct mdoc *mdoc, enum mdoct tok) 433 { 434 struct mdoc_node *n; 435 436 n = mdoc->last; 437 if (MDOC_ELEM != n->type) 438 n = n->parent; 439 assert(MDOC_ELEM == n->type); 440 assert(tok == n->tok); 441 442 return(rew_last(mdoc, n)); 443 } 444 445 446 /* 447 * We are trying to close a block identified by tok, 448 * but the child block *broken is still open. 449 * Thus, postpone closing the tok block 450 * until the rew_sub call closing *broken. 451 */ 452 static int 453 make_pending(struct mdoc_node *broken, enum mdoct tok, 454 struct mdoc *m, int line, int ppos) 455 { 456 struct mdoc_node *breaker; 457 458 /* 459 * Iterate backwards, searching for the block matching tok, 460 * that is, the block breaking the *broken block. 461 */ 462 for (breaker = broken->parent; breaker; breaker = breaker->parent) { 463 464 /* 465 * If the *broken block had already been broken before 466 * and we encounter its breaker, make the tok block 467 * pending on the inner breaker. 468 * Graphically, "[A breaker=[B broken=[C->B B] tok=A] C]" 469 * becomes "[A broken=[B [C->B B] tok=A] C]" 470 * and finally "[A [B->A [C->B B] A] C]". 471 */ 472 if (breaker == broken->pending) { 473 broken = breaker; 474 continue; 475 } 476 477 if (REWIND_THIS != rew_dohalt(tok, MDOC_BLOCK, breaker)) 478 continue; 479 if (MDOC_BODY == broken->type) 480 broken = broken->parent; 481 482 /* 483 * Found the breaker. 484 * If another, outer breaker is already pending on 485 * the *broken block, we must not clobber the link 486 * to the outer breaker, but make it pending on the 487 * new, now inner breaker. 488 * Graphically, "[A breaker=[B broken=[C->A A] tok=B] C]" 489 * becomes "[A breaker=[B->A broken=[C A] tok=B] C]" 490 * and finally "[A [B->A [C->B A] B] C]". 491 */ 492 if (broken->pending) { 493 struct mdoc_node *taker; 494 495 /* 496 * If the breaker had also been broken before, 497 * it cannot take on the outer breaker itself, 498 * but must hand it on to its own breakers. 499 * Graphically, this is the following situation: 500 * "[A [B breaker=[C->B B] broken=[D->A A] tok=C] D]" 501 * "[A taker=[B->A breaker=[C->B B] [D->C A] C] D]" 502 */ 503 taker = breaker; 504 while (taker->pending) 505 taker = taker->pending; 506 taker->pending = broken->pending; 507 } 508 broken->pending = breaker; 509 mdoc_vmsg(m, MANDOCERR_SCOPENEST, line, ppos, 510 "%s breaks %s", mdoc_macronames[tok], 511 mdoc_macronames[broken->tok]); 512 return(1); 513 } 514 515 /* 516 * Found no matching block for tok. 517 * Are you trying to close a block that is not open? 518 */ 519 return(0); 520 } 521 522 523 static int 524 rew_sub(enum mdoc_type t, struct mdoc *m, 525 enum mdoct tok, int line, int ppos) 526 { 527 struct mdoc_node *n; 528 529 n = m->last; 530 while (n) { 531 switch (rew_dohalt(tok, t, n)) { 532 case (REWIND_NONE): 533 return(1); 534 case (REWIND_THIS): 535 break; 536 case (REWIND_FORCE): 537 mdoc_vmsg(m, MANDOCERR_SCOPEBROKEN, line, ppos, 538 "%s breaks %s", mdoc_macronames[tok], 539 mdoc_macronames[n->tok]); 540 /* FALLTHROUGH */ 541 case (REWIND_MORE): 542 n = n->parent; 543 continue; 544 case (REWIND_LATER): 545 if (make_pending(n, tok, m, line, ppos) || 546 MDOC_BLOCK != t) 547 return(1); 548 /* FALLTHROUGH */ 549 case (REWIND_ERROR): 550 mdoc_pmsg(m, line, ppos, MANDOCERR_NOSCOPE); 551 return(1); 552 } 553 break; 554 } 555 556 assert(n); 557 if ( ! rew_last(m, n)) 558 return(0); 559 560 /* 561 * The current block extends an enclosing block. 562 * Now that the current block ends, close the enclosing block, too. 563 */ 564 while (NULL != (n = n->pending)) { 565 if ( ! rew_last(m, n)) 566 return(0); 567 if (MDOC_HEAD == n->type && 568 ! mdoc_body_alloc(m, n->line, n->pos, n->tok)) 569 return(0); 570 } 571 572 return(1); 573 } 574 575 576 static int 577 append_delims(struct mdoc *m, int line, int *pos, char *buf) 578 { 579 int la; 580 enum margserr ac; 581 char *p; 582 583 if ('\0' == buf[*pos]) 584 return(1); 585 586 for (;;) { 587 la = *pos; 588 ac = mdoc_zargs(m, line, pos, buf, ARGS_NOWARN, &p); 589 590 if (ARGS_ERROR == ac) 591 return(0); 592 else if (ARGS_EOLN == ac) 593 break; 594 595 assert(DELIM_NONE != mdoc_isdelim(p)); 596 if ( ! mdoc_word_alloc(m, line, la, p)) 597 return(0); 598 599 /* 600 * If we encounter end-of-sentence symbols, then trigger 601 * the double-space. 602 * 603 * XXX: it's easy to allow this to propogate outward to 604 * the last symbol, such that `. )' will cause the 605 * correct double-spacing. However, (1) groff isn't 606 * smart enough to do this and (2) it would require 607 * knowing which symbols break this behaviour, for 608 * example, `. ;' shouldn't propogate the double-space. 609 */ 610 if (mandoc_eos(p, strlen(p), 0)) 611 m->last->flags |= MDOC_EOS; 612 } 613 614 return(1); 615 } 616 617 618 /* 619 * Close out block partial/full explicit. 620 */ 621 static int 622 blk_exp_close(MACRO_PROT_ARGS) 623 { 624 struct mdoc_node *body; /* Our own body. */ 625 struct mdoc_node *later; /* A sub-block starting later. */ 626 struct mdoc_node *n; /* For searching backwards. */ 627 628 int j, lastarg, maxargs, flushed, nl; 629 enum margserr ac; 630 enum mdoct atok, ntok; 631 char *p; 632 633 nl = MDOC_NEWLINE & m->flags; 634 635 switch (tok) { 636 case (MDOC_Ec): 637 maxargs = 1; 638 break; 639 default: 640 maxargs = 0; 641 break; 642 } 643 644 /* 645 * Search backwards for beginnings of blocks, 646 * both of our own and of pending sub-blocks. 647 */ 648 atok = rew_alt(tok); 649 body = later = NULL; 650 for (n = m->last; n; n = n->parent) { 651 if (MDOC_VALID & n->flags) 652 continue; 653 654 /* Remember the start of our own body. */ 655 if (MDOC_BODY == n->type && atok == n->tok) { 656 if (ENDBODY_NOT == n->end) 657 body = n; 658 continue; 659 } 660 661 if (MDOC_BLOCK != n->type || MDOC_Nm == n->tok) 662 continue; 663 if (atok == n->tok) { 664 assert(body); 665 666 /* 667 * Found the start of our own block. 668 * When there is no pending sub block, 669 * just proceed to closing out. 670 */ 671 if (NULL == later) 672 break; 673 674 /* 675 * When there is a pending sub block, 676 * postpone closing out the current block 677 * until the rew_sub() closing out the sub-block. 678 */ 679 make_pending(later, tok, m, line, ppos); 680 681 /* 682 * Mark the place where the formatting - but not 683 * the scope - of the current block ends. 684 */ 685 if ( ! mdoc_endbody_alloc(m, line, ppos, 686 atok, body, ENDBODY_SPACE)) 687 return(0); 688 break; 689 } 690 691 /* 692 * When finding an open sub block, remember the last 693 * open explicit block, or, in case there are only 694 * implicit ones, the first open implicit block. 695 */ 696 if (later && 697 MDOC_EXPLICIT & mdoc_macros[later->tok].flags) 698 continue; 699 if (MDOC_CALLABLE & mdoc_macros[n->tok].flags) 700 later = n; 701 } 702 703 if ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) { 704 /* FIXME: do this in validate */ 705 if (buf[*pos]) 706 if ( ! mdoc_pmsg(m, line, ppos, MANDOCERR_ARGSLOST)) 707 return(0); 708 709 if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) 710 return(0); 711 return(rew_sub(MDOC_BLOCK, m, tok, line, ppos)); 712 } 713 714 if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) 715 return(0); 716 717 if (NULL == later && maxargs > 0) 718 if ( ! mdoc_tail_alloc(m, line, ppos, rew_alt(tok))) 719 return(0); 720 721 for (flushed = j = 0; ; j++) { 722 lastarg = *pos; 723 724 if (j == maxargs && ! flushed) { 725 if ( ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) 726 return(0); 727 flushed = 1; 728 } 729 730 ac = mdoc_args(m, line, pos, buf, tok, &p); 731 732 if (ARGS_ERROR == ac) 733 return(0); 734 if (ARGS_PUNCT == ac) 735 break; 736 if (ARGS_EOLN == ac) 737 break; 738 739 ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); 740 741 if (MDOC_MAX == ntok) { 742 if ( ! mdoc_word_alloc(m, line, lastarg, p)) 743 return(0); 744 continue; 745 } 746 747 if ( ! flushed) { 748 if ( ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) 749 return(0); 750 flushed = 1; 751 } 752 if ( ! mdoc_macro(m, ntok, line, lastarg, pos, buf)) 753 return(0); 754 break; 755 } 756 757 if ( ! flushed && ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) 758 return(0); 759 760 if ( ! nl) 761 return(1); 762 return(append_delims(m, line, pos, buf)); 763 } 764 765 766 static int 767 in_line(MACRO_PROT_ARGS) 768 { 769 int la, scope, cnt, nc, nl; 770 enum margverr av; 771 enum mdoct ntok; 772 enum margserr ac; 773 enum mdelim d; 774 struct mdoc_arg *arg; 775 char *p; 776 777 nl = MDOC_NEWLINE & m->flags; 778 779 /* 780 * Whether we allow ignored elements (those without content, 781 * usually because of reserved words) to squeak by. 782 */ 783 784 switch (tok) { 785 case (MDOC_An): 786 /* FALLTHROUGH */ 787 case (MDOC_Ar): 788 /* FALLTHROUGH */ 789 case (MDOC_Fl): 790 /* FALLTHROUGH */ 791 case (MDOC_Mt): 792 /* FALLTHROUGH */ 793 case (MDOC_Nm): 794 /* FALLTHROUGH */ 795 case (MDOC_Pa): 796 nc = 1; 797 break; 798 default: 799 nc = 0; 800 break; 801 } 802 803 for (arg = NULL;; ) { 804 la = *pos; 805 av = mdoc_argv(m, line, tok, &arg, pos, buf); 806 807 if (ARGV_WORD == av) { 808 *pos = la; 809 break; 810 } 811 if (ARGV_EOLN == av) 812 break; 813 if (ARGV_ARG == av) 814 continue; 815 816 mdoc_argv_free(arg); 817 return(0); 818 } 819 820 for (cnt = scope = 0;; ) { 821 la = *pos; 822 ac = mdoc_args(m, line, pos, buf, tok, &p); 823 824 if (ARGS_ERROR == ac) 825 return(0); 826 if (ARGS_EOLN == ac) 827 break; 828 if (ARGS_PUNCT == ac) 829 break; 830 831 ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); 832 833 /* 834 * In this case, we've located a submacro and must 835 * execute it. Close out scope, if open. If no 836 * elements have been generated, either create one (nc) 837 * or raise a warning. 838 */ 839 840 if (MDOC_MAX != ntok) { 841 if (scope && ! rew_elem(m, tok)) 842 return(0); 843 if (nc && 0 == cnt) { 844 if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) 845 return(0); 846 if ( ! rew_last(m, m->last)) 847 return(0); 848 } else if ( ! nc && 0 == cnt) { 849 mdoc_argv_free(arg); 850 if ( ! mdoc_pmsg(m, line, ppos, MANDOCERR_MACROEMPTY)) 851 return(0); 852 } 853 if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) 854 return(0); 855 if ( ! nl) 856 return(1); 857 return(append_delims(m, line, pos, buf)); 858 } 859 860 /* 861 * Non-quote-enclosed punctuation. Set up our scope, if 862 * a word; rewind the scope, if a delimiter; then append 863 * the word. 864 */ 865 866 d = ARGS_QWORD == ac ? DELIM_NONE : mdoc_isdelim(p); 867 868 if (DELIM_NONE != d) { 869 /* 870 * If we encounter closing punctuation, no word 871 * has been omitted, no scope is open, and we're 872 * allowed to have an empty element, then start 873 * a new scope. `Ar', `Fl', and `Li', only do 874 * this once per invocation. There may be more 875 * of these (all of them?). 876 */ 877 if (0 == cnt && (nc || MDOC_Li == tok) && 878 DELIM_CLOSE == d && ! scope) { 879 if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) 880 return(0); 881 if (MDOC_Ar == tok || MDOC_Li == tok || 882 MDOC_Fl == tok) 883 cnt++; 884 scope = 1; 885 } 886 /* 887 * Close out our scope, if one is open, before 888 * any punctuation. 889 */ 890 if (scope && ! rew_elem(m, tok)) 891 return(0); 892 scope = 0; 893 } else if ( ! scope) { 894 if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) 895 return(0); 896 scope = 1; 897 } 898 899 if (DELIM_NONE == d) 900 cnt++; 901 if ( ! mdoc_word_alloc(m, line, la, p)) 902 return(0); 903 904 /* 905 * `Fl' macros have their scope re-opened with each new 906 * word so that the `-' can be added to each one without 907 * having to parse out spaces. 908 */ 909 if (scope && MDOC_Fl == tok) { 910 if ( ! rew_elem(m, tok)) 911 return(0); 912 scope = 0; 913 } 914 } 915 916 if (scope && ! rew_elem(m, tok)) 917 return(0); 918 919 /* 920 * If no elements have been collected and we're allowed to have 921 * empties (nc), open a scope and close it out. Otherwise, 922 * raise a warning. 923 */ 924 925 if (nc && 0 == cnt) { 926 if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) 927 return(0); 928 if ( ! rew_last(m, m->last)) 929 return(0); 930 } else if ( ! nc && 0 == cnt) { 931 mdoc_argv_free(arg); 932 if ( ! mdoc_pmsg(m, line, ppos, MANDOCERR_MACROEMPTY)) 933 return(0); 934 } 935 936 if ( ! nl) 937 return(1); 938 return(append_delims(m, line, pos, buf)); 939 } 940 941 942 static int 943 blk_full(MACRO_PROT_ARGS) 944 { 945 int la, nl; 946 struct mdoc_arg *arg; 947 struct mdoc_node *head; /* save of head macro */ 948 struct mdoc_node *body; /* save of body macro */ 949 struct mdoc_node *n; 950 enum mdoc_type mtt; 951 enum mdoct ntok; 952 enum margserr ac, lac; 953 enum margverr av; 954 char *p; 955 956 nl = MDOC_NEWLINE & m->flags; 957 958 /* Close out prior implicit scope. */ 959 960 if ( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags)) { 961 if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) 962 return(0); 963 if ( ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) 964 return(0); 965 } 966 967 /* 968 * This routine accomodates implicitly- and explicitly-scoped 969 * macro openings. Implicit ones first close out prior scope 970 * (seen above). Delay opening the head until necessary to 971 * allow leading punctuation to print. Special consideration 972 * for `It -column', which has phrase-part syntax instead of 973 * regular child nodes. 974 */ 975 976 for (arg = NULL;; ) { 977 la = *pos; 978 av = mdoc_argv(m, line, tok, &arg, pos, buf); 979 980 if (ARGV_WORD == av) { 981 *pos = la; 982 break; 983 } 984 985 if (ARGV_EOLN == av) 986 break; 987 if (ARGV_ARG == av) 988 continue; 989 990 mdoc_argv_free(arg); 991 return(0); 992 } 993 994 if ( ! mdoc_block_alloc(m, line, ppos, tok, arg)) 995 return(0); 996 997 head = body = NULL; 998 999 /* 1000 * The `Nd' macro has all arguments in its body: it's a hybrid 1001 * of block partial-explicit and full-implicit. Stupid. 1002 */ 1003 1004 if (MDOC_Nd == tok) { 1005 if ( ! mdoc_head_alloc(m, line, ppos, tok)) 1006 return(0); 1007 head = m->last; 1008 if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) 1009 return(0); 1010 if ( ! mdoc_body_alloc(m, line, ppos, tok)) 1011 return(0); 1012 body = m->last; 1013 } 1014 1015 ac = ARGS_ERROR; 1016 1017 for ( ; ; ) { 1018 la = *pos; 1019 /* Initialise last-phrase-type with ARGS_PEND. */ 1020 lac = ARGS_ERROR == ac ? ARGS_PEND : ac; 1021 ac = mdoc_args(m, line, pos, buf, tok, &p); 1022 1023 if (ARGS_PUNCT == ac) 1024 break; 1025 1026 if (ARGS_ERROR == ac) 1027 return(0); 1028 1029 if (ARGS_EOLN == ac) { 1030 if (ARGS_PPHRASE != lac && ARGS_PHRASE != lac) 1031 break; 1032 /* 1033 * This is necessary: if the last token on a 1034 * line is a `Ta' or tab, then we'll get 1035 * ARGS_EOLN, so we must be smart enough to 1036 * reopen our scope if the last parse was a 1037 * phrase or partial phrase. 1038 */ 1039 if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) 1040 return(0); 1041 if ( ! mdoc_body_alloc(m, line, ppos, tok)) 1042 return(0); 1043 body = m->last; 1044 break; 1045 } 1046 1047 /* 1048 * Emit leading punctuation (i.e., punctuation before 1049 * the MDOC_HEAD) for non-phrase types. 1050 */ 1051 1052 if (NULL == head && 1053 ARGS_PEND != ac && 1054 ARGS_PHRASE != ac && 1055 ARGS_PPHRASE != ac && 1056 ARGS_QWORD != ac && 1057 DELIM_OPEN == mdoc_isdelim(p)) { 1058 if ( ! mdoc_word_alloc(m, line, la, p)) 1059 return(0); 1060 continue; 1061 } 1062 1063 /* Open a head if one hasn't been opened. */ 1064 1065 if (NULL == head) { 1066 if ( ! mdoc_head_alloc(m, line, ppos, tok)) 1067 return(0); 1068 head = m->last; 1069 } 1070 1071 if (ARGS_PHRASE == ac || 1072 ARGS_PEND == ac || 1073 ARGS_PPHRASE == ac) { 1074 /* 1075 * If we haven't opened a body yet, rewind the 1076 * head; if we have, rewind that instead. 1077 */ 1078 1079 mtt = body ? MDOC_BODY : MDOC_HEAD; 1080 if ( ! rew_sub(mtt, m, tok, line, ppos)) 1081 return(0); 1082 1083 /* Then allocate our body context. */ 1084 1085 if ( ! mdoc_body_alloc(m, line, ppos, tok)) 1086 return(0); 1087 body = m->last; 1088 1089 /* 1090 * Process phrases: set whether we're in a 1091 * partial-phrase (this effects line handling) 1092 * then call down into the phrase parser. 1093 */ 1094 1095 if (ARGS_PPHRASE == ac) 1096 m->flags |= MDOC_PPHRASE; 1097 if (ARGS_PEND == ac && ARGS_PPHRASE == lac) 1098 m->flags |= MDOC_PPHRASE; 1099 1100 if ( ! phrase(m, line, la, buf)) 1101 return(0); 1102 1103 m->flags &= ~MDOC_PPHRASE; 1104 continue; 1105 } 1106 1107 ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); 1108 1109 if (MDOC_MAX == ntok) { 1110 if ( ! mdoc_word_alloc(m, line, la, p)) 1111 return(0); 1112 continue; 1113 } 1114 1115 if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) 1116 return(0); 1117 break; 1118 } 1119 1120 if (NULL == head) { 1121 if ( ! mdoc_head_alloc(m, line, ppos, tok)) 1122 return(0); 1123 head = m->last; 1124 } 1125 1126 if (nl && ! append_delims(m, line, pos, buf)) 1127 return(0); 1128 1129 /* If we've already opened our body, exit now. */ 1130 1131 if (NULL != body) 1132 goto out; 1133 1134 /* 1135 * If there is an open (i.e., unvalidated) sub-block requiring 1136 * explicit close-out, postpone switching the current block from 1137 * head to body until the rew_sub() call closing out that 1138 * sub-block. 1139 */ 1140 for (n = m->last; n && n != head; n = n->parent) { 1141 if (MDOC_BLOCK == n->type && 1142 MDOC_EXPLICIT & mdoc_macros[n->tok].flags && 1143 ! (MDOC_VALID & n->flags)) { 1144 n->pending = head; 1145 return(1); 1146 } 1147 } 1148 1149 /* Close out scopes to remain in a consistent state. */ 1150 1151 if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) 1152 return(0); 1153 if ( ! mdoc_body_alloc(m, line, ppos, tok)) 1154 return(0); 1155 1156 out: 1157 if ( ! (MDOC_FREECOL & m->flags)) 1158 return(1); 1159 1160 if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) 1161 return(0); 1162 if ( ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) 1163 return(0); 1164 1165 m->flags &= ~MDOC_FREECOL; 1166 return(1); 1167 } 1168 1169 1170 static int 1171 blk_part_imp(MACRO_PROT_ARGS) 1172 { 1173 int la, nl; 1174 enum mdoct ntok; 1175 enum margserr ac; 1176 char *p; 1177 struct mdoc_node *blk; /* saved block context */ 1178 struct mdoc_node *body; /* saved body context */ 1179 struct mdoc_node *n; 1180 1181 nl = MDOC_NEWLINE & m->flags; 1182 1183 /* 1184 * A macro that spans to the end of the line. This is generally 1185 * (but not necessarily) called as the first macro. The block 1186 * has a head as the immediate child, which is always empty, 1187 * followed by zero or more opening punctuation nodes, then the 1188 * body (which may be empty, depending on the macro), then zero 1189 * or more closing punctuation nodes. 1190 */ 1191 1192 if ( ! mdoc_block_alloc(m, line, ppos, tok, NULL)) 1193 return(0); 1194 1195 blk = m->last; 1196 1197 if ( ! mdoc_head_alloc(m, line, ppos, tok)) 1198 return(0); 1199 if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) 1200 return(0); 1201 1202 /* 1203 * Open the body scope "on-demand", that is, after we've 1204 * processed all our the leading delimiters (open parenthesis, 1205 * etc.). 1206 */ 1207 1208 for (body = NULL; ; ) { 1209 la = *pos; 1210 ac = mdoc_args(m, line, pos, buf, tok, &p); 1211 1212 if (ARGS_ERROR == ac) 1213 return(0); 1214 if (ARGS_EOLN == ac) 1215 break; 1216 if (ARGS_PUNCT == ac) 1217 break; 1218 1219 if (NULL == body && ARGS_QWORD != ac && 1220 DELIM_OPEN == mdoc_isdelim(p)) { 1221 if ( ! mdoc_word_alloc(m, line, la, p)) 1222 return(0); 1223 continue; 1224 } 1225 1226 if (NULL == body) { 1227 if ( ! mdoc_body_alloc(m, line, ppos, tok)) 1228 return(0); 1229 body = m->last; 1230 } 1231 1232 ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); 1233 1234 if (MDOC_MAX == ntok) { 1235 if ( ! mdoc_word_alloc(m, line, la, p)) 1236 return(0); 1237 continue; 1238 } 1239 1240 if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) 1241 return(0); 1242 break; 1243 } 1244 1245 /* Clean-ups to leave in a consistent state. */ 1246 1247 if (NULL == body) { 1248 if ( ! mdoc_body_alloc(m, line, ppos, tok)) 1249 return(0); 1250 body = m->last; 1251 } 1252 1253 for (n = body->child; n && n->next; n = n->next) 1254 /* Do nothing. */ ; 1255 1256 /* 1257 * End of sentence spacing: if the last node is a text node and 1258 * has a trailing period, then mark it as being end-of-sentence. 1259 */ 1260 1261 if (n && MDOC_TEXT == n->type && n->string) 1262 if (mandoc_eos(n->string, strlen(n->string), 1)) 1263 n->flags |= MDOC_EOS; 1264 1265 /* Up-propogate the end-of-space flag. */ 1266 1267 if (n && (MDOC_EOS & n->flags)) { 1268 body->flags |= MDOC_EOS; 1269 body->parent->flags |= MDOC_EOS; 1270 } 1271 1272 /* 1273 * If there is an open sub-block requiring explicit close-out, 1274 * postpone closing out the current block 1275 * until the rew_sub() call closing out the sub-block. 1276 */ 1277 for (n = m->last; n && n != body && n != blk->parent; n = n->parent) { 1278 if (MDOC_BLOCK == n->type && 1279 MDOC_EXPLICIT & mdoc_macros[n->tok].flags && 1280 ! (MDOC_VALID & n->flags)) { 1281 make_pending(n, tok, m, line, ppos); 1282 if ( ! mdoc_endbody_alloc(m, line, ppos, 1283 tok, body, ENDBODY_NOSPACE)) 1284 return(0); 1285 return(1); 1286 } 1287 } 1288 1289 /* 1290 * If we can't rewind to our body, then our scope has already 1291 * been closed by another macro (like `Oc' closing `Op'). This 1292 * is ugly behaviour nodding its head to OpenBSD's overwhelming 1293 * crufty use of `Op' breakage. 1294 */ 1295 if (n != body && ! mdoc_vmsg(m, MANDOCERR_SCOPENEST, 1296 line, ppos, "%s broken", mdoc_macronames[tok])) 1297 return(0); 1298 1299 if (n && ! rew_sub(MDOC_BODY, m, tok, line, ppos)) 1300 return(0); 1301 1302 /* Standard appending of delimiters. */ 1303 1304 if (nl && ! append_delims(m, line, pos, buf)) 1305 return(0); 1306 1307 /* Rewind scope, if applicable. */ 1308 1309 if (n && ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) 1310 return(0); 1311 1312 return(1); 1313 } 1314 1315 1316 static int 1317 blk_part_exp(MACRO_PROT_ARGS) 1318 { 1319 int la, nl; 1320 enum margserr ac; 1321 struct mdoc_node *head; /* keep track of head */ 1322 struct mdoc_node *body; /* keep track of body */ 1323 char *p; 1324 enum mdoct ntok; 1325 1326 nl = MDOC_NEWLINE & m->flags; 1327 1328 /* 1329 * The opening of an explicit macro having zero or more leading 1330 * punctuation nodes; a head with optional single element (the 1331 * case of `Eo'); and a body that may be empty. 1332 */ 1333 1334 if ( ! mdoc_block_alloc(m, line, ppos, tok, NULL)) 1335 return(0); 1336 1337 for (head = body = NULL; ; ) { 1338 la = *pos; 1339 ac = mdoc_args(m, line, pos, buf, tok, &p); 1340 1341 if (ARGS_ERROR == ac) 1342 return(0); 1343 if (ARGS_PUNCT == ac) 1344 break; 1345 if (ARGS_EOLN == ac) 1346 break; 1347 1348 /* Flush out leading punctuation. */ 1349 1350 if (NULL == head && ARGS_QWORD != ac && 1351 DELIM_OPEN == mdoc_isdelim(p)) { 1352 assert(NULL == body); 1353 if ( ! mdoc_word_alloc(m, line, la, p)) 1354 return(0); 1355 continue; 1356 } 1357 1358 if (NULL == head) { 1359 assert(NULL == body); 1360 if ( ! mdoc_head_alloc(m, line, ppos, tok)) 1361 return(0); 1362 head = m->last; 1363 } 1364 1365 /* 1366 * `Eo' gobbles any data into the head, but most other 1367 * macros just immediately close out and begin the body. 1368 */ 1369 1370 if (NULL == body) { 1371 assert(head); 1372 /* No check whether it's a macro! */ 1373 if (MDOC_Eo == tok) 1374 if ( ! mdoc_word_alloc(m, line, la, p)) 1375 return(0); 1376 1377 if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) 1378 return(0); 1379 if ( ! mdoc_body_alloc(m, line, ppos, tok)) 1380 return(0); 1381 body = m->last; 1382 1383 if (MDOC_Eo == tok) 1384 continue; 1385 } 1386 1387 assert(NULL != head && NULL != body); 1388 1389 ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); 1390 1391 if (MDOC_MAX == ntok) { 1392 if ( ! mdoc_word_alloc(m, line, la, p)) 1393 return(0); 1394 continue; 1395 } 1396 1397 if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) 1398 return(0); 1399 break; 1400 } 1401 1402 /* Clean-up to leave in a consistent state. */ 1403 1404 if (NULL == head) { 1405 if ( ! mdoc_head_alloc(m, line, ppos, tok)) 1406 return(0); 1407 head = m->last; 1408 } 1409 1410 if (NULL == body) { 1411 if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) 1412 return(0); 1413 if ( ! mdoc_body_alloc(m, line, ppos, tok)) 1414 return(0); 1415 body = m->last; 1416 } 1417 1418 /* Standard appending of delimiters. */ 1419 1420 if ( ! nl) 1421 return(1); 1422 return(append_delims(m, line, pos, buf)); 1423 } 1424 1425 1426 /* ARGSUSED */ 1427 static int 1428 in_line_argn(MACRO_PROT_ARGS) 1429 { 1430 int la, flushed, j, maxargs, nl; 1431 enum margserr ac; 1432 enum margverr av; 1433 struct mdoc_arg *arg; 1434 char *p; 1435 enum mdoct ntok; 1436 1437 nl = MDOC_NEWLINE & m->flags; 1438 1439 /* 1440 * A line macro that has a fixed number of arguments (maxargs). 1441 * Only open the scope once the first non-leading-punctuation is 1442 * found (unless MDOC_IGNDELIM is noted, like in `Pf'), then 1443 * keep it open until the maximum number of arguments are 1444 * exhausted. 1445 */ 1446 1447 switch (tok) { 1448 case (MDOC_Ap): 1449 /* FALLTHROUGH */ 1450 case (MDOC_No): 1451 /* FALLTHROUGH */ 1452 case (MDOC_Ns): 1453 /* FALLTHROUGH */ 1454 case (MDOC_Ux): 1455 maxargs = 0; 1456 break; 1457 case (MDOC_Xr): 1458 maxargs = 2; 1459 break; 1460 default: 1461 maxargs = 1; 1462 break; 1463 } 1464 1465 for (arg = NULL; ; ) { 1466 la = *pos; 1467 av = mdoc_argv(m, line, tok, &arg, pos, buf); 1468 1469 if (ARGV_WORD == av) { 1470 *pos = la; 1471 break; 1472 } 1473 1474 if (ARGV_EOLN == av) 1475 break; 1476 if (ARGV_ARG == av) 1477 continue; 1478 1479 mdoc_argv_free(arg); 1480 return(0); 1481 } 1482 1483 for (flushed = j = 0; ; ) { 1484 la = *pos; 1485 ac = mdoc_args(m, line, pos, buf, tok, &p); 1486 1487 if (ARGS_ERROR == ac) 1488 return(0); 1489 if (ARGS_PUNCT == ac) 1490 break; 1491 if (ARGS_EOLN == ac) 1492 break; 1493 1494 if ( ! (MDOC_IGNDELIM & mdoc_macros[tok].flags) && 1495 ARGS_QWORD != ac && 1496 0 == j && DELIM_OPEN == mdoc_isdelim(p)) { 1497 if ( ! mdoc_word_alloc(m, line, la, p)) 1498 return(0); 1499 continue; 1500 } else if (0 == j) 1501 if ( ! mdoc_elem_alloc(m, line, la, tok, arg)) 1502 return(0); 1503 1504 if (j == maxargs && ! flushed) { 1505 if ( ! rew_elem(m, tok)) 1506 return(0); 1507 flushed = 1; 1508 } 1509 1510 ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); 1511 1512 if (MDOC_MAX != ntok) { 1513 if ( ! flushed && ! rew_elem(m, tok)) 1514 return(0); 1515 flushed = 1; 1516 if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) 1517 return(0); 1518 j++; 1519 break; 1520 } 1521 1522 if ( ! (MDOC_IGNDELIM & mdoc_macros[tok].flags) && 1523 ARGS_QWORD != ac && 1524 ! flushed && 1525 DELIM_NONE != mdoc_isdelim(p)) { 1526 if ( ! rew_elem(m, tok)) 1527 return(0); 1528 flushed = 1; 1529 } 1530 1531 /* 1532 * XXX: this is a hack to work around groff's ugliness 1533 * as regards `Xr' and extraneous arguments. It should 1534 * ideally be deprecated behaviour, but because this is 1535 * code is no here, it's unlikely to be removed. 1536 */ 1537 1538 #ifdef __OpenBSD__ 1539 if (MDOC_Xr == tok && j == maxargs) { 1540 if ( ! mdoc_elem_alloc(m, line, la, MDOC_Ns, NULL)) 1541 return(0); 1542 if ( ! rew_elem(m, MDOC_Ns)) 1543 return(0); 1544 } 1545 #endif 1546 1547 if ( ! mdoc_word_alloc(m, line, la, p)) 1548 return(0); 1549 j++; 1550 } 1551 1552 if (0 == j && ! mdoc_elem_alloc(m, line, la, tok, arg)) 1553 return(0); 1554 1555 /* Close out in a consistent state. */ 1556 1557 if ( ! flushed && ! rew_elem(m, tok)) 1558 return(0); 1559 if ( ! nl) 1560 return(1); 1561 return(append_delims(m, line, pos, buf)); 1562 } 1563 1564 1565 static int 1566 in_line_eoln(MACRO_PROT_ARGS) 1567 { 1568 int la; 1569 enum margserr ac; 1570 enum margverr av; 1571 struct mdoc_arg *arg; 1572 char *p; 1573 enum mdoct ntok; 1574 1575 assert( ! (MDOC_PARSED & mdoc_macros[tok].flags)); 1576 1577 if (tok == MDOC_Pp) 1578 rew_sub(MDOC_BLOCK, m, MDOC_Nm, line, ppos); 1579 1580 /* Parse macro arguments. */ 1581 1582 for (arg = NULL; ; ) { 1583 la = *pos; 1584 av = mdoc_argv(m, line, tok, &arg, pos, buf); 1585 1586 if (ARGV_WORD == av) { 1587 *pos = la; 1588 break; 1589 } 1590 if (ARGV_EOLN == av) 1591 break; 1592 if (ARGV_ARG == av) 1593 continue; 1594 1595 mdoc_argv_free(arg); 1596 return(0); 1597 } 1598 1599 /* Open element scope. */ 1600 1601 if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) 1602 return(0); 1603 1604 /* Parse argument terms. */ 1605 1606 for (;;) { 1607 la = *pos; 1608 ac = mdoc_args(m, line, pos, buf, tok, &p); 1609 1610 if (ARGS_ERROR == ac) 1611 return(0); 1612 if (ARGS_EOLN == ac) 1613 break; 1614 1615 ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); 1616 1617 if (MDOC_MAX == ntok) { 1618 if ( ! mdoc_word_alloc(m, line, la, p)) 1619 return(0); 1620 continue; 1621 } 1622 1623 if ( ! rew_elem(m, tok)) 1624 return(0); 1625 return(mdoc_macro(m, ntok, line, la, pos, buf)); 1626 } 1627 1628 /* Close out (no delimiters). */ 1629 1630 return(rew_elem(m, tok)); 1631 } 1632 1633 1634 /* ARGSUSED */ 1635 static int 1636 ctx_synopsis(MACRO_PROT_ARGS) 1637 { 1638 int nl; 1639 1640 nl = MDOC_NEWLINE & m->flags; 1641 1642 /* If we're not in the SYNOPSIS, go straight to in-line. */ 1643 if ( ! (MDOC_SYNOPSIS & m->flags)) 1644 return(in_line(m, tok, line, ppos, pos, buf)); 1645 1646 /* If we're a nested call, same place. */ 1647 if ( ! nl) 1648 return(in_line(m, tok, line, ppos, pos, buf)); 1649 1650 /* 1651 * XXX: this will open a block scope; however, if later we end 1652 * up formatting the block scope, then child nodes will inherit 1653 * the formatting. Be careful. 1654 */ 1655 if (MDOC_Nm == tok) 1656 return(blk_full(m, tok, line, ppos, pos, buf)); 1657 assert(MDOC_Vt == tok); 1658 return(blk_part_imp(m, tok, line, ppos, pos, buf)); 1659 } 1660 1661 1662 /* ARGSUSED */ 1663 static int 1664 obsolete(MACRO_PROT_ARGS) 1665 { 1666 1667 return(mdoc_pmsg(m, line, ppos, MANDOCERR_MACROOBS)); 1668 } 1669 1670 1671 /* 1672 * Phrases occur within `Bl -column' entries, separated by `Ta' or tabs. 1673 * They're unusual because they're basically free-form text until a 1674 * macro is encountered. 1675 */ 1676 static int 1677 phrase(struct mdoc *m, int line, int ppos, char *buf) 1678 { 1679 int la, pos; 1680 enum margserr ac; 1681 enum mdoct ntok; 1682 char *p; 1683 1684 for (pos = ppos; ; ) { 1685 la = pos; 1686 1687 ac = mdoc_zargs(m, line, &pos, buf, 0, &p); 1688 1689 if (ARGS_ERROR == ac) 1690 return(0); 1691 if (ARGS_EOLN == ac) 1692 break; 1693 1694 ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup_raw(p); 1695 1696 if (MDOC_MAX == ntok) { 1697 if ( ! mdoc_word_alloc(m, line, la, p)) 1698 return(0); 1699 continue; 1700 } 1701 1702 if ( ! mdoc_macro(m, ntok, line, la, &pos, buf)) 1703 return(0); 1704 return(append_delims(m, line, &pos, buf)); 1705 } 1706 1707 return(1); 1708 } 1709 1710 1711 /* ARGSUSED */ 1712 static int 1713 phrase_ta(MACRO_PROT_ARGS) 1714 { 1715 int la; 1716 enum mdoct ntok; 1717 enum margserr ac; 1718 char *p; 1719 1720 /* 1721 * FIXME: this is overly restrictive: if the `Ta' is unexpected, 1722 * it should simply error out with ARGSLOST. 1723 */ 1724 1725 if ( ! rew_sub(MDOC_BODY, m, MDOC_It, line, ppos)) 1726 return(0); 1727 if ( ! mdoc_body_alloc(m, line, ppos, MDOC_It)) 1728 return(0); 1729 1730 for (;;) { 1731 la = *pos; 1732 ac = mdoc_zargs(m, line, pos, buf, 0, &p); 1733 1734 if (ARGS_ERROR == ac) 1735 return(0); 1736 if (ARGS_EOLN == ac) 1737 break; 1738 1739 ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup_raw(p); 1740 1741 if (MDOC_MAX == ntok) { 1742 if ( ! mdoc_word_alloc(m, line, la, p)) 1743 return(0); 1744 continue; 1745 } 1746 1747 if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) 1748 return(0); 1749 return(append_delims(m, line, pos, buf)); 1750 } 1751 1752 return(1); 1753 } 1754