1 /* $OpenBSD: html.c,v 1.125 2019/04/30 15:52:42 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/stat.h> 20 21 #include <assert.h> 22 #include <ctype.h> 23 #include <stdarg.h> 24 #include <stddef.h> 25 #include <stdio.h> 26 #include <stdint.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <unistd.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc_ohash.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "out.h" 36 #include "html.h" 37 #include "manconf.h" 38 #include "main.h" 39 40 struct htmldata { 41 const char *name; 42 int flags; 43 #define HTML_NOSTACK (1 << 0) 44 #define HTML_AUTOCLOSE (1 << 1) 45 #define HTML_NLBEFORE (1 << 2) 46 #define HTML_NLBEGIN (1 << 3) 47 #define HTML_NLEND (1 << 4) 48 #define HTML_NLAFTER (1 << 5) 49 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) 50 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) 51 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) 52 #define HTML_INDENT (1 << 6) 53 #define HTML_NOINDENT (1 << 7) 54 }; 55 56 static const struct htmldata htmltags[TAG_MAX] = { 57 {"html", HTML_NLALL}, 58 {"head", HTML_NLALL | HTML_INDENT}, 59 {"body", HTML_NLALL}, 60 {"meta", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL}, 61 {"title", HTML_NLAROUND}, 62 {"div", HTML_NLAROUND}, 63 {"div", 0}, 64 {"section", HTML_NLALL}, 65 {"h1", HTML_NLAROUND}, 66 {"h2", HTML_NLAROUND}, 67 {"span", 0}, 68 {"link", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL}, 69 {"br", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL}, 70 {"a", 0}, 71 {"table", HTML_NLALL | HTML_INDENT}, 72 {"tr", HTML_NLALL | HTML_INDENT}, 73 {"td", HTML_NLAROUND}, 74 {"li", HTML_NLAROUND | HTML_INDENT}, 75 {"ul", HTML_NLALL | HTML_INDENT}, 76 {"ol", HTML_NLALL | HTML_INDENT}, 77 {"dl", HTML_NLALL | HTML_INDENT}, 78 {"dt", HTML_NLAROUND}, 79 {"dd", HTML_NLAROUND | HTML_INDENT}, 80 {"p", HTML_NLAROUND | HTML_INDENT}, 81 {"pre", HTML_NLALL | HTML_NOINDENT}, 82 {"var", 0}, 83 {"cite", 0}, 84 {"b", 0}, 85 {"i", 0}, 86 {"code", 0}, 87 {"small", 0}, 88 {"style", HTML_NLALL | HTML_INDENT}, 89 {"math", HTML_NLALL | HTML_INDENT}, 90 {"mrow", 0}, 91 {"mi", 0}, 92 {"mn", 0}, 93 {"mo", 0}, 94 {"msup", 0}, 95 {"msub", 0}, 96 {"msubsup", 0}, 97 {"mfrac", 0}, 98 {"msqrt", 0}, 99 {"mfenced", 0}, 100 {"mtable", 0}, 101 {"mtr", 0}, 102 {"mtd", 0}, 103 {"munderover", 0}, 104 {"munder", 0}, 105 {"mover", 0}, 106 }; 107 108 /* Avoid duplicate HTML id= attributes. */ 109 static struct ohash id_unique; 110 111 static void html_reset_internal(struct html *); 112 static void print_byte(struct html *, char); 113 static void print_endword(struct html *); 114 static void print_indent(struct html *); 115 static void print_word(struct html *, const char *); 116 117 static void print_ctag(struct html *, struct tag *); 118 static int print_escape(struct html *, char); 119 static int print_encode(struct html *, const char *, const char *, int); 120 static void print_href(struct html *, const char *, const char *, int); 121 static void print_metaf(struct html *); 122 123 124 void * 125 html_alloc(const struct manoutput *outopts) 126 { 127 struct html *h; 128 129 h = mandoc_calloc(1, sizeof(struct html)); 130 131 h->tag = NULL; 132 h->style = outopts->style; 133 if ((h->base_man1 = outopts->man) == NULL) 134 h->base_man2 = NULL; 135 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL) 136 *h->base_man2++ = '\0'; 137 h->base_includes = outopts->includes; 138 if (outopts->fragment) 139 h->oflags |= HTML_FRAGMENT; 140 if (outopts->toc) 141 h->oflags |= HTML_TOC; 142 143 mandoc_ohash_init(&id_unique, 4, 0); 144 145 return h; 146 } 147 148 static void 149 html_reset_internal(struct html *h) 150 { 151 struct tag *tag; 152 char *cp; 153 unsigned int slot; 154 155 while ((tag = h->tag) != NULL) { 156 h->tag = tag->next; 157 free(tag); 158 } 159 cp = ohash_first(&id_unique, &slot); 160 while (cp != NULL) { 161 free(cp); 162 cp = ohash_next(&id_unique, &slot); 163 } 164 ohash_delete(&id_unique); 165 } 166 167 void 168 html_reset(void *p) 169 { 170 html_reset_internal(p); 171 mandoc_ohash_init(&id_unique, 4, 0); 172 } 173 174 void 175 html_free(void *p) 176 { 177 html_reset_internal(p); 178 free(p); 179 } 180 181 void 182 print_gen_head(struct html *h) 183 { 184 struct tag *t; 185 186 print_otag(h, TAG_META, "?", "charset", "utf-8"); 187 if (h->style != NULL) { 188 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet", 189 h->style, "type", "text/css", "media", "all"); 190 return; 191 } 192 193 /* 194 * Print a minimal embedded style sheet. 195 */ 196 197 t = print_otag(h, TAG_STYLE, ""); 198 print_text(h, "table.head, table.foot { width: 100%; }"); 199 print_endline(h); 200 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }"); 201 print_endline(h); 202 print_text(h, "td.head-vol { text-align: center; }"); 203 print_endline(h); 204 print_text(h, "div.Pp { margin: 1ex 0ex; }"); 205 print_endline(h); 206 print_text(h, "div.Nd, div.Bf, div.Op { display: inline; }"); 207 print_endline(h); 208 print_text(h, "span.Pa, span.Ad { font-style: italic; }"); 209 print_endline(h); 210 print_text(h, "span.Ms { font-weight: bold; }"); 211 print_endline(h); 212 print_text(h, "dl.Bl-diag "); 213 print_byte(h, '>'); 214 print_text(h, " dt { font-weight: bold; }"); 215 print_endline(h); 216 print_text(h, "code.Nm, code.Fl, code.Cm, code.Ic, " 217 "code.In, code.Fd, code.Fn,"); 218 print_endline(h); 219 print_text(h, "code.Cd { font-weight: bold; " 220 "font-family: inherit; }"); 221 print_tagq(h, t); 222 } 223 224 int 225 html_setfont(struct html *h, enum mandoc_esc font) 226 { 227 switch (font) { 228 case ESCAPE_FONTPREV: 229 font = h->metal; 230 break; 231 case ESCAPE_FONTITALIC: 232 case ESCAPE_FONTBOLD: 233 case ESCAPE_FONTBI: 234 case ESCAPE_FONTCW: 235 case ESCAPE_FONTROMAN: 236 break; 237 case ESCAPE_FONT: 238 font = ESCAPE_FONTROMAN; 239 break; 240 default: 241 return 0; 242 } 243 h->metal = h->metac; 244 h->metac = font; 245 return 1; 246 } 247 248 static void 249 print_metaf(struct html *h) 250 { 251 if (h->metaf) { 252 print_tagq(h, h->metaf); 253 h->metaf = NULL; 254 } 255 switch (h->metac) { 256 case ESCAPE_FONTITALIC: 257 h->metaf = print_otag(h, TAG_I, ""); 258 break; 259 case ESCAPE_FONTBOLD: 260 h->metaf = print_otag(h, TAG_B, ""); 261 break; 262 case ESCAPE_FONTBI: 263 h->metaf = print_otag(h, TAG_B, ""); 264 print_otag(h, TAG_I, ""); 265 break; 266 case ESCAPE_FONTCW: 267 h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); 268 break; 269 default: 270 break; 271 } 272 } 273 274 void 275 html_close_paragraph(struct html *h) 276 { 277 struct tag *t; 278 279 for (t = h->tag; t != NULL && t->closed == 0; t = t->next) { 280 switch(t->tag) { 281 case TAG_P: 282 case TAG_PRE: 283 print_tagq(h, t); 284 break; 285 case TAG_A: 286 print_tagq(h, t); 287 continue; 288 default: 289 continue; 290 } 291 break; 292 } 293 } 294 295 /* 296 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode. 297 * TOKEN_NONE does not switch. The old mode is returned. 298 */ 299 enum roff_tok 300 html_fillmode(struct html *h, enum roff_tok want) 301 { 302 struct tag *t; 303 enum roff_tok had; 304 305 for (t = h->tag; t != NULL; t = t->next) 306 if (t->tag == TAG_PRE) 307 break; 308 309 had = t == NULL ? ROFF_fi : ROFF_nf; 310 311 if (want != had) { 312 switch (want) { 313 case ROFF_fi: 314 print_tagq(h, t); 315 break; 316 case ROFF_nf: 317 html_close_paragraph(h); 318 print_otag(h, TAG_PRE, ""); 319 break; 320 case TOKEN_NONE: 321 break; 322 default: 323 abort(); 324 } 325 } 326 return had; 327 } 328 329 char * 330 html_make_id(const struct roff_node *n, int unique) 331 { 332 const struct roff_node *nch; 333 char *buf, *bufs, *cp; 334 unsigned int slot; 335 int suffix; 336 337 for (nch = n->child; nch != NULL; nch = nch->next) 338 if (nch->type != ROFFT_TEXT) 339 return NULL; 340 341 buf = NULL; 342 deroff(&buf, n); 343 if (buf == NULL) 344 return NULL; 345 346 /* 347 * In ID attributes, only use ASCII characters that are 348 * permitted in URL-fragment strings according to the 349 * explicit list at: 350 * https://url.spec.whatwg.org/#url-fragment-string 351 */ 352 353 for (cp = buf; *cp != '\0'; cp++) 354 if (isalnum((unsigned char)*cp) == 0 && 355 strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL) 356 *cp = '_'; 357 358 if (unique == 0) 359 return buf; 360 361 /* Avoid duplicate HTML id= attributes. */ 362 363 bufs = NULL; 364 suffix = 1; 365 slot = ohash_qlookup(&id_unique, buf); 366 cp = ohash_find(&id_unique, slot); 367 if (cp != NULL) { 368 while (cp != NULL) { 369 free(bufs); 370 if (++suffix > 127) { 371 free(buf); 372 return NULL; 373 } 374 mandoc_asprintf(&bufs, "%s_%d", buf, suffix); 375 slot = ohash_qlookup(&id_unique, bufs); 376 cp = ohash_find(&id_unique, slot); 377 } 378 free(buf); 379 buf = bufs; 380 } 381 ohash_insert(&id_unique, slot, buf); 382 return buf; 383 } 384 385 static int 386 print_escape(struct html *h, char c) 387 { 388 389 switch (c) { 390 case '<': 391 print_word(h, "<"); 392 break; 393 case '>': 394 print_word(h, ">"); 395 break; 396 case '&': 397 print_word(h, "&"); 398 break; 399 case '"': 400 print_word(h, """); 401 break; 402 case ASCII_NBRSP: 403 print_word(h, " "); 404 break; 405 case ASCII_HYPH: 406 print_byte(h, '-'); 407 break; 408 case ASCII_BREAK: 409 break; 410 default: 411 return 0; 412 } 413 return 1; 414 } 415 416 static int 417 print_encode(struct html *h, const char *p, const char *pend, int norecurse) 418 { 419 char numbuf[16]; 420 const char *seq; 421 size_t sz; 422 int c, len, breakline, nospace; 423 enum mandoc_esc esc; 424 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"', 425 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; 426 427 if (pend == NULL) 428 pend = strchr(p, '\0'); 429 430 breakline = 0; 431 nospace = 0; 432 433 while (p < pend) { 434 if (HTML_SKIPCHAR & h->flags && '\\' != *p) { 435 h->flags &= ~HTML_SKIPCHAR; 436 p++; 437 continue; 438 } 439 440 for (sz = strcspn(p, rejs); sz-- && p < pend; p++) 441 print_byte(h, *p); 442 443 if (breakline && 444 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) { 445 print_otag(h, TAG_BR, ""); 446 breakline = 0; 447 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP)) 448 p++; 449 continue; 450 } 451 452 if (p >= pend) 453 break; 454 455 if (*p == ' ') { 456 print_endword(h); 457 p++; 458 continue; 459 } 460 461 if (print_escape(h, *p++)) 462 continue; 463 464 esc = mandoc_escape(&p, &seq, &len); 465 switch (esc) { 466 case ESCAPE_FONT: 467 case ESCAPE_FONTPREV: 468 case ESCAPE_FONTBOLD: 469 case ESCAPE_FONTITALIC: 470 case ESCAPE_FONTBI: 471 case ESCAPE_FONTCW: 472 case ESCAPE_FONTROMAN: 473 if (0 == norecurse) { 474 h->flags |= HTML_NOSPACE; 475 if (html_setfont(h, esc)) 476 print_metaf(h); 477 h->flags &= ~HTML_NOSPACE; 478 } 479 continue; 480 case ESCAPE_SKIPCHAR: 481 h->flags |= HTML_SKIPCHAR; 482 continue; 483 case ESCAPE_ERROR: 484 continue; 485 default: 486 break; 487 } 488 489 if (h->flags & HTML_SKIPCHAR) { 490 h->flags &= ~HTML_SKIPCHAR; 491 continue; 492 } 493 494 switch (esc) { 495 case ESCAPE_UNICODE: 496 /* Skip past "u" header. */ 497 c = mchars_num2uc(seq + 1, len - 1); 498 break; 499 case ESCAPE_NUMBERED: 500 c = mchars_num2char(seq, len); 501 if (c < 0) 502 continue; 503 break; 504 case ESCAPE_SPECIAL: 505 c = mchars_spec2cp(seq, len); 506 if (c <= 0) 507 continue; 508 break; 509 case ESCAPE_UNDEF: 510 c = *seq; 511 break; 512 case ESCAPE_DEVICE: 513 print_word(h, "html"); 514 continue; 515 case ESCAPE_BREAK: 516 breakline = 1; 517 continue; 518 case ESCAPE_NOSPACE: 519 if ('\0' == *p) 520 nospace = 1; 521 continue; 522 case ESCAPE_OVERSTRIKE: 523 if (len == 0) 524 continue; 525 c = seq[len - 1]; 526 break; 527 default: 528 continue; 529 } 530 if ((c < 0x20 && c != 0x09) || 531 (c > 0x7E && c < 0xA0)) 532 c = 0xFFFD; 533 if (c > 0x7E) { 534 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c); 535 print_word(h, numbuf); 536 } else if (print_escape(h, c) == 0) 537 print_byte(h, c); 538 } 539 540 return nospace; 541 } 542 543 static void 544 print_href(struct html *h, const char *name, const char *sec, int man) 545 { 546 struct stat sb; 547 const char *p, *pp; 548 char *filename; 549 550 if (man) { 551 pp = h->base_man1; 552 if (h->base_man2 != NULL) { 553 mandoc_asprintf(&filename, "%s.%s", name, sec); 554 if (stat(filename, &sb) == -1) 555 pp = h->base_man2; 556 free(filename); 557 } 558 } else 559 pp = h->base_includes; 560 561 while ((p = strchr(pp, '%')) != NULL) { 562 print_encode(h, pp, p, 1); 563 if (man && p[1] == 'S') { 564 if (sec == NULL) 565 print_byte(h, '1'); 566 else 567 print_encode(h, sec, NULL, 1); 568 } else if ((man && p[1] == 'N') || 569 (man == 0 && p[1] == 'I')) 570 print_encode(h, name, NULL, 1); 571 else 572 print_encode(h, p, p + 2, 1); 573 pp = p + 2; 574 } 575 if (*pp != '\0') 576 print_encode(h, pp, NULL, 1); 577 } 578 579 struct tag * 580 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) 581 { 582 va_list ap; 583 struct tag *t; 584 const char *attr; 585 char *arg1, *arg2; 586 int style_written, tflags; 587 588 tflags = htmltags[tag].flags; 589 590 /* Push this tag onto the stack of open scopes. */ 591 592 if ((tflags & HTML_NOSTACK) == 0) { 593 t = mandoc_malloc(sizeof(struct tag)); 594 t->tag = tag; 595 t->next = h->tag; 596 t->refcnt = 0; 597 t->closed = 0; 598 h->tag = t; 599 } else 600 t = NULL; 601 602 if (tflags & HTML_NLBEFORE) 603 print_endline(h); 604 if (h->col == 0) 605 print_indent(h); 606 else if ((h->flags & HTML_NOSPACE) == 0) { 607 if (h->flags & HTML_KEEP) 608 print_word(h, " "); 609 else { 610 if (h->flags & HTML_PREKEEP) 611 h->flags |= HTML_KEEP; 612 print_endword(h); 613 } 614 } 615 616 if ( ! (h->flags & HTML_NONOSPACE)) 617 h->flags &= ~HTML_NOSPACE; 618 else 619 h->flags |= HTML_NOSPACE; 620 621 /* Print out the tag name and attributes. */ 622 623 print_byte(h, '<'); 624 print_word(h, htmltags[tag].name); 625 626 va_start(ap, fmt); 627 628 while (*fmt != '\0' && *fmt != 's') { 629 630 /* Parse attributes and arguments. */ 631 632 arg1 = va_arg(ap, char *); 633 arg2 = NULL; 634 switch (*fmt++) { 635 case 'c': 636 attr = "class"; 637 break; 638 case 'h': 639 attr = "href"; 640 break; 641 case 'i': 642 attr = "id"; 643 break; 644 case '?': 645 attr = arg1; 646 arg1 = va_arg(ap, char *); 647 break; 648 default: 649 abort(); 650 } 651 if (*fmt == 'M') 652 arg2 = va_arg(ap, char *); 653 if (arg1 == NULL) 654 continue; 655 656 /* Print the attributes. */ 657 658 print_byte(h, ' '); 659 print_word(h, attr); 660 print_byte(h, '='); 661 print_byte(h, '"'); 662 switch (*fmt) { 663 case 'I': 664 print_href(h, arg1, NULL, 0); 665 fmt++; 666 break; 667 case 'M': 668 print_href(h, arg1, arg2, 1); 669 fmt++; 670 break; 671 case 'R': 672 print_byte(h, '#'); 673 print_encode(h, arg1, NULL, 1); 674 fmt++; 675 break; 676 default: 677 print_encode(h, arg1, NULL, 1); 678 break; 679 } 680 print_byte(h, '"'); 681 } 682 683 style_written = 0; 684 while (*fmt++ == 's') { 685 arg1 = va_arg(ap, char *); 686 arg2 = va_arg(ap, char *); 687 if (arg2 == NULL) 688 continue; 689 print_byte(h, ' '); 690 if (style_written == 0) { 691 print_word(h, "style=\""); 692 style_written = 1; 693 } 694 print_word(h, arg1); 695 print_byte(h, ':'); 696 print_byte(h, ' '); 697 print_word(h, arg2); 698 print_byte(h, ';'); 699 } 700 if (style_written) 701 print_byte(h, '"'); 702 703 va_end(ap); 704 705 /* Accommodate for "well-formed" singleton escaping. */ 706 707 if (HTML_AUTOCLOSE & htmltags[tag].flags) 708 print_byte(h, '/'); 709 710 print_byte(h, '>'); 711 712 if (tflags & HTML_NLBEGIN) 713 print_endline(h); 714 else 715 h->flags |= HTML_NOSPACE; 716 717 if (tflags & HTML_INDENT) 718 h->indent++; 719 if (tflags & HTML_NOINDENT) 720 h->noindent++; 721 722 return t; 723 } 724 725 static void 726 print_ctag(struct html *h, struct tag *tag) 727 { 728 int tflags; 729 730 if (tag->closed == 0) { 731 tag->closed = 1; 732 if (tag == h->metaf) 733 h->metaf = NULL; 734 if (tag == h->tblt) 735 h->tblt = NULL; 736 737 tflags = htmltags[tag->tag].flags; 738 if (tflags & HTML_INDENT) 739 h->indent--; 740 if (tflags & HTML_NOINDENT) 741 h->noindent--; 742 if (tflags & HTML_NLEND) 743 print_endline(h); 744 print_indent(h); 745 print_byte(h, '<'); 746 print_byte(h, '/'); 747 print_word(h, htmltags[tag->tag].name); 748 print_byte(h, '>'); 749 if (tflags & HTML_NLAFTER) 750 print_endline(h); 751 } 752 if (tag->refcnt == 0) { 753 h->tag = tag->next; 754 free(tag); 755 } 756 } 757 758 void 759 print_gen_decls(struct html *h) 760 { 761 print_word(h, "<!DOCTYPE html>"); 762 print_endline(h); 763 } 764 765 void 766 print_gen_comment(struct html *h, struct roff_node *n) 767 { 768 int wantblank; 769 770 print_word(h, "<!-- This is an automatically generated file." 771 " Do not edit."); 772 h->indent = 1; 773 wantblank = 0; 774 while (n != NULL && n->type == ROFFT_COMMENT) { 775 if (strstr(n->string, "-->") == NULL && 776 (wantblank || *n->string != '\0')) { 777 print_endline(h); 778 print_indent(h); 779 print_word(h, n->string); 780 wantblank = *n->string != '\0'; 781 } 782 n = n->next; 783 } 784 if (wantblank) 785 print_endline(h); 786 print_word(h, " -->"); 787 print_endline(h); 788 h->indent = 0; 789 } 790 791 void 792 print_text(struct html *h, const char *word) 793 { 794 if (h->col && (h->flags & HTML_NOSPACE) == 0) { 795 if ( ! (HTML_KEEP & h->flags)) { 796 if (HTML_PREKEEP & h->flags) 797 h->flags |= HTML_KEEP; 798 print_endword(h); 799 } else 800 print_word(h, " "); 801 } 802 803 assert(h->metaf == NULL); 804 print_metaf(h); 805 print_indent(h); 806 if ( ! print_encode(h, word, NULL, 0)) { 807 if ( ! (h->flags & HTML_NONOSPACE)) 808 h->flags &= ~HTML_NOSPACE; 809 h->flags &= ~HTML_NONEWLINE; 810 } else 811 h->flags |= HTML_NOSPACE | HTML_NONEWLINE; 812 813 if (h->metaf != NULL) { 814 print_tagq(h, h->metaf); 815 h->metaf = NULL; 816 } 817 818 h->flags &= ~HTML_IGNDELIM; 819 } 820 821 void 822 print_tagq(struct html *h, const struct tag *until) 823 { 824 struct tag *this, *next; 825 826 for (this = h->tag; this != NULL; this = next) { 827 next = this == until ? NULL : this->next; 828 print_ctag(h, this); 829 } 830 } 831 832 /* 833 * Close out all open elements up to but excluding suntil. 834 * Note that a paragraph just inside stays open together with it 835 * because paragraphs include subsequent phrasing content. 836 */ 837 void 838 print_stagq(struct html *h, const struct tag *suntil) 839 { 840 struct tag *this, *next; 841 842 for (this = h->tag; this != NULL; this = next) { 843 next = this->next; 844 if (this == suntil || (next == suntil && 845 (this->tag == TAG_P || this->tag == TAG_PRE))) 846 break; 847 print_ctag(h, this); 848 } 849 } 850 851 852 /*********************************************************************** 853 * Low level output functions. 854 * They implement line breaking using a short static buffer. 855 ***********************************************************************/ 856 857 /* 858 * Buffer one HTML output byte. 859 * If the buffer is full, flush and deactivate it and start a new line. 860 * If the buffer is inactive, print directly. 861 */ 862 static void 863 print_byte(struct html *h, char c) 864 { 865 if ((h->flags & HTML_BUFFER) == 0) { 866 putchar(c); 867 h->col++; 868 return; 869 } 870 871 if (h->col + h->bufcol < sizeof(h->buf)) { 872 h->buf[h->bufcol++] = c; 873 return; 874 } 875 876 putchar('\n'); 877 h->col = 0; 878 print_indent(h); 879 putchar(' '); 880 putchar(' '); 881 fwrite(h->buf, h->bufcol, 1, stdout); 882 putchar(c); 883 h->col = (h->indent + 1) * 2 + h->bufcol + 1; 884 h->bufcol = 0; 885 h->flags &= ~HTML_BUFFER; 886 } 887 888 /* 889 * If something was printed on the current output line, end it. 890 * Not to be called right after print_indent(). 891 */ 892 void 893 print_endline(struct html *h) 894 { 895 if (h->col == 0) 896 return; 897 898 if (h->bufcol) { 899 putchar(' '); 900 fwrite(h->buf, h->bufcol, 1, stdout); 901 h->bufcol = 0; 902 } 903 putchar('\n'); 904 h->col = 0; 905 h->flags |= HTML_NOSPACE; 906 h->flags &= ~HTML_BUFFER; 907 } 908 909 /* 910 * Flush the HTML output buffer. 911 * If it is inactive, activate it. 912 */ 913 static void 914 print_endword(struct html *h) 915 { 916 if (h->noindent) { 917 print_byte(h, ' '); 918 return; 919 } 920 921 if ((h->flags & HTML_BUFFER) == 0) { 922 h->col++; 923 h->flags |= HTML_BUFFER; 924 } else if (h->bufcol) { 925 putchar(' '); 926 fwrite(h->buf, h->bufcol, 1, stdout); 927 h->col += h->bufcol + 1; 928 } 929 h->bufcol = 0; 930 } 931 932 /* 933 * If at the beginning of a new output line, 934 * perform indentation and mark the line as containing output. 935 * Make sure to really produce some output right afterwards, 936 * but do not use print_otag() for producing it. 937 */ 938 static void 939 print_indent(struct html *h) 940 { 941 size_t i; 942 943 if (h->col) 944 return; 945 946 if (h->noindent == 0) { 947 h->col = h->indent * 2; 948 for (i = 0; i < h->col; i++) 949 putchar(' '); 950 } 951 h->flags &= ~HTML_NOSPACE; 952 } 953 954 /* 955 * Print or buffer some characters 956 * depending on the current HTML output buffer state. 957 */ 958 static void 959 print_word(struct html *h, const char *cp) 960 { 961 while (*cp != '\0') 962 print_byte(h, *cp++); 963 } 964