1 /* $OpenBSD: html.c,v 1.134 2020/02/27 22:26:26 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/stat.h> 20 21 #include <assert.h> 22 #include <ctype.h> 23 #include <stdarg.h> 24 #include <stddef.h> 25 #include <stdio.h> 26 #include <stdint.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <unistd.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc_ohash.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "out.h" 36 #include "html.h" 37 #include "manconf.h" 38 #include "main.h" 39 40 struct htmldata { 41 const char *name; 42 int flags; 43 #define HTML_INPHRASE (1 << 0) /* Can appear in phrasing context. */ 44 #define HTML_TOPHRASE (1 << 1) /* Establishes phrasing context. */ 45 #define HTML_NOSTACK (1 << 2) /* Does not have an end tag. */ 46 #define HTML_NLBEFORE (1 << 3) /* Output line break before opening. */ 47 #define HTML_NLBEGIN (1 << 4) /* Output line break after opening. */ 48 #define HTML_NLEND (1 << 5) /* Output line break before closing. */ 49 #define HTML_NLAFTER (1 << 6) /* Output line break after closing. */ 50 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) 51 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) 52 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) 53 #define HTML_INDENT (1 << 7) /* Indent content by two spaces. */ 54 #define HTML_NOINDENT (1 << 8) /* Exception: never indent content. */ 55 }; 56 57 static const struct htmldata htmltags[TAG_MAX] = { 58 {"html", HTML_NLALL}, 59 {"head", HTML_NLALL | HTML_INDENT}, 60 {"meta", HTML_NOSTACK | HTML_NLALL}, 61 {"link", HTML_NOSTACK | HTML_NLALL}, 62 {"style", HTML_NLALL | HTML_INDENT}, 63 {"title", HTML_NLAROUND}, 64 {"body", HTML_NLALL}, 65 {"div", HTML_NLAROUND}, 66 {"section", HTML_NLALL}, 67 {"table", HTML_NLALL | HTML_INDENT}, 68 {"tr", HTML_NLALL | HTML_INDENT}, 69 {"td", HTML_NLAROUND}, 70 {"li", HTML_NLAROUND | HTML_INDENT}, 71 {"ul", HTML_NLALL | HTML_INDENT}, 72 {"ol", HTML_NLALL | HTML_INDENT}, 73 {"dl", HTML_NLALL | HTML_INDENT}, 74 {"dt", HTML_NLAROUND}, 75 {"dd", HTML_NLAROUND | HTML_INDENT}, 76 {"h1", HTML_TOPHRASE | HTML_NLAROUND}, 77 {"h2", HTML_TOPHRASE | HTML_NLAROUND}, 78 {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT}, 79 {"pre", HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT}, 80 {"a", HTML_INPHRASE | HTML_TOPHRASE}, 81 {"b", HTML_INPHRASE | HTML_TOPHRASE}, 82 {"cite", HTML_INPHRASE | HTML_TOPHRASE}, 83 {"code", HTML_INPHRASE | HTML_TOPHRASE}, 84 {"i", HTML_INPHRASE | HTML_TOPHRASE}, 85 {"small", HTML_INPHRASE | HTML_TOPHRASE}, 86 {"span", HTML_INPHRASE | HTML_TOPHRASE}, 87 {"var", HTML_INPHRASE | HTML_TOPHRASE}, 88 {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL}, 89 {"mark", HTML_INPHRASE }, 90 {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT}, 91 {"mrow", 0}, 92 {"mi", 0}, 93 {"mn", 0}, 94 {"mo", 0}, 95 {"msup", 0}, 96 {"msub", 0}, 97 {"msubsup", 0}, 98 {"mfrac", 0}, 99 {"msqrt", 0}, 100 {"mfenced", 0}, 101 {"mtable", 0}, 102 {"mtr", 0}, 103 {"mtd", 0}, 104 {"munderover", 0}, 105 {"munder", 0}, 106 {"mover", 0}, 107 }; 108 109 /* Avoid duplicate HTML id= attributes. */ 110 static struct ohash id_unique; 111 112 static void html_reset_internal(struct html *); 113 static void print_byte(struct html *, char); 114 static void print_endword(struct html *); 115 static void print_indent(struct html *); 116 static void print_word(struct html *, const char *); 117 118 static void print_ctag(struct html *, struct tag *); 119 static int print_escape(struct html *, char); 120 static int print_encode(struct html *, const char *, const char *, int); 121 static void print_href(struct html *, const char *, const char *, int); 122 static void print_metaf(struct html *); 123 124 125 void * 126 html_alloc(const struct manoutput *outopts) 127 { 128 struct html *h; 129 130 h = mandoc_calloc(1, sizeof(struct html)); 131 132 h->tag = NULL; 133 h->style = outopts->style; 134 if ((h->base_man1 = outopts->man) == NULL) 135 h->base_man2 = NULL; 136 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL) 137 *h->base_man2++ = '\0'; 138 h->base_includes = outopts->includes; 139 if (outopts->fragment) 140 h->oflags |= HTML_FRAGMENT; 141 if (outopts->toc) 142 h->oflags |= HTML_TOC; 143 144 mandoc_ohash_init(&id_unique, 4, 0); 145 146 return h; 147 } 148 149 static void 150 html_reset_internal(struct html *h) 151 { 152 struct tag *tag; 153 char *cp; 154 unsigned int slot; 155 156 while ((tag = h->tag) != NULL) { 157 h->tag = tag->next; 158 free(tag); 159 } 160 cp = ohash_first(&id_unique, &slot); 161 while (cp != NULL) { 162 free(cp); 163 cp = ohash_next(&id_unique, &slot); 164 } 165 ohash_delete(&id_unique); 166 } 167 168 void 169 html_reset(void *p) 170 { 171 html_reset_internal(p); 172 mandoc_ohash_init(&id_unique, 4, 0); 173 } 174 175 void 176 html_free(void *p) 177 { 178 html_reset_internal(p); 179 free(p); 180 } 181 182 void 183 print_gen_head(struct html *h) 184 { 185 struct tag *t; 186 187 print_otag(h, TAG_META, "?", "charset", "utf-8"); 188 if (h->style != NULL) { 189 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet", 190 h->style, "type", "text/css", "media", "all"); 191 return; 192 } 193 194 /* 195 * Print a minimal embedded style sheet. 196 */ 197 198 t = print_otag(h, TAG_STYLE, ""); 199 print_text(h, "table.head, table.foot { width: 100%; }"); 200 print_endline(h); 201 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }"); 202 print_endline(h); 203 print_text(h, "td.head-vol { text-align: center; }"); 204 print_endline(h); 205 print_text(h, ".Nd, .Bf, .Op { display: inline; }"); 206 print_endline(h); 207 print_text(h, ".Pa, .Ad { font-style: italic; }"); 208 print_endline(h); 209 print_text(h, ".Ms { font-weight: bold; }"); 210 print_endline(h); 211 print_text(h, ".Bl-diag "); 212 print_byte(h, '>'); 213 print_text(h, " dt { font-weight: bold; }"); 214 print_endline(h); 215 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd " 216 "{ font-weight: bold; font-family: inherit; }"); 217 print_tagq(h, t); 218 } 219 220 int 221 html_setfont(struct html *h, enum mandoc_esc font) 222 { 223 switch (font) { 224 case ESCAPE_FONTPREV: 225 font = h->metal; 226 break; 227 case ESCAPE_FONTITALIC: 228 case ESCAPE_FONTBOLD: 229 case ESCAPE_FONTBI: 230 case ESCAPE_FONTCW: 231 case ESCAPE_FONTROMAN: 232 break; 233 case ESCAPE_FONT: 234 font = ESCAPE_FONTROMAN; 235 break; 236 default: 237 return 0; 238 } 239 h->metal = h->metac; 240 h->metac = font; 241 return 1; 242 } 243 244 static void 245 print_metaf(struct html *h) 246 { 247 if (h->metaf) { 248 print_tagq(h, h->metaf); 249 h->metaf = NULL; 250 } 251 switch (h->metac) { 252 case ESCAPE_FONTITALIC: 253 h->metaf = print_otag(h, TAG_I, ""); 254 break; 255 case ESCAPE_FONTBOLD: 256 h->metaf = print_otag(h, TAG_B, ""); 257 break; 258 case ESCAPE_FONTBI: 259 h->metaf = print_otag(h, TAG_B, ""); 260 print_otag(h, TAG_I, ""); 261 break; 262 case ESCAPE_FONTCW: 263 h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); 264 break; 265 default: 266 break; 267 } 268 } 269 270 void 271 html_close_paragraph(struct html *h) 272 { 273 struct tag *this, *next; 274 int flags; 275 276 this = h->tag; 277 for (;;) { 278 next = this->next; 279 flags = htmltags[this->tag].flags; 280 if (flags & (HTML_INPHRASE | HTML_TOPHRASE)) 281 print_ctag(h, this); 282 if ((flags & HTML_INPHRASE) == 0) 283 break; 284 this = next; 285 } 286 } 287 288 /* 289 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode. 290 * TOKEN_NONE does not switch. The old mode is returned. 291 */ 292 enum roff_tok 293 html_fillmode(struct html *h, enum roff_tok want) 294 { 295 struct tag *t; 296 enum roff_tok had; 297 298 for (t = h->tag; t != NULL; t = t->next) 299 if (t->tag == TAG_PRE) 300 break; 301 302 had = t == NULL ? ROFF_fi : ROFF_nf; 303 304 if (want != had) { 305 switch (want) { 306 case ROFF_fi: 307 print_tagq(h, t); 308 break; 309 case ROFF_nf: 310 html_close_paragraph(h); 311 print_otag(h, TAG_PRE, ""); 312 break; 313 case TOKEN_NONE: 314 break; 315 default: 316 abort(); 317 } 318 } 319 return had; 320 } 321 322 char * 323 html_make_id(const struct roff_node *n, int unique) 324 { 325 const struct roff_node *nch; 326 char *buf, *bufs, *cp; 327 unsigned int slot; 328 int suffix; 329 330 for (nch = n->child; nch != NULL; nch = nch->next) 331 if (nch->type != ROFFT_TEXT) 332 return NULL; 333 334 buf = NULL; 335 deroff(&buf, n); 336 if (buf == NULL) 337 return NULL; 338 339 /* 340 * In ID attributes, only use ASCII characters that are 341 * permitted in URL-fragment strings according to the 342 * explicit list at: 343 * https://url.spec.whatwg.org/#url-fragment-string 344 */ 345 346 for (cp = buf; *cp != '\0'; cp++) 347 if (isalnum((unsigned char)*cp) == 0 && 348 strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL) 349 *cp = '_'; 350 351 if (unique == 0) 352 return buf; 353 354 /* Avoid duplicate HTML id= attributes. */ 355 356 bufs = NULL; 357 suffix = 1; 358 slot = ohash_qlookup(&id_unique, buf); 359 cp = ohash_find(&id_unique, slot); 360 if (cp != NULL) { 361 while (cp != NULL) { 362 free(bufs); 363 if (++suffix > 127) { 364 free(buf); 365 return NULL; 366 } 367 mandoc_asprintf(&bufs, "%s_%d", buf, suffix); 368 slot = ohash_qlookup(&id_unique, bufs); 369 cp = ohash_find(&id_unique, slot); 370 } 371 free(buf); 372 buf = bufs; 373 } 374 ohash_insert(&id_unique, slot, buf); 375 return buf; 376 } 377 378 static int 379 print_escape(struct html *h, char c) 380 { 381 382 switch (c) { 383 case '<': 384 print_word(h, "<"); 385 break; 386 case '>': 387 print_word(h, ">"); 388 break; 389 case '&': 390 print_word(h, "&"); 391 break; 392 case '"': 393 print_word(h, """); 394 break; 395 case ASCII_NBRSP: 396 print_word(h, " "); 397 break; 398 case ASCII_HYPH: 399 print_byte(h, '-'); 400 break; 401 case ASCII_BREAK: 402 break; 403 default: 404 return 0; 405 } 406 return 1; 407 } 408 409 static int 410 print_encode(struct html *h, const char *p, const char *pend, int norecurse) 411 { 412 char numbuf[16]; 413 const char *seq; 414 size_t sz; 415 int c, len, breakline, nospace; 416 enum mandoc_esc esc; 417 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"', 418 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; 419 420 if (pend == NULL) 421 pend = strchr(p, '\0'); 422 423 breakline = 0; 424 nospace = 0; 425 426 while (p < pend) { 427 if (HTML_SKIPCHAR & h->flags && '\\' != *p) { 428 h->flags &= ~HTML_SKIPCHAR; 429 p++; 430 continue; 431 } 432 433 for (sz = strcspn(p, rejs); sz-- && p < pend; p++) 434 print_byte(h, *p); 435 436 if (breakline && 437 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) { 438 print_otag(h, TAG_BR, ""); 439 breakline = 0; 440 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP)) 441 p++; 442 continue; 443 } 444 445 if (p >= pend) 446 break; 447 448 if (*p == ' ') { 449 print_endword(h); 450 p++; 451 continue; 452 } 453 454 if (print_escape(h, *p++)) 455 continue; 456 457 esc = mandoc_escape(&p, &seq, &len); 458 switch (esc) { 459 case ESCAPE_FONT: 460 case ESCAPE_FONTPREV: 461 case ESCAPE_FONTBOLD: 462 case ESCAPE_FONTITALIC: 463 case ESCAPE_FONTBI: 464 case ESCAPE_FONTCW: 465 case ESCAPE_FONTROMAN: 466 if (0 == norecurse) { 467 h->flags |= HTML_NOSPACE; 468 if (html_setfont(h, esc)) 469 print_metaf(h); 470 h->flags &= ~HTML_NOSPACE; 471 } 472 continue; 473 case ESCAPE_SKIPCHAR: 474 h->flags |= HTML_SKIPCHAR; 475 continue; 476 case ESCAPE_ERROR: 477 continue; 478 default: 479 break; 480 } 481 482 if (h->flags & HTML_SKIPCHAR) { 483 h->flags &= ~HTML_SKIPCHAR; 484 continue; 485 } 486 487 switch (esc) { 488 case ESCAPE_UNICODE: 489 /* Skip past "u" header. */ 490 c = mchars_num2uc(seq + 1, len - 1); 491 break; 492 case ESCAPE_NUMBERED: 493 c = mchars_num2char(seq, len); 494 if (c < 0) 495 continue; 496 break; 497 case ESCAPE_SPECIAL: 498 c = mchars_spec2cp(seq, len); 499 if (c <= 0) 500 continue; 501 break; 502 case ESCAPE_UNDEF: 503 c = *seq; 504 break; 505 case ESCAPE_DEVICE: 506 print_word(h, "html"); 507 continue; 508 case ESCAPE_BREAK: 509 breakline = 1; 510 continue; 511 case ESCAPE_NOSPACE: 512 if ('\0' == *p) 513 nospace = 1; 514 continue; 515 case ESCAPE_OVERSTRIKE: 516 if (len == 0) 517 continue; 518 c = seq[len - 1]; 519 break; 520 default: 521 continue; 522 } 523 if ((c < 0x20 && c != 0x09) || 524 (c > 0x7E && c < 0xA0)) 525 c = 0xFFFD; 526 if (c > 0x7E) { 527 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c); 528 print_word(h, numbuf); 529 } else if (print_escape(h, c) == 0) 530 print_byte(h, c); 531 } 532 533 return nospace; 534 } 535 536 static void 537 print_href(struct html *h, const char *name, const char *sec, int man) 538 { 539 struct stat sb; 540 const char *p, *pp; 541 char *filename; 542 543 if (man) { 544 pp = h->base_man1; 545 if (h->base_man2 != NULL) { 546 mandoc_asprintf(&filename, "%s.%s", name, sec); 547 if (stat(filename, &sb) == -1) 548 pp = h->base_man2; 549 free(filename); 550 } 551 } else 552 pp = h->base_includes; 553 554 while ((p = strchr(pp, '%')) != NULL) { 555 print_encode(h, pp, p, 1); 556 if (man && p[1] == 'S') { 557 if (sec == NULL) 558 print_byte(h, '1'); 559 else 560 print_encode(h, sec, NULL, 1); 561 } else if ((man && p[1] == 'N') || 562 (man == 0 && p[1] == 'I')) 563 print_encode(h, name, NULL, 1); 564 else 565 print_encode(h, p, p + 2, 1); 566 pp = p + 2; 567 } 568 if (*pp != '\0') 569 print_encode(h, pp, NULL, 1); 570 } 571 572 struct tag * 573 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) 574 { 575 va_list ap; 576 struct tag *t; 577 const char *attr; 578 char *arg1, *arg2; 579 int style_written, tflags; 580 581 tflags = htmltags[tag].flags; 582 583 /* Flow content is not allowed in phrasing context. */ 584 585 if ((tflags & HTML_INPHRASE) == 0) { 586 for (t = h->tag; t != NULL; t = t->next) { 587 if (t->closed) 588 continue; 589 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0); 590 break; 591 } 592 593 /* 594 * Always wrap phrasing elements in a paragraph 595 * unless already contained in some flow container; 596 * never put them directly into a section. 597 */ 598 599 } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION) 600 print_otag(h, TAG_P, "c", "Pp"); 601 602 /* Push this tag onto the stack of open scopes. */ 603 604 if ((tflags & HTML_NOSTACK) == 0) { 605 t = mandoc_malloc(sizeof(struct tag)); 606 t->tag = tag; 607 t->next = h->tag; 608 t->refcnt = 0; 609 t->closed = 0; 610 h->tag = t; 611 } else 612 t = NULL; 613 614 if (tflags & HTML_NLBEFORE) 615 print_endline(h); 616 if (h->col == 0) 617 print_indent(h); 618 else if ((h->flags & HTML_NOSPACE) == 0) { 619 if (h->flags & HTML_KEEP) 620 print_word(h, " "); 621 else { 622 if (h->flags & HTML_PREKEEP) 623 h->flags |= HTML_KEEP; 624 print_endword(h); 625 } 626 } 627 628 if ( ! (h->flags & HTML_NONOSPACE)) 629 h->flags &= ~HTML_NOSPACE; 630 else 631 h->flags |= HTML_NOSPACE; 632 633 /* Print out the tag name and attributes. */ 634 635 print_byte(h, '<'); 636 print_word(h, htmltags[tag].name); 637 638 va_start(ap, fmt); 639 640 while (*fmt != '\0' && *fmt != 's') { 641 642 /* Parse attributes and arguments. */ 643 644 arg1 = va_arg(ap, char *); 645 arg2 = NULL; 646 switch (*fmt++) { 647 case 'c': 648 attr = "class"; 649 break; 650 case 'h': 651 attr = "href"; 652 break; 653 case 'i': 654 attr = "id"; 655 break; 656 case '?': 657 attr = arg1; 658 arg1 = va_arg(ap, char *); 659 break; 660 default: 661 abort(); 662 } 663 if (*fmt == 'M') 664 arg2 = va_arg(ap, char *); 665 if (arg1 == NULL) 666 continue; 667 668 /* Print the attributes. */ 669 670 print_byte(h, ' '); 671 print_word(h, attr); 672 print_byte(h, '='); 673 print_byte(h, '"'); 674 switch (*fmt) { 675 case 'I': 676 print_href(h, arg1, NULL, 0); 677 fmt++; 678 break; 679 case 'M': 680 print_href(h, arg1, arg2, 1); 681 fmt++; 682 break; 683 case 'R': 684 print_byte(h, '#'); 685 print_encode(h, arg1, NULL, 1); 686 fmt++; 687 break; 688 default: 689 print_encode(h, arg1, NULL, 1); 690 break; 691 } 692 print_byte(h, '"'); 693 } 694 695 style_written = 0; 696 while (*fmt++ == 's') { 697 arg1 = va_arg(ap, char *); 698 arg2 = va_arg(ap, char *); 699 if (arg2 == NULL) 700 continue; 701 print_byte(h, ' '); 702 if (style_written == 0) { 703 print_word(h, "style=\""); 704 style_written = 1; 705 } 706 print_word(h, arg1); 707 print_byte(h, ':'); 708 print_byte(h, ' '); 709 print_word(h, arg2); 710 print_byte(h, ';'); 711 } 712 if (style_written) 713 print_byte(h, '"'); 714 715 va_end(ap); 716 717 /* Accommodate for "well-formed" singleton escaping. */ 718 719 if (htmltags[tag].flags & HTML_NOSTACK) 720 print_byte(h, '/'); 721 722 print_byte(h, '>'); 723 724 if (tflags & HTML_NLBEGIN) 725 print_endline(h); 726 else 727 h->flags |= HTML_NOSPACE; 728 729 if (tflags & HTML_INDENT) 730 h->indent++; 731 if (tflags & HTML_NOINDENT) 732 h->noindent++; 733 734 return t; 735 } 736 737 static void 738 print_ctag(struct html *h, struct tag *tag) 739 { 740 int tflags; 741 742 if (tag->closed == 0) { 743 tag->closed = 1; 744 if (tag == h->metaf) 745 h->metaf = NULL; 746 if (tag == h->tblt) 747 h->tblt = NULL; 748 749 tflags = htmltags[tag->tag].flags; 750 if (tflags & HTML_INDENT) 751 h->indent--; 752 if (tflags & HTML_NOINDENT) 753 h->noindent--; 754 if (tflags & HTML_NLEND) 755 print_endline(h); 756 print_indent(h); 757 print_byte(h, '<'); 758 print_byte(h, '/'); 759 print_word(h, htmltags[tag->tag].name); 760 print_byte(h, '>'); 761 if (tflags & HTML_NLAFTER) 762 print_endline(h); 763 } 764 if (tag->refcnt == 0) { 765 h->tag = tag->next; 766 free(tag); 767 } 768 } 769 770 void 771 print_gen_decls(struct html *h) 772 { 773 print_word(h, "<!DOCTYPE html>"); 774 print_endline(h); 775 } 776 777 void 778 print_gen_comment(struct html *h, struct roff_node *n) 779 { 780 int wantblank; 781 782 print_word(h, "<!-- This is an automatically generated file." 783 " Do not edit."); 784 h->indent = 1; 785 wantblank = 0; 786 while (n != NULL && n->type == ROFFT_COMMENT) { 787 if (strstr(n->string, "-->") == NULL && 788 (wantblank || *n->string != '\0')) { 789 print_endline(h); 790 print_indent(h); 791 print_word(h, n->string); 792 wantblank = *n->string != '\0'; 793 } 794 n = n->next; 795 } 796 if (wantblank) 797 print_endline(h); 798 print_word(h, " -->"); 799 print_endline(h); 800 h->indent = 0; 801 } 802 803 void 804 print_text(struct html *h, const char *word) 805 { 806 /* 807 * Always wrap text in a paragraph unless already contained in 808 * some flow container; never put it directly into a section. 809 */ 810 811 if (h->tag->tag == TAG_SECTION) 812 print_otag(h, TAG_P, "c", "Pp"); 813 814 /* Output whitespace before this text? */ 815 816 if (h->col && (h->flags & HTML_NOSPACE) == 0) { 817 if ( ! (HTML_KEEP & h->flags)) { 818 if (HTML_PREKEEP & h->flags) 819 h->flags |= HTML_KEEP; 820 print_endword(h); 821 } else 822 print_word(h, " "); 823 } 824 825 /* 826 * Print the text, optionally surrounded by HTML whitespace, 827 * optionally manually switching fonts before and after. 828 */ 829 830 assert(h->metaf == NULL); 831 print_metaf(h); 832 print_indent(h); 833 if ( ! print_encode(h, word, NULL, 0)) { 834 if ( ! (h->flags & HTML_NONOSPACE)) 835 h->flags &= ~HTML_NOSPACE; 836 h->flags &= ~HTML_NONEWLINE; 837 } else 838 h->flags |= HTML_NOSPACE | HTML_NONEWLINE; 839 840 if (h->metaf != NULL) { 841 print_tagq(h, h->metaf); 842 h->metaf = NULL; 843 } 844 845 h->flags &= ~HTML_IGNDELIM; 846 } 847 848 void 849 print_tagq(struct html *h, const struct tag *until) 850 { 851 struct tag *this, *next; 852 853 for (this = h->tag; this != NULL; this = next) { 854 next = this == until ? NULL : this->next; 855 print_ctag(h, this); 856 } 857 } 858 859 /* 860 * Close out all open elements up to but excluding suntil. 861 * Note that a paragraph just inside stays open together with it 862 * because paragraphs include subsequent phrasing content. 863 */ 864 void 865 print_stagq(struct html *h, const struct tag *suntil) 866 { 867 struct tag *this, *next; 868 869 for (this = h->tag; this != NULL; this = next) { 870 next = this->next; 871 if (this == suntil || (next == suntil && 872 (this->tag == TAG_P || this->tag == TAG_PRE))) 873 break; 874 print_ctag(h, this); 875 } 876 } 877 878 879 /*********************************************************************** 880 * Low level output functions. 881 * They implement line breaking using a short static buffer. 882 ***********************************************************************/ 883 884 /* 885 * Buffer one HTML output byte. 886 * If the buffer is full, flush and deactivate it and start a new line. 887 * If the buffer is inactive, print directly. 888 */ 889 static void 890 print_byte(struct html *h, char c) 891 { 892 if ((h->flags & HTML_BUFFER) == 0) { 893 putchar(c); 894 h->col++; 895 return; 896 } 897 898 if (h->col + h->bufcol < sizeof(h->buf)) { 899 h->buf[h->bufcol++] = c; 900 return; 901 } 902 903 putchar('\n'); 904 h->col = 0; 905 print_indent(h); 906 putchar(' '); 907 putchar(' '); 908 fwrite(h->buf, h->bufcol, 1, stdout); 909 putchar(c); 910 h->col = (h->indent + 1) * 2 + h->bufcol + 1; 911 h->bufcol = 0; 912 h->flags &= ~HTML_BUFFER; 913 } 914 915 /* 916 * If something was printed on the current output line, end it. 917 * Not to be called right after print_indent(). 918 */ 919 void 920 print_endline(struct html *h) 921 { 922 if (h->col == 0) 923 return; 924 925 if (h->bufcol) { 926 putchar(' '); 927 fwrite(h->buf, h->bufcol, 1, stdout); 928 h->bufcol = 0; 929 } 930 putchar('\n'); 931 h->col = 0; 932 h->flags |= HTML_NOSPACE; 933 h->flags &= ~HTML_BUFFER; 934 } 935 936 /* 937 * Flush the HTML output buffer. 938 * If it is inactive, activate it. 939 */ 940 static void 941 print_endword(struct html *h) 942 { 943 if (h->noindent) { 944 print_byte(h, ' '); 945 return; 946 } 947 948 if ((h->flags & HTML_BUFFER) == 0) { 949 h->col++; 950 h->flags |= HTML_BUFFER; 951 } else if (h->bufcol) { 952 putchar(' '); 953 fwrite(h->buf, h->bufcol, 1, stdout); 954 h->col += h->bufcol + 1; 955 } 956 h->bufcol = 0; 957 } 958 959 /* 960 * If at the beginning of a new output line, 961 * perform indentation and mark the line as containing output. 962 * Make sure to really produce some output right afterwards, 963 * but do not use print_otag() for producing it. 964 */ 965 static void 966 print_indent(struct html *h) 967 { 968 size_t i; 969 970 if (h->col || h->noindent) 971 return; 972 973 h->col = h->indent * 2; 974 for (i = 0; i < h->col; i++) 975 putchar(' '); 976 } 977 978 /* 979 * Print or buffer some characters 980 * depending on the current HTML output buffer state. 981 */ 982 static void 983 print_word(struct html *h, const char *cp) 984 { 985 while (*cp != '\0') 986 print_byte(h, *cp++); 987 } 988