1 /* $OpenBSD: html.c,v 1.142 2020/10/16 17:22:38 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Common functions for mandoc(1) HTML formatters. 19 * For use by individual formatters and by the main program. 20 */ 21 #include <sys/types.h> 22 #include <sys/stat.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdarg.h> 27 #include <stddef.h> 28 #include <stdio.h> 29 #include <stdint.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #include "mandoc_aux.h" 35 #include "mandoc_ohash.h" 36 #include "mandoc.h" 37 #include "roff.h" 38 #include "out.h" 39 #include "html.h" 40 #include "manconf.h" 41 #include "main.h" 42 43 struct htmldata { 44 const char *name; 45 int flags; 46 #define HTML_INPHRASE (1 << 0) /* Can appear in phrasing context. */ 47 #define HTML_TOPHRASE (1 << 1) /* Establishes phrasing context. */ 48 #define HTML_NOSTACK (1 << 2) /* Does not have an end tag. */ 49 #define HTML_NLBEFORE (1 << 3) /* Output line break before opening. */ 50 #define HTML_NLBEGIN (1 << 4) /* Output line break after opening. */ 51 #define HTML_NLEND (1 << 5) /* Output line break before closing. */ 52 #define HTML_NLAFTER (1 << 6) /* Output line break after closing. */ 53 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) 54 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) 55 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) 56 #define HTML_INDENT (1 << 7) /* Indent content by two spaces. */ 57 #define HTML_NOINDENT (1 << 8) /* Exception: never indent content. */ 58 }; 59 60 static const struct htmldata htmltags[TAG_MAX] = { 61 {"html", HTML_NLALL}, 62 {"head", HTML_NLALL | HTML_INDENT}, 63 {"meta", HTML_NOSTACK | HTML_NLALL}, 64 {"link", HTML_NOSTACK | HTML_NLALL}, 65 {"style", HTML_NLALL | HTML_INDENT}, 66 {"title", HTML_NLAROUND}, 67 {"body", HTML_NLALL}, 68 {"div", HTML_NLAROUND}, 69 {"section", HTML_NLALL}, 70 {"table", HTML_NLALL | HTML_INDENT}, 71 {"tr", HTML_NLALL | HTML_INDENT}, 72 {"td", HTML_NLAROUND}, 73 {"li", HTML_NLAROUND | HTML_INDENT}, 74 {"ul", HTML_NLALL | HTML_INDENT}, 75 {"ol", HTML_NLALL | HTML_INDENT}, 76 {"dl", HTML_NLALL | HTML_INDENT}, 77 {"dt", HTML_NLAROUND}, 78 {"dd", HTML_NLAROUND | HTML_INDENT}, 79 {"h1", HTML_TOPHRASE | HTML_NLAROUND}, 80 {"h2", HTML_TOPHRASE | HTML_NLAROUND}, 81 {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT}, 82 {"pre", HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT}, 83 {"a", HTML_INPHRASE | HTML_TOPHRASE}, 84 {"b", HTML_INPHRASE | HTML_TOPHRASE}, 85 {"cite", HTML_INPHRASE | HTML_TOPHRASE}, 86 {"code", HTML_INPHRASE | HTML_TOPHRASE}, 87 {"i", HTML_INPHRASE | HTML_TOPHRASE}, 88 {"small", HTML_INPHRASE | HTML_TOPHRASE}, 89 {"span", HTML_INPHRASE | HTML_TOPHRASE}, 90 {"var", HTML_INPHRASE | HTML_TOPHRASE}, 91 {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL}, 92 {"mark", HTML_INPHRASE }, 93 {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT}, 94 {"mrow", 0}, 95 {"mi", 0}, 96 {"mn", 0}, 97 {"mo", 0}, 98 {"msup", 0}, 99 {"msub", 0}, 100 {"msubsup", 0}, 101 {"mfrac", 0}, 102 {"msqrt", 0}, 103 {"mfenced", 0}, 104 {"mtable", 0}, 105 {"mtr", 0}, 106 {"mtd", 0}, 107 {"munderover", 0}, 108 {"munder", 0}, 109 {"mover", 0}, 110 }; 111 112 /* Avoid duplicate HTML id= attributes. */ 113 114 struct id_entry { 115 int ord; /* Ordinal number of the latest occurrence. */ 116 char id[]; /* The id= attribute without any ordinal suffix. */ 117 }; 118 static struct ohash id_unique; 119 120 static void html_reset_internal(struct html *); 121 static void print_byte(struct html *, char); 122 static void print_endword(struct html *); 123 static void print_indent(struct html *); 124 static void print_word(struct html *, const char *); 125 126 static void print_ctag(struct html *, struct tag *); 127 static int print_escape(struct html *, char); 128 static int print_encode(struct html *, const char *, const char *, int); 129 static void print_href(struct html *, const char *, const char *, int); 130 static void print_metaf(struct html *); 131 132 133 void * 134 html_alloc(const struct manoutput *outopts) 135 { 136 struct html *h; 137 138 h = mandoc_calloc(1, sizeof(struct html)); 139 140 h->tag = NULL; 141 h->style = outopts->style; 142 if ((h->base_man1 = outopts->man) == NULL) 143 h->base_man2 = NULL; 144 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL) 145 *h->base_man2++ = '\0'; 146 h->base_includes = outopts->includes; 147 if (outopts->fragment) 148 h->oflags |= HTML_FRAGMENT; 149 if (outopts->toc) 150 h->oflags |= HTML_TOC; 151 152 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)); 153 154 return h; 155 } 156 157 static void 158 html_reset_internal(struct html *h) 159 { 160 struct tag *tag; 161 struct id_entry *entry; 162 unsigned int slot; 163 164 while ((tag = h->tag) != NULL) { 165 h->tag = tag->next; 166 free(tag); 167 } 168 entry = ohash_first(&id_unique, &slot); 169 while (entry != NULL) { 170 free(entry); 171 entry = ohash_next(&id_unique, &slot); 172 } 173 ohash_delete(&id_unique); 174 } 175 176 void 177 html_reset(void *p) 178 { 179 html_reset_internal(p); 180 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)); 181 } 182 183 void 184 html_free(void *p) 185 { 186 html_reset_internal(p); 187 free(p); 188 } 189 190 void 191 print_gen_head(struct html *h) 192 { 193 struct tag *t; 194 195 print_otag(h, TAG_META, "?", "charset", "utf-8"); 196 if (h->style != NULL) { 197 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet", 198 h->style, "type", "text/css", "media", "all"); 199 return; 200 } 201 202 /* 203 * Print a minimal embedded style sheet. 204 */ 205 206 t = print_otag(h, TAG_STYLE, ""); 207 print_text(h, "table.head, table.foot { width: 100%; }"); 208 print_endline(h); 209 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }"); 210 print_endline(h); 211 print_text(h, "td.head-vol { text-align: center; }"); 212 print_endline(h); 213 print_text(h, ".Nd, .Bf, .Op { display: inline; }"); 214 print_endline(h); 215 print_text(h, ".Pa, .Ad { font-style: italic; }"); 216 print_endline(h); 217 print_text(h, ".Ms { font-weight: bold; }"); 218 print_endline(h); 219 print_text(h, ".Bl-diag "); 220 print_byte(h, '>'); 221 print_text(h, " dt { font-weight: bold; }"); 222 print_endline(h); 223 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd " 224 "{ font-weight: bold; font-family: inherit; }"); 225 print_tagq(h, t); 226 } 227 228 int 229 html_setfont(struct html *h, enum mandoc_esc font) 230 { 231 switch (font) { 232 case ESCAPE_FONTPREV: 233 font = h->metal; 234 break; 235 case ESCAPE_FONTITALIC: 236 case ESCAPE_FONTBOLD: 237 case ESCAPE_FONTBI: 238 case ESCAPE_FONTCW: 239 case ESCAPE_FONTROMAN: 240 break; 241 case ESCAPE_FONT: 242 font = ESCAPE_FONTROMAN; 243 break; 244 default: 245 return 0; 246 } 247 h->metal = h->metac; 248 h->metac = font; 249 return 1; 250 } 251 252 static void 253 print_metaf(struct html *h) 254 { 255 if (h->metaf) { 256 print_tagq(h, h->metaf); 257 h->metaf = NULL; 258 } 259 switch (h->metac) { 260 case ESCAPE_FONTITALIC: 261 h->metaf = print_otag(h, TAG_I, ""); 262 break; 263 case ESCAPE_FONTBOLD: 264 h->metaf = print_otag(h, TAG_B, ""); 265 break; 266 case ESCAPE_FONTBI: 267 h->metaf = print_otag(h, TAG_B, ""); 268 print_otag(h, TAG_I, ""); 269 break; 270 case ESCAPE_FONTCW: 271 h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); 272 break; 273 default: 274 break; 275 } 276 } 277 278 void 279 html_close_paragraph(struct html *h) 280 { 281 struct tag *this, *next; 282 int flags; 283 284 this = h->tag; 285 for (;;) { 286 next = this->next; 287 flags = htmltags[this->tag].flags; 288 if (flags & (HTML_INPHRASE | HTML_TOPHRASE)) 289 print_ctag(h, this); 290 if ((flags & HTML_INPHRASE) == 0) 291 break; 292 this = next; 293 } 294 } 295 296 /* 297 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode. 298 * TOKEN_NONE does not switch. The old mode is returned. 299 */ 300 enum roff_tok 301 html_fillmode(struct html *h, enum roff_tok want) 302 { 303 struct tag *t; 304 enum roff_tok had; 305 306 for (t = h->tag; t != NULL; t = t->next) 307 if (t->tag == TAG_PRE) 308 break; 309 310 had = t == NULL ? ROFF_fi : ROFF_nf; 311 312 if (want != had) { 313 switch (want) { 314 case ROFF_fi: 315 print_tagq(h, t); 316 break; 317 case ROFF_nf: 318 html_close_paragraph(h); 319 print_otag(h, TAG_PRE, ""); 320 break; 321 case TOKEN_NONE: 322 break; 323 default: 324 abort(); 325 } 326 } 327 return had; 328 } 329 330 /* 331 * Allocate a string to be used for the "id=" attribute of an HTML 332 * element and/or as a segment identifier for a URI in an <a> element. 333 * The function may fail and return NULL if the node lacks text data 334 * to create the attribute from. 335 * The caller is responsible for free(3)ing the returned string. 336 * 337 * If the "unique" argument is non-zero, the "id_unique" ohash table 338 * is used for de-duplication. If the "unique" argument is 1, 339 * it is the first time the function is called for this tag and 340 * location, so if an ordinal suffix is needed, it is incremented. 341 * If the "unique" argument is 2, it is the second time the function 342 * is called for this tag and location, so the ordinal suffix 343 * remains unchanged. 344 */ 345 char * 346 html_make_id(const struct roff_node *n, int unique) 347 { 348 const struct roff_node *nch; 349 struct id_entry *entry; 350 char *buf, *cp; 351 size_t len; 352 unsigned int slot; 353 354 if (n->tag != NULL) 355 buf = mandoc_strdup(n->tag); 356 else { 357 switch (n->tok) { 358 case MDOC_Sh: 359 case MDOC_Ss: 360 case MDOC_Sx: 361 case MAN_SH: 362 case MAN_SS: 363 for (nch = n->child; nch != NULL; nch = nch->next) 364 if (nch->type != ROFFT_TEXT) 365 return NULL; 366 buf = NULL; 367 deroff(&buf, n); 368 if (buf == NULL) 369 return NULL; 370 break; 371 default: 372 if (n->child == NULL || n->child->type != ROFFT_TEXT) 373 return NULL; 374 buf = mandoc_strdup(n->child->string); 375 break; 376 } 377 } 378 379 /* 380 * In ID attributes, only use ASCII characters that are 381 * permitted in URL-fragment strings according to the 382 * explicit list at: 383 * https://url.spec.whatwg.org/#url-fragment-string 384 * In addition, reserve '~' for ordinal suffixes. 385 */ 386 387 for (cp = buf; *cp != '\0'; cp++) 388 if (isalnum((unsigned char)*cp) == 0 && 389 strchr("!$&'()*+,-./:;=?@_", *cp) == NULL) 390 *cp = '_'; 391 392 if (unique == 0) 393 return buf; 394 395 /* Avoid duplicate HTML id= attributes. */ 396 397 slot = ohash_qlookup(&id_unique, buf); 398 if ((entry = ohash_find(&id_unique, slot)) == NULL) { 399 len = strlen(buf) + 1; 400 entry = mandoc_malloc(sizeof(*entry) + len); 401 entry->ord = 1; 402 memcpy(entry->id, buf, len); 403 ohash_insert(&id_unique, slot, entry); 404 } else if (unique == 1) 405 entry->ord++; 406 407 if (entry->ord > 1) { 408 cp = buf; 409 mandoc_asprintf(&buf, "%s~%d", cp, entry->ord); 410 free(cp); 411 } 412 return buf; 413 } 414 415 static int 416 print_escape(struct html *h, char c) 417 { 418 419 switch (c) { 420 case '<': 421 print_word(h, "<"); 422 break; 423 case '>': 424 print_word(h, ">"); 425 break; 426 case '&': 427 print_word(h, "&"); 428 break; 429 case '"': 430 print_word(h, """); 431 break; 432 case ASCII_NBRSP: 433 print_word(h, " "); 434 break; 435 case ASCII_HYPH: 436 print_byte(h, '-'); 437 break; 438 case ASCII_BREAK: 439 break; 440 default: 441 return 0; 442 } 443 return 1; 444 } 445 446 static int 447 print_encode(struct html *h, const char *p, const char *pend, int norecurse) 448 { 449 char numbuf[16]; 450 const char *seq; 451 size_t sz; 452 int c, len, breakline, nospace; 453 enum mandoc_esc esc; 454 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"', 455 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; 456 457 if (pend == NULL) 458 pend = strchr(p, '\0'); 459 460 breakline = 0; 461 nospace = 0; 462 463 while (p < pend) { 464 if (HTML_SKIPCHAR & h->flags && '\\' != *p) { 465 h->flags &= ~HTML_SKIPCHAR; 466 p++; 467 continue; 468 } 469 470 for (sz = strcspn(p, rejs); sz-- && p < pend; p++) 471 print_byte(h, *p); 472 473 if (breakline && 474 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) { 475 print_otag(h, TAG_BR, ""); 476 breakline = 0; 477 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP)) 478 p++; 479 continue; 480 } 481 482 if (p >= pend) 483 break; 484 485 if (*p == ' ') { 486 print_endword(h); 487 p++; 488 continue; 489 } 490 491 if (print_escape(h, *p++)) 492 continue; 493 494 esc = mandoc_escape(&p, &seq, &len); 495 switch (esc) { 496 case ESCAPE_FONT: 497 case ESCAPE_FONTPREV: 498 case ESCAPE_FONTBOLD: 499 case ESCAPE_FONTITALIC: 500 case ESCAPE_FONTBI: 501 case ESCAPE_FONTCW: 502 case ESCAPE_FONTROMAN: 503 if (0 == norecurse) { 504 h->flags |= HTML_NOSPACE; 505 if (html_setfont(h, esc)) 506 print_metaf(h); 507 h->flags &= ~HTML_NOSPACE; 508 } 509 continue; 510 case ESCAPE_SKIPCHAR: 511 h->flags |= HTML_SKIPCHAR; 512 continue; 513 case ESCAPE_ERROR: 514 continue; 515 default: 516 break; 517 } 518 519 if (h->flags & HTML_SKIPCHAR) { 520 h->flags &= ~HTML_SKIPCHAR; 521 continue; 522 } 523 524 switch (esc) { 525 case ESCAPE_UNICODE: 526 /* Skip past "u" header. */ 527 c = mchars_num2uc(seq + 1, len - 1); 528 break; 529 case ESCAPE_NUMBERED: 530 c = mchars_num2char(seq, len); 531 if (c < 0) 532 continue; 533 break; 534 case ESCAPE_SPECIAL: 535 c = mchars_spec2cp(seq, len); 536 if (c <= 0) 537 continue; 538 break; 539 case ESCAPE_UNDEF: 540 c = *seq; 541 break; 542 case ESCAPE_DEVICE: 543 print_word(h, "html"); 544 continue; 545 case ESCAPE_BREAK: 546 breakline = 1; 547 continue; 548 case ESCAPE_NOSPACE: 549 if ('\0' == *p) 550 nospace = 1; 551 continue; 552 case ESCAPE_OVERSTRIKE: 553 if (len == 0) 554 continue; 555 c = seq[len - 1]; 556 break; 557 default: 558 continue; 559 } 560 if ((c < 0x20 && c != 0x09) || 561 (c > 0x7E && c < 0xA0)) 562 c = 0xFFFD; 563 if (c > 0x7E) { 564 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c); 565 print_word(h, numbuf); 566 } else if (print_escape(h, c) == 0) 567 print_byte(h, c); 568 } 569 570 return nospace; 571 } 572 573 static void 574 print_href(struct html *h, const char *name, const char *sec, int man) 575 { 576 struct stat sb; 577 const char *p, *pp; 578 char *filename; 579 580 if (man) { 581 pp = h->base_man1; 582 if (h->base_man2 != NULL) { 583 mandoc_asprintf(&filename, "%s.%s", name, sec); 584 if (stat(filename, &sb) == -1) 585 pp = h->base_man2; 586 free(filename); 587 } 588 } else 589 pp = h->base_includes; 590 591 while ((p = strchr(pp, '%')) != NULL) { 592 print_encode(h, pp, p, 1); 593 if (man && p[1] == 'S') { 594 if (sec == NULL) 595 print_byte(h, '1'); 596 else 597 print_encode(h, sec, NULL, 1); 598 } else if ((man && p[1] == 'N') || 599 (man == 0 && p[1] == 'I')) 600 print_encode(h, name, NULL, 1); 601 else 602 print_encode(h, p, p + 2, 1); 603 pp = p + 2; 604 } 605 if (*pp != '\0') 606 print_encode(h, pp, NULL, 1); 607 } 608 609 struct tag * 610 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) 611 { 612 va_list ap; 613 struct tag *t; 614 const char *attr; 615 char *arg1, *arg2; 616 int style_written, tflags; 617 618 tflags = htmltags[tag].flags; 619 620 /* Flow content is not allowed in phrasing context. */ 621 622 if ((tflags & HTML_INPHRASE) == 0) { 623 for (t = h->tag; t != NULL; t = t->next) { 624 if (t->closed) 625 continue; 626 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0); 627 break; 628 } 629 630 /* 631 * Always wrap phrasing elements in a paragraph 632 * unless already contained in some flow container; 633 * never put them directly into a section. 634 */ 635 636 } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION) 637 print_otag(h, TAG_P, "c", "Pp"); 638 639 /* Push this tag onto the stack of open scopes. */ 640 641 if ((tflags & HTML_NOSTACK) == 0) { 642 t = mandoc_malloc(sizeof(struct tag)); 643 t->tag = tag; 644 t->next = h->tag; 645 t->refcnt = 0; 646 t->closed = 0; 647 h->tag = t; 648 } else 649 t = NULL; 650 651 if (tflags & HTML_NLBEFORE) 652 print_endline(h); 653 if (h->col == 0) 654 print_indent(h); 655 else if ((h->flags & HTML_NOSPACE) == 0) { 656 if (h->flags & HTML_KEEP) 657 print_word(h, " "); 658 else { 659 if (h->flags & HTML_PREKEEP) 660 h->flags |= HTML_KEEP; 661 print_endword(h); 662 } 663 } 664 665 if ( ! (h->flags & HTML_NONOSPACE)) 666 h->flags &= ~HTML_NOSPACE; 667 else 668 h->flags |= HTML_NOSPACE; 669 670 /* Print out the tag name and attributes. */ 671 672 print_byte(h, '<'); 673 print_word(h, htmltags[tag].name); 674 675 va_start(ap, fmt); 676 677 while (*fmt != '\0' && *fmt != 's') { 678 679 /* Parse attributes and arguments. */ 680 681 arg1 = va_arg(ap, char *); 682 arg2 = NULL; 683 switch (*fmt++) { 684 case 'c': 685 attr = "class"; 686 break; 687 case 'h': 688 attr = "href"; 689 break; 690 case 'i': 691 attr = "id"; 692 break; 693 case '?': 694 attr = arg1; 695 arg1 = va_arg(ap, char *); 696 break; 697 default: 698 abort(); 699 } 700 if (*fmt == 'M') 701 arg2 = va_arg(ap, char *); 702 if (arg1 == NULL) 703 continue; 704 705 /* Print the attributes. */ 706 707 print_byte(h, ' '); 708 print_word(h, attr); 709 print_byte(h, '='); 710 print_byte(h, '"'); 711 switch (*fmt) { 712 case 'I': 713 print_href(h, arg1, NULL, 0); 714 fmt++; 715 break; 716 case 'M': 717 print_href(h, arg1, arg2, 1); 718 fmt++; 719 break; 720 case 'R': 721 print_byte(h, '#'); 722 print_encode(h, arg1, NULL, 1); 723 fmt++; 724 break; 725 default: 726 print_encode(h, arg1, NULL, 1); 727 break; 728 } 729 print_byte(h, '"'); 730 } 731 732 style_written = 0; 733 while (*fmt++ == 's') { 734 arg1 = va_arg(ap, char *); 735 arg2 = va_arg(ap, char *); 736 if (arg2 == NULL) 737 continue; 738 print_byte(h, ' '); 739 if (style_written == 0) { 740 print_word(h, "style=\""); 741 style_written = 1; 742 } 743 print_word(h, arg1); 744 print_byte(h, ':'); 745 print_byte(h, ' '); 746 print_word(h, arg2); 747 print_byte(h, ';'); 748 } 749 if (style_written) 750 print_byte(h, '"'); 751 752 va_end(ap); 753 754 /* Accommodate for "well-formed" singleton escaping. */ 755 756 if (htmltags[tag].flags & HTML_NOSTACK) 757 print_byte(h, '/'); 758 759 print_byte(h, '>'); 760 761 if (tflags & HTML_NLBEGIN) 762 print_endline(h); 763 else 764 h->flags |= HTML_NOSPACE; 765 766 if (tflags & HTML_INDENT) 767 h->indent++; 768 if (tflags & HTML_NOINDENT) 769 h->noindent++; 770 771 return t; 772 } 773 774 /* 775 * Print an element with an optional "id=" attribute. 776 * If the element has phrasing content and an "id=" attribute, 777 * also add a permalink: outside if it can be in phrasing context, 778 * inside otherwise. 779 */ 780 struct tag * 781 print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr, 782 struct roff_node *n) 783 { 784 struct roff_node *nch; 785 struct tag *ret, *t; 786 char *id, *href; 787 788 ret = NULL; 789 id = href = NULL; 790 if (n->flags & NODE_ID) 791 id = html_make_id(n, 1); 792 if (n->flags & NODE_HREF) 793 href = id == NULL ? html_make_id(n, 2) : id; 794 if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE) 795 ret = print_otag(h, TAG_A, "chR", "permalink", href); 796 t = print_otag(h, elemtype, "ci", cattr, id); 797 if (ret == NULL) { 798 ret = t; 799 if (href != NULL && (nch = n->child) != NULL) { 800 /* man(7) is safe, it tags phrasing content only. */ 801 if (n->tok > MDOC_MAX || 802 htmltags[elemtype].flags & HTML_TOPHRASE) 803 nch = NULL; 804 else /* For mdoc(7), beware of nested blocks. */ 805 while (nch != NULL && nch->type == ROFFT_TEXT) 806 nch = nch->next; 807 if (nch == NULL) 808 print_otag(h, TAG_A, "chR", "permalink", href); 809 } 810 } 811 free(id); 812 if (id == NULL) 813 free(href); 814 return ret; 815 } 816 817 static void 818 print_ctag(struct html *h, struct tag *tag) 819 { 820 int tflags; 821 822 if (tag->closed == 0) { 823 tag->closed = 1; 824 if (tag == h->metaf) 825 h->metaf = NULL; 826 if (tag == h->tblt) 827 h->tblt = NULL; 828 829 tflags = htmltags[tag->tag].flags; 830 if (tflags & HTML_INDENT) 831 h->indent--; 832 if (tflags & HTML_NOINDENT) 833 h->noindent--; 834 if (tflags & HTML_NLEND) 835 print_endline(h); 836 print_indent(h); 837 print_byte(h, '<'); 838 print_byte(h, '/'); 839 print_word(h, htmltags[tag->tag].name); 840 print_byte(h, '>'); 841 if (tflags & HTML_NLAFTER) 842 print_endline(h); 843 } 844 if (tag->refcnt == 0) { 845 h->tag = tag->next; 846 free(tag); 847 } 848 } 849 850 void 851 print_gen_decls(struct html *h) 852 { 853 print_word(h, "<!DOCTYPE html>"); 854 print_endline(h); 855 } 856 857 void 858 print_gen_comment(struct html *h, struct roff_node *n) 859 { 860 int wantblank; 861 862 print_word(h, "<!-- This is an automatically generated file." 863 " Do not edit."); 864 h->indent = 1; 865 wantblank = 0; 866 while (n != NULL && n->type == ROFFT_COMMENT) { 867 if (strstr(n->string, "-->") == NULL && 868 (wantblank || *n->string != '\0')) { 869 print_endline(h); 870 print_indent(h); 871 print_word(h, n->string); 872 wantblank = *n->string != '\0'; 873 } 874 n = n->next; 875 } 876 if (wantblank) 877 print_endline(h); 878 print_word(h, " -->"); 879 print_endline(h); 880 h->indent = 0; 881 } 882 883 void 884 print_text(struct html *h, const char *word) 885 { 886 print_tagged_text(h, word, NULL); 887 } 888 889 void 890 print_tagged_text(struct html *h, const char *word, struct roff_node *n) 891 { 892 struct tag *t; 893 char *href; 894 895 /* 896 * Always wrap text in a paragraph unless already contained in 897 * some flow container; never put it directly into a section. 898 */ 899 900 if (h->tag->tag == TAG_SECTION) 901 print_otag(h, TAG_P, "c", "Pp"); 902 903 /* Output whitespace before this text? */ 904 905 if (h->col && (h->flags & HTML_NOSPACE) == 0) { 906 if ( ! (HTML_KEEP & h->flags)) { 907 if (HTML_PREKEEP & h->flags) 908 h->flags |= HTML_KEEP; 909 print_endword(h); 910 } else 911 print_word(h, " "); 912 } 913 914 /* 915 * Optionally switch fonts, optionally write a permalink, then 916 * print the text, optionally surrounded by HTML whitespace. 917 */ 918 919 assert(h->metaf == NULL); 920 print_metaf(h); 921 print_indent(h); 922 923 if (n != NULL && (href = html_make_id(n, 2)) != NULL) { 924 t = print_otag(h, TAG_A, "chR", "permalink", href); 925 free(href); 926 } else 927 t = NULL; 928 929 if ( ! print_encode(h, word, NULL, 0)) { 930 if ( ! (h->flags & HTML_NONOSPACE)) 931 h->flags &= ~HTML_NOSPACE; 932 h->flags &= ~HTML_NONEWLINE; 933 } else 934 h->flags |= HTML_NOSPACE | HTML_NONEWLINE; 935 936 if (h->metaf != NULL) { 937 print_tagq(h, h->metaf); 938 h->metaf = NULL; 939 } else if (t != NULL) 940 print_tagq(h, t); 941 942 h->flags &= ~HTML_IGNDELIM; 943 } 944 945 void 946 print_tagq(struct html *h, const struct tag *until) 947 { 948 struct tag *this, *next; 949 950 for (this = h->tag; this != NULL; this = next) { 951 next = this == until ? NULL : this->next; 952 print_ctag(h, this); 953 } 954 } 955 956 /* 957 * Close out all open elements up to but excluding suntil. 958 * Note that a paragraph just inside stays open together with it 959 * because paragraphs include subsequent phrasing content. 960 */ 961 void 962 print_stagq(struct html *h, const struct tag *suntil) 963 { 964 struct tag *this, *next; 965 966 for (this = h->tag; this != NULL; this = next) { 967 next = this->next; 968 if (this == suntil || (next == suntil && 969 (this->tag == TAG_P || this->tag == TAG_PRE))) 970 break; 971 print_ctag(h, this); 972 } 973 } 974 975 976 /*********************************************************************** 977 * Low level output functions. 978 * They implement line breaking using a short static buffer. 979 ***********************************************************************/ 980 981 /* 982 * Buffer one HTML output byte. 983 * If the buffer is full, flush and deactivate it and start a new line. 984 * If the buffer is inactive, print directly. 985 */ 986 static void 987 print_byte(struct html *h, char c) 988 { 989 if ((h->flags & HTML_BUFFER) == 0) { 990 putchar(c); 991 h->col++; 992 return; 993 } 994 995 if (h->col + h->bufcol < sizeof(h->buf)) { 996 h->buf[h->bufcol++] = c; 997 return; 998 } 999 1000 putchar('\n'); 1001 h->col = 0; 1002 print_indent(h); 1003 putchar(' '); 1004 putchar(' '); 1005 fwrite(h->buf, h->bufcol, 1, stdout); 1006 putchar(c); 1007 h->col = (h->indent + 1) * 2 + h->bufcol + 1; 1008 h->bufcol = 0; 1009 h->flags &= ~HTML_BUFFER; 1010 } 1011 1012 /* 1013 * If something was printed on the current output line, end it. 1014 * Not to be called right after print_indent(). 1015 */ 1016 void 1017 print_endline(struct html *h) 1018 { 1019 if (h->col == 0) 1020 return; 1021 1022 if (h->bufcol) { 1023 putchar(' '); 1024 fwrite(h->buf, h->bufcol, 1, stdout); 1025 h->bufcol = 0; 1026 } 1027 putchar('\n'); 1028 h->col = 0; 1029 h->flags |= HTML_NOSPACE; 1030 h->flags &= ~HTML_BUFFER; 1031 } 1032 1033 /* 1034 * Flush the HTML output buffer. 1035 * If it is inactive, activate it. 1036 */ 1037 static void 1038 print_endword(struct html *h) 1039 { 1040 if (h->noindent) { 1041 print_byte(h, ' '); 1042 return; 1043 } 1044 1045 if ((h->flags & HTML_BUFFER) == 0) { 1046 h->col++; 1047 h->flags |= HTML_BUFFER; 1048 } else if (h->bufcol) { 1049 putchar(' '); 1050 fwrite(h->buf, h->bufcol, 1, stdout); 1051 h->col += h->bufcol + 1; 1052 } 1053 h->bufcol = 0; 1054 } 1055 1056 /* 1057 * If at the beginning of a new output line, 1058 * perform indentation and mark the line as containing output. 1059 * Make sure to really produce some output right afterwards, 1060 * but do not use print_otag() for producing it. 1061 */ 1062 static void 1063 print_indent(struct html *h) 1064 { 1065 size_t i; 1066 1067 if (h->col || h->noindent) 1068 return; 1069 1070 h->col = h->indent * 2; 1071 for (i = 0; i < h->col; i++) 1072 putchar(' '); 1073 } 1074 1075 /* 1076 * Print or buffer some characters 1077 * depending on the current HTML output buffer state. 1078 */ 1079 static void 1080 print_word(struct html *h, const char *cp) 1081 { 1082 while (*cp != '\0') 1083 print_byte(h, *cp++); 1084 } 1085