1 /* $OpenBSD: html.c,v 1.145 2021/08/10 12:36:42 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Common functions for mandoc(1) HTML formatters. 19 * For use by individual formatters and by the main program. 20 */ 21 #include <sys/types.h> 22 #include <sys/stat.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdarg.h> 27 #include <stddef.h> 28 #include <stdio.h> 29 #include <stdint.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #include "mandoc_aux.h" 35 #include "mandoc_ohash.h" 36 #include "mandoc.h" 37 #include "roff.h" 38 #include "out.h" 39 #include "html.h" 40 #include "manconf.h" 41 #include "main.h" 42 43 struct htmldata { 44 const char *name; 45 int flags; 46 #define HTML_INPHRASE (1 << 0) /* Can appear in phrasing context. */ 47 #define HTML_TOPHRASE (1 << 1) /* Establishes phrasing context. */ 48 #define HTML_NOSTACK (1 << 2) /* Does not have an end tag. */ 49 #define HTML_NLBEFORE (1 << 3) /* Output line break before opening. */ 50 #define HTML_NLBEGIN (1 << 4) /* Output line break after opening. */ 51 #define HTML_NLEND (1 << 5) /* Output line break before closing. */ 52 #define HTML_NLAFTER (1 << 6) /* Output line break after closing. */ 53 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) 54 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) 55 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) 56 #define HTML_INDENT (1 << 7) /* Indent content by two spaces. */ 57 #define HTML_NOINDENT (1 << 8) /* Exception: never indent content. */ 58 }; 59 60 static const struct htmldata htmltags[TAG_MAX] = { 61 {"html", HTML_NLALL}, 62 {"head", HTML_NLALL | HTML_INDENT}, 63 {"meta", HTML_NOSTACK | HTML_NLALL}, 64 {"link", HTML_NOSTACK | HTML_NLALL}, 65 {"style", HTML_NLALL | HTML_INDENT}, 66 {"title", HTML_NLAROUND}, 67 {"body", HTML_NLALL}, 68 {"div", HTML_NLAROUND}, 69 {"section", HTML_NLALL}, 70 {"table", HTML_NLALL | HTML_INDENT}, 71 {"tr", HTML_NLALL | HTML_INDENT}, 72 {"td", HTML_NLAROUND}, 73 {"li", HTML_NLAROUND | HTML_INDENT}, 74 {"ul", HTML_NLALL | HTML_INDENT}, 75 {"ol", HTML_NLALL | HTML_INDENT}, 76 {"dl", HTML_NLALL | HTML_INDENT}, 77 {"dt", HTML_NLAROUND}, 78 {"dd", HTML_NLAROUND | HTML_INDENT}, 79 {"h1", HTML_TOPHRASE | HTML_NLAROUND}, 80 {"h2", HTML_TOPHRASE | HTML_NLAROUND}, 81 {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT}, 82 {"pre", HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT}, 83 {"a", HTML_INPHRASE | HTML_TOPHRASE}, 84 {"b", HTML_INPHRASE | HTML_TOPHRASE}, 85 {"cite", HTML_INPHRASE | HTML_TOPHRASE}, 86 {"code", HTML_INPHRASE | HTML_TOPHRASE}, 87 {"i", HTML_INPHRASE | HTML_TOPHRASE}, 88 {"small", HTML_INPHRASE | HTML_TOPHRASE}, 89 {"span", HTML_INPHRASE | HTML_TOPHRASE}, 90 {"var", HTML_INPHRASE | HTML_TOPHRASE}, 91 {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL}, 92 {"mark", HTML_INPHRASE }, 93 {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT}, 94 {"mrow", 0}, 95 {"mi", 0}, 96 {"mn", 0}, 97 {"mo", 0}, 98 {"msup", 0}, 99 {"msub", 0}, 100 {"msubsup", 0}, 101 {"mfrac", 0}, 102 {"msqrt", 0}, 103 {"mfenced", 0}, 104 {"mtable", 0}, 105 {"mtr", 0}, 106 {"mtd", 0}, 107 {"munderover", 0}, 108 {"munder", 0}, 109 {"mover", 0}, 110 }; 111 112 /* Avoid duplicate HTML id= attributes. */ 113 114 struct id_entry { 115 int ord; /* Ordinal number of the latest occurrence. */ 116 char id[]; /* The id= attribute without any ordinal suffix. */ 117 }; 118 static struct ohash id_unique; 119 120 static void html_reset_internal(struct html *); 121 static void print_byte(struct html *, char); 122 static void print_endword(struct html *); 123 static void print_indent(struct html *); 124 static void print_word(struct html *, const char *); 125 126 static void print_ctag(struct html *, struct tag *); 127 static int print_escape(struct html *, char); 128 static int print_encode(struct html *, const char *, const char *, int); 129 static void print_href(struct html *, const char *, const char *, int); 130 static void print_metaf(struct html *); 131 132 133 void * 134 html_alloc(const struct manoutput *outopts) 135 { 136 struct html *h; 137 138 h = mandoc_calloc(1, sizeof(struct html)); 139 140 h->tag = NULL; 141 h->metac = h->metal = ESCAPE_FONTROMAN; 142 h->style = outopts->style; 143 if ((h->base_man1 = outopts->man) == NULL) 144 h->base_man2 = NULL; 145 else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL) 146 *h->base_man2++ = '\0'; 147 h->base_includes = outopts->includes; 148 if (outopts->fragment) 149 h->oflags |= HTML_FRAGMENT; 150 if (outopts->toc) 151 h->oflags |= HTML_TOC; 152 153 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)); 154 155 return h; 156 } 157 158 static void 159 html_reset_internal(struct html *h) 160 { 161 struct tag *tag; 162 struct id_entry *entry; 163 unsigned int slot; 164 165 while ((tag = h->tag) != NULL) { 166 h->tag = tag->next; 167 free(tag); 168 } 169 entry = ohash_first(&id_unique, &slot); 170 while (entry != NULL) { 171 free(entry); 172 entry = ohash_next(&id_unique, &slot); 173 } 174 ohash_delete(&id_unique); 175 } 176 177 void 178 html_reset(void *p) 179 { 180 html_reset_internal(p); 181 mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)); 182 } 183 184 void 185 html_free(void *p) 186 { 187 html_reset_internal(p); 188 free(p); 189 } 190 191 void 192 print_gen_head(struct html *h) 193 { 194 struct tag *t; 195 196 print_otag(h, TAG_META, "?", "charset", "utf-8"); 197 print_otag(h, TAG_META, "??", "name", "viewport", 198 "content", "width=device-width, initial-scale=1.0"); 199 if (h->style != NULL) { 200 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet", 201 h->style, "type", "text/css", "media", "all"); 202 return; 203 } 204 205 /* 206 * Print a minimal embedded style sheet. 207 */ 208 209 t = print_otag(h, TAG_STYLE, ""); 210 print_text(h, "table.head, table.foot { width: 100%; }"); 211 print_endline(h); 212 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }"); 213 print_endline(h); 214 print_text(h, "td.head-vol { text-align: center; }"); 215 print_endline(h); 216 print_text(h, ".Nd, .Bf, .Op { display: inline; }"); 217 print_endline(h); 218 print_text(h, ".Pa, .Ad { font-style: italic; }"); 219 print_endline(h); 220 print_text(h, ".Ms { font-weight: bold; }"); 221 print_endline(h); 222 print_text(h, ".Bl-diag "); 223 print_byte(h, '>'); 224 print_text(h, " dt { font-weight: bold; }"); 225 print_endline(h); 226 print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd " 227 "{ font-weight: bold; font-family: inherit; }"); 228 print_tagq(h, t); 229 } 230 231 int 232 html_setfont(struct html *h, enum mandoc_esc font) 233 { 234 switch (font) { 235 case ESCAPE_FONTPREV: 236 font = h->metal; 237 break; 238 case ESCAPE_FONTITALIC: 239 case ESCAPE_FONTBOLD: 240 case ESCAPE_FONTBI: 241 case ESCAPE_FONTROMAN: 242 case ESCAPE_FONTCR: 243 case ESCAPE_FONTCB: 244 case ESCAPE_FONTCI: 245 break; 246 case ESCAPE_FONT: 247 font = ESCAPE_FONTROMAN; 248 break; 249 default: 250 return 0; 251 } 252 h->metal = h->metac; 253 h->metac = font; 254 return 1; 255 } 256 257 static void 258 print_metaf(struct html *h) 259 { 260 if (h->metaf) { 261 print_tagq(h, h->metaf); 262 h->metaf = NULL; 263 } 264 switch (h->metac) { 265 case ESCAPE_FONTITALIC: 266 h->metaf = print_otag(h, TAG_I, ""); 267 break; 268 case ESCAPE_FONTBOLD: 269 h->metaf = print_otag(h, TAG_B, ""); 270 break; 271 case ESCAPE_FONTBI: 272 h->metaf = print_otag(h, TAG_B, ""); 273 print_otag(h, TAG_I, ""); 274 break; 275 case ESCAPE_FONTCR: 276 h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); 277 break; 278 case ESCAPE_FONTCB: 279 h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); 280 print_otag(h, TAG_B, ""); 281 break; 282 case ESCAPE_FONTCI: 283 h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); 284 print_otag(h, TAG_I, ""); 285 break; 286 default: 287 break; 288 } 289 } 290 291 void 292 html_close_paragraph(struct html *h) 293 { 294 struct tag *this, *next; 295 int flags; 296 297 this = h->tag; 298 for (;;) { 299 next = this->next; 300 flags = htmltags[this->tag].flags; 301 if (flags & (HTML_INPHRASE | HTML_TOPHRASE)) 302 print_ctag(h, this); 303 if ((flags & HTML_INPHRASE) == 0) 304 break; 305 this = next; 306 } 307 } 308 309 /* 310 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode. 311 * TOKEN_NONE does not switch. The old mode is returned. 312 */ 313 enum roff_tok 314 html_fillmode(struct html *h, enum roff_tok want) 315 { 316 struct tag *t; 317 enum roff_tok had; 318 319 for (t = h->tag; t != NULL; t = t->next) 320 if (t->tag == TAG_PRE) 321 break; 322 323 had = t == NULL ? ROFF_fi : ROFF_nf; 324 325 if (want != had) { 326 switch (want) { 327 case ROFF_fi: 328 print_tagq(h, t); 329 break; 330 case ROFF_nf: 331 html_close_paragraph(h); 332 print_otag(h, TAG_PRE, ""); 333 break; 334 case TOKEN_NONE: 335 break; 336 default: 337 abort(); 338 } 339 } 340 return had; 341 } 342 343 /* 344 * Allocate a string to be used for the "id=" attribute of an HTML 345 * element and/or as a segment identifier for a URI in an <a> element. 346 * The function may fail and return NULL if the node lacks text data 347 * to create the attribute from. 348 * The caller is responsible for free(3)ing the returned string. 349 * 350 * If the "unique" argument is non-zero, the "id_unique" ohash table 351 * is used for de-duplication. If the "unique" argument is 1, 352 * it is the first time the function is called for this tag and 353 * location, so if an ordinal suffix is needed, it is incremented. 354 * If the "unique" argument is 2, it is the second time the function 355 * is called for this tag and location, so the ordinal suffix 356 * remains unchanged. 357 */ 358 char * 359 html_make_id(const struct roff_node *n, int unique) 360 { 361 const struct roff_node *nch; 362 struct id_entry *entry; 363 char *buf, *cp; 364 size_t len; 365 unsigned int slot; 366 367 if (n->tag != NULL) 368 buf = mandoc_strdup(n->tag); 369 else { 370 switch (n->tok) { 371 case MDOC_Sh: 372 case MDOC_Ss: 373 case MDOC_Sx: 374 case MAN_SH: 375 case MAN_SS: 376 for (nch = n->child; nch != NULL; nch = nch->next) 377 if (nch->type != ROFFT_TEXT) 378 return NULL; 379 buf = NULL; 380 deroff(&buf, n); 381 if (buf == NULL) 382 return NULL; 383 break; 384 default: 385 if (n->child == NULL || n->child->type != ROFFT_TEXT) 386 return NULL; 387 buf = mandoc_strdup(n->child->string); 388 break; 389 } 390 } 391 392 /* 393 * In ID attributes, only use ASCII characters that are 394 * permitted in URL-fragment strings according to the 395 * explicit list at: 396 * https://url.spec.whatwg.org/#url-fragment-string 397 * In addition, reserve '~' for ordinal suffixes. 398 */ 399 400 for (cp = buf; *cp != '\0'; cp++) 401 if (isalnum((unsigned char)*cp) == 0 && 402 strchr("!$&'()*+,-./:;=?@_", *cp) == NULL) 403 *cp = '_'; 404 405 if (unique == 0) 406 return buf; 407 408 /* Avoid duplicate HTML id= attributes. */ 409 410 slot = ohash_qlookup(&id_unique, buf); 411 if ((entry = ohash_find(&id_unique, slot)) == NULL) { 412 len = strlen(buf) + 1; 413 entry = mandoc_malloc(sizeof(*entry) + len); 414 entry->ord = 1; 415 memcpy(entry->id, buf, len); 416 ohash_insert(&id_unique, slot, entry); 417 } else if (unique == 1) 418 entry->ord++; 419 420 if (entry->ord > 1) { 421 cp = buf; 422 mandoc_asprintf(&buf, "%s~%d", cp, entry->ord); 423 free(cp); 424 } 425 return buf; 426 } 427 428 static int 429 print_escape(struct html *h, char c) 430 { 431 432 switch (c) { 433 case '<': 434 print_word(h, "<"); 435 break; 436 case '>': 437 print_word(h, ">"); 438 break; 439 case '&': 440 print_word(h, "&"); 441 break; 442 case '"': 443 print_word(h, """); 444 break; 445 case ASCII_NBRSP: 446 print_word(h, " "); 447 break; 448 case ASCII_HYPH: 449 print_byte(h, '-'); 450 break; 451 case ASCII_BREAK: 452 break; 453 default: 454 return 0; 455 } 456 return 1; 457 } 458 459 static int 460 print_encode(struct html *h, const char *p, const char *pend, int norecurse) 461 { 462 char numbuf[16]; 463 const char *seq; 464 size_t sz; 465 int c, len, breakline, nospace; 466 enum mandoc_esc esc; 467 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"', 468 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; 469 470 if (pend == NULL) 471 pend = strchr(p, '\0'); 472 473 breakline = 0; 474 nospace = 0; 475 476 while (p < pend) { 477 if (HTML_SKIPCHAR & h->flags && '\\' != *p) { 478 h->flags &= ~HTML_SKIPCHAR; 479 p++; 480 continue; 481 } 482 483 for (sz = strcspn(p, rejs); sz-- && p < pend; p++) 484 print_byte(h, *p); 485 486 if (breakline && 487 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) { 488 print_otag(h, TAG_BR, ""); 489 breakline = 0; 490 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP)) 491 p++; 492 continue; 493 } 494 495 if (p >= pend) 496 break; 497 498 if (*p == ' ') { 499 print_endword(h); 500 p++; 501 continue; 502 } 503 504 if (print_escape(h, *p++)) 505 continue; 506 507 esc = mandoc_escape(&p, &seq, &len); 508 switch (esc) { 509 case ESCAPE_FONT: 510 case ESCAPE_FONTPREV: 511 case ESCAPE_FONTBOLD: 512 case ESCAPE_FONTITALIC: 513 case ESCAPE_FONTBI: 514 case ESCAPE_FONTROMAN: 515 case ESCAPE_FONTCR: 516 case ESCAPE_FONTCB: 517 case ESCAPE_FONTCI: 518 if (0 == norecurse) { 519 h->flags |= HTML_NOSPACE; 520 if (html_setfont(h, esc)) 521 print_metaf(h); 522 h->flags &= ~HTML_NOSPACE; 523 } 524 continue; 525 case ESCAPE_SKIPCHAR: 526 h->flags |= HTML_SKIPCHAR; 527 continue; 528 case ESCAPE_ERROR: 529 continue; 530 default: 531 break; 532 } 533 534 if (h->flags & HTML_SKIPCHAR) { 535 h->flags &= ~HTML_SKIPCHAR; 536 continue; 537 } 538 539 switch (esc) { 540 case ESCAPE_UNICODE: 541 /* Skip past "u" header. */ 542 c = mchars_num2uc(seq + 1, len - 1); 543 break; 544 case ESCAPE_NUMBERED: 545 c = mchars_num2char(seq, len); 546 if (c < 0) 547 continue; 548 break; 549 case ESCAPE_SPECIAL: 550 c = mchars_spec2cp(seq, len); 551 if (c <= 0) 552 continue; 553 break; 554 case ESCAPE_UNDEF: 555 c = *seq; 556 break; 557 case ESCAPE_DEVICE: 558 print_word(h, "html"); 559 continue; 560 case ESCAPE_BREAK: 561 breakline = 1; 562 continue; 563 case ESCAPE_NOSPACE: 564 if ('\0' == *p) 565 nospace = 1; 566 continue; 567 case ESCAPE_OVERSTRIKE: 568 if (len == 0) 569 continue; 570 c = seq[len - 1]; 571 break; 572 default: 573 continue; 574 } 575 if ((c < 0x20 && c != 0x09) || 576 (c > 0x7E && c < 0xA0)) 577 c = 0xFFFD; 578 if (c > 0x7E) { 579 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c); 580 print_word(h, numbuf); 581 } else if (print_escape(h, c) == 0) 582 print_byte(h, c); 583 } 584 585 return nospace; 586 } 587 588 static void 589 print_href(struct html *h, const char *name, const char *sec, int man) 590 { 591 struct stat sb; 592 const char *p, *pp; 593 char *filename; 594 595 if (man) { 596 pp = h->base_man1; 597 if (h->base_man2 != NULL) { 598 mandoc_asprintf(&filename, "%s.%s", name, sec); 599 if (stat(filename, &sb) == -1) 600 pp = h->base_man2; 601 free(filename); 602 } 603 } else 604 pp = h->base_includes; 605 606 while ((p = strchr(pp, '%')) != NULL) { 607 print_encode(h, pp, p, 1); 608 if (man && p[1] == 'S') { 609 if (sec == NULL) 610 print_byte(h, '1'); 611 else 612 print_encode(h, sec, NULL, 1); 613 } else if ((man && p[1] == 'N') || 614 (man == 0 && p[1] == 'I')) 615 print_encode(h, name, NULL, 1); 616 else 617 print_encode(h, p, p + 2, 1); 618 pp = p + 2; 619 } 620 if (*pp != '\0') 621 print_encode(h, pp, NULL, 1); 622 } 623 624 struct tag * 625 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) 626 { 627 va_list ap; 628 struct tag *t; 629 const char *attr; 630 char *arg1, *arg2; 631 int style_written, tflags; 632 633 tflags = htmltags[tag].flags; 634 635 /* Flow content is not allowed in phrasing context. */ 636 637 if ((tflags & HTML_INPHRASE) == 0) { 638 for (t = h->tag; t != NULL; t = t->next) { 639 if (t->closed) 640 continue; 641 assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0); 642 break; 643 } 644 645 /* 646 * Always wrap phrasing elements in a paragraph 647 * unless already contained in some flow container; 648 * never put them directly into a section. 649 */ 650 651 } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION) 652 print_otag(h, TAG_P, "c", "Pp"); 653 654 /* Push this tag onto the stack of open scopes. */ 655 656 if ((tflags & HTML_NOSTACK) == 0) { 657 t = mandoc_malloc(sizeof(struct tag)); 658 t->tag = tag; 659 t->next = h->tag; 660 t->refcnt = 0; 661 t->closed = 0; 662 h->tag = t; 663 } else 664 t = NULL; 665 666 if (tflags & HTML_NLBEFORE) 667 print_endline(h); 668 if (h->col == 0) 669 print_indent(h); 670 else if ((h->flags & HTML_NOSPACE) == 0) { 671 if (h->flags & HTML_KEEP) 672 print_word(h, " "); 673 else { 674 if (h->flags & HTML_PREKEEP) 675 h->flags |= HTML_KEEP; 676 print_endword(h); 677 } 678 } 679 680 if ( ! (h->flags & HTML_NONOSPACE)) 681 h->flags &= ~HTML_NOSPACE; 682 else 683 h->flags |= HTML_NOSPACE; 684 685 /* Print out the tag name and attributes. */ 686 687 print_byte(h, '<'); 688 print_word(h, htmltags[tag].name); 689 690 va_start(ap, fmt); 691 692 while (*fmt != '\0' && *fmt != 's') { 693 694 /* Parse attributes and arguments. */ 695 696 arg1 = va_arg(ap, char *); 697 arg2 = NULL; 698 switch (*fmt++) { 699 case 'c': 700 attr = "class"; 701 break; 702 case 'h': 703 attr = "href"; 704 break; 705 case 'i': 706 attr = "id"; 707 break; 708 case '?': 709 attr = arg1; 710 arg1 = va_arg(ap, char *); 711 break; 712 default: 713 abort(); 714 } 715 if (*fmt == 'M') 716 arg2 = va_arg(ap, char *); 717 if (arg1 == NULL) 718 continue; 719 720 /* Print the attributes. */ 721 722 print_byte(h, ' '); 723 print_word(h, attr); 724 print_byte(h, '='); 725 print_byte(h, '"'); 726 switch (*fmt) { 727 case 'I': 728 print_href(h, arg1, NULL, 0); 729 fmt++; 730 break; 731 case 'M': 732 print_href(h, arg1, arg2, 1); 733 fmt++; 734 break; 735 case 'R': 736 print_byte(h, '#'); 737 print_encode(h, arg1, NULL, 1); 738 fmt++; 739 break; 740 default: 741 print_encode(h, arg1, NULL, 1); 742 break; 743 } 744 print_byte(h, '"'); 745 } 746 747 style_written = 0; 748 while (*fmt++ == 's') { 749 arg1 = va_arg(ap, char *); 750 arg2 = va_arg(ap, char *); 751 if (arg2 == NULL) 752 continue; 753 print_byte(h, ' '); 754 if (style_written == 0) { 755 print_word(h, "style=\""); 756 style_written = 1; 757 } 758 print_word(h, arg1); 759 print_byte(h, ':'); 760 print_byte(h, ' '); 761 print_word(h, arg2); 762 print_byte(h, ';'); 763 } 764 if (style_written) 765 print_byte(h, '"'); 766 767 va_end(ap); 768 769 /* Accommodate for "well-formed" singleton escaping. */ 770 771 if (htmltags[tag].flags & HTML_NOSTACK) 772 print_byte(h, '/'); 773 774 print_byte(h, '>'); 775 776 if (tflags & HTML_NLBEGIN) 777 print_endline(h); 778 else 779 h->flags |= HTML_NOSPACE; 780 781 if (tflags & HTML_INDENT) 782 h->indent++; 783 if (tflags & HTML_NOINDENT) 784 h->noindent++; 785 786 return t; 787 } 788 789 /* 790 * Print an element with an optional "id=" attribute. 791 * If the element has phrasing content and an "id=" attribute, 792 * also add a permalink: outside if it can be in phrasing context, 793 * inside otherwise. 794 */ 795 struct tag * 796 print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr, 797 struct roff_node *n) 798 { 799 struct roff_node *nch; 800 struct tag *ret, *t; 801 char *id, *href; 802 803 ret = NULL; 804 id = href = NULL; 805 if (n->flags & NODE_ID) 806 id = html_make_id(n, 1); 807 if (n->flags & NODE_HREF) 808 href = id == NULL ? html_make_id(n, 2) : id; 809 if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE) 810 ret = print_otag(h, TAG_A, "chR", "permalink", href); 811 t = print_otag(h, elemtype, "ci", cattr, id); 812 if (ret == NULL) { 813 ret = t; 814 if (href != NULL && (nch = n->child) != NULL) { 815 /* man(7) is safe, it tags phrasing content only. */ 816 if (n->tok > MDOC_MAX || 817 htmltags[elemtype].flags & HTML_TOPHRASE) 818 nch = NULL; 819 else /* For mdoc(7), beware of nested blocks. */ 820 while (nch != NULL && nch->type == ROFFT_TEXT) 821 nch = nch->next; 822 if (nch == NULL) 823 print_otag(h, TAG_A, "chR", "permalink", href); 824 } 825 } 826 free(id); 827 if (id == NULL) 828 free(href); 829 return ret; 830 } 831 832 static void 833 print_ctag(struct html *h, struct tag *tag) 834 { 835 int tflags; 836 837 if (tag->closed == 0) { 838 tag->closed = 1; 839 if (tag == h->metaf) 840 h->metaf = NULL; 841 if (tag == h->tblt) 842 h->tblt = NULL; 843 844 tflags = htmltags[tag->tag].flags; 845 if (tflags & HTML_INDENT) 846 h->indent--; 847 if (tflags & HTML_NOINDENT) 848 h->noindent--; 849 if (tflags & HTML_NLEND) 850 print_endline(h); 851 print_indent(h); 852 print_byte(h, '<'); 853 print_byte(h, '/'); 854 print_word(h, htmltags[tag->tag].name); 855 print_byte(h, '>'); 856 if (tflags & HTML_NLAFTER) 857 print_endline(h); 858 } 859 if (tag->refcnt == 0) { 860 h->tag = tag->next; 861 free(tag); 862 } 863 } 864 865 void 866 print_gen_decls(struct html *h) 867 { 868 print_word(h, "<!DOCTYPE html>"); 869 print_endline(h); 870 } 871 872 void 873 print_gen_comment(struct html *h, struct roff_node *n) 874 { 875 int wantblank; 876 877 print_word(h, "<!-- This is an automatically generated file." 878 " Do not edit."); 879 h->indent = 1; 880 wantblank = 0; 881 while (n != NULL && n->type == ROFFT_COMMENT) { 882 if (strstr(n->string, "-->") == NULL && 883 (wantblank || *n->string != '\0')) { 884 print_endline(h); 885 print_indent(h); 886 print_word(h, n->string); 887 wantblank = *n->string != '\0'; 888 } 889 n = n->next; 890 } 891 if (wantblank) 892 print_endline(h); 893 print_word(h, " -->"); 894 print_endline(h); 895 h->indent = 0; 896 } 897 898 void 899 print_text(struct html *h, const char *word) 900 { 901 print_tagged_text(h, word, NULL); 902 } 903 904 void 905 print_tagged_text(struct html *h, const char *word, struct roff_node *n) 906 { 907 struct tag *t; 908 char *href; 909 910 /* 911 * Always wrap text in a paragraph unless already contained in 912 * some flow container; never put it directly into a section. 913 */ 914 915 if (h->tag->tag == TAG_SECTION) 916 print_otag(h, TAG_P, "c", "Pp"); 917 918 /* Output whitespace before this text? */ 919 920 if (h->col && (h->flags & HTML_NOSPACE) == 0) { 921 if ( ! (HTML_KEEP & h->flags)) { 922 if (HTML_PREKEEP & h->flags) 923 h->flags |= HTML_KEEP; 924 print_endword(h); 925 } else 926 print_word(h, " "); 927 } 928 929 /* 930 * Optionally switch fonts, optionally write a permalink, then 931 * print the text, optionally surrounded by HTML whitespace. 932 */ 933 934 assert(h->metaf == NULL); 935 print_metaf(h); 936 print_indent(h); 937 938 if (n != NULL && (href = html_make_id(n, 2)) != NULL) { 939 t = print_otag(h, TAG_A, "chR", "permalink", href); 940 free(href); 941 } else 942 t = NULL; 943 944 if ( ! print_encode(h, word, NULL, 0)) { 945 if ( ! (h->flags & HTML_NONOSPACE)) 946 h->flags &= ~HTML_NOSPACE; 947 h->flags &= ~HTML_NONEWLINE; 948 } else 949 h->flags |= HTML_NOSPACE | HTML_NONEWLINE; 950 951 if (h->metaf != NULL) { 952 print_tagq(h, h->metaf); 953 h->metaf = NULL; 954 } else if (t != NULL) 955 print_tagq(h, t); 956 957 h->flags &= ~HTML_IGNDELIM; 958 } 959 960 void 961 print_tagq(struct html *h, const struct tag *until) 962 { 963 struct tag *this, *next; 964 965 for (this = h->tag; this != NULL; this = next) { 966 next = this == until ? NULL : this->next; 967 print_ctag(h, this); 968 } 969 } 970 971 /* 972 * Close out all open elements up to but excluding suntil. 973 * Note that a paragraph just inside stays open together with it 974 * because paragraphs include subsequent phrasing content. 975 */ 976 void 977 print_stagq(struct html *h, const struct tag *suntil) 978 { 979 struct tag *this, *next; 980 981 for (this = h->tag; this != NULL; this = next) { 982 next = this->next; 983 if (this == suntil || (next == suntil && 984 (this->tag == TAG_P || this->tag == TAG_PRE))) 985 break; 986 print_ctag(h, this); 987 } 988 } 989 990 991 /*********************************************************************** 992 * Low level output functions. 993 * They implement line breaking using a short static buffer. 994 ***********************************************************************/ 995 996 /* 997 * Buffer one HTML output byte. 998 * If the buffer is full, flush and deactivate it and start a new line. 999 * If the buffer is inactive, print directly. 1000 */ 1001 static void 1002 print_byte(struct html *h, char c) 1003 { 1004 if ((h->flags & HTML_BUFFER) == 0) { 1005 putchar(c); 1006 h->col++; 1007 return; 1008 } 1009 1010 if (h->col + h->bufcol < sizeof(h->buf)) { 1011 h->buf[h->bufcol++] = c; 1012 return; 1013 } 1014 1015 putchar('\n'); 1016 h->col = 0; 1017 print_indent(h); 1018 putchar(' '); 1019 putchar(' '); 1020 fwrite(h->buf, h->bufcol, 1, stdout); 1021 putchar(c); 1022 h->col = (h->indent + 1) * 2 + h->bufcol + 1; 1023 h->bufcol = 0; 1024 h->flags &= ~HTML_BUFFER; 1025 } 1026 1027 /* 1028 * If something was printed on the current output line, end it. 1029 * Not to be called right after print_indent(). 1030 */ 1031 void 1032 print_endline(struct html *h) 1033 { 1034 if (h->col == 0) 1035 return; 1036 1037 if (h->bufcol) { 1038 putchar(' '); 1039 fwrite(h->buf, h->bufcol, 1, stdout); 1040 h->bufcol = 0; 1041 } 1042 putchar('\n'); 1043 h->col = 0; 1044 h->flags |= HTML_NOSPACE; 1045 h->flags &= ~HTML_BUFFER; 1046 } 1047 1048 /* 1049 * Flush the HTML output buffer. 1050 * If it is inactive, activate it. 1051 */ 1052 static void 1053 print_endword(struct html *h) 1054 { 1055 if (h->noindent) { 1056 print_byte(h, ' '); 1057 return; 1058 } 1059 1060 if ((h->flags & HTML_BUFFER) == 0) { 1061 h->col++; 1062 h->flags |= HTML_BUFFER; 1063 } else if (h->bufcol) { 1064 putchar(' '); 1065 fwrite(h->buf, h->bufcol, 1, stdout); 1066 h->col += h->bufcol + 1; 1067 } 1068 h->bufcol = 0; 1069 } 1070 1071 /* 1072 * If at the beginning of a new output line, 1073 * perform indentation and mark the line as containing output. 1074 * Make sure to really produce some output right afterwards, 1075 * but do not use print_otag() for producing it. 1076 */ 1077 static void 1078 print_indent(struct html *h) 1079 { 1080 size_t i; 1081 1082 if (h->col || h->noindent) 1083 return; 1084 1085 h->col = h->indent * 2; 1086 for (i = 0; i < h->col; i++) 1087 putchar(' '); 1088 } 1089 1090 /* 1091 * Print or buffer some characters 1092 * depending on the current HTML output buffer state. 1093 */ 1094 static void 1095 print_word(struct html *h, const char *cp) 1096 { 1097 while (*cp != '\0') 1098 print_byte(h, *cp++); 1099 } 1100