1 /* $Vendor-Id: html.c,v 1.110 2010/07/26 22:26:05 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <stdarg.h> 26 #include <stdio.h> 27 #include <stdint.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc.h" 33 #include "out.h" 34 #include "chars.h" 35 #include "html.h" 36 #include "main.h" 37 38 struct htmldata { 39 const char *name; 40 int flags; 41 #define HTML_CLRLINE (1 << 0) 42 #define HTML_NOSTACK (1 << 1) 43 #define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */ 44 }; 45 46 static const struct htmldata htmltags[TAG_MAX] = { 47 {"html", HTML_CLRLINE}, /* TAG_HTML */ 48 {"head", HTML_CLRLINE}, /* TAG_HEAD */ 49 {"body", HTML_CLRLINE}, /* TAG_BODY */ 50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */ 51 {"title", HTML_CLRLINE}, /* TAG_TITLE */ 52 {"div", HTML_CLRLINE}, /* TAG_DIV */ 53 {"h1", 0}, /* TAG_H1 */ 54 {"h2", 0}, /* TAG_H2 */ 55 {"span", 0}, /* TAG_SPAN */ 56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */ 57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */ 58 {"a", 0}, /* TAG_A */ 59 {"table", HTML_CLRLINE}, /* TAG_TABLE */ 60 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */ 61 {"tr", HTML_CLRLINE}, /* TAG_TR */ 62 {"td", HTML_CLRLINE}, /* TAG_TD */ 63 {"li", HTML_CLRLINE}, /* TAG_LI */ 64 {"ul", HTML_CLRLINE}, /* TAG_UL */ 65 {"ol", HTML_CLRLINE}, /* TAG_OL */ 66 }; 67 68 static const char *const htmlfonts[HTMLFONT_MAX] = { 69 "roman", 70 "bold", 71 "italic" 72 }; 73 74 static const char *const htmlattrs[ATTR_MAX] = { 75 "http-equiv", 76 "content", 77 "name", 78 "rel", 79 "href", 80 "type", 81 "media", 82 "class", 83 "style", 84 "width", 85 "valign", 86 "target", 87 "id", 88 "summary", 89 }; 90 91 static void print_spec(struct html *, enum roffdeco, 92 const char *, size_t); 93 static void print_res(struct html *, const char *, size_t); 94 static void print_ctag(struct html *, enum htmltag); 95 static void print_doctype(struct html *); 96 static void print_xmltype(struct html *); 97 static int print_encode(struct html *, const char *, int); 98 static void print_metaf(struct html *, enum roffdeco); 99 static void print_attr(struct html *, 100 const char *, const char *); 101 static void *ml_alloc(char *, enum htmltype); 102 103 104 static void * 105 ml_alloc(char *outopts, enum htmltype type) 106 { 107 struct html *h; 108 const char *toks[4]; 109 char *v; 110 111 toks[0] = "style"; 112 toks[1] = "man"; 113 toks[2] = "includes"; 114 toks[3] = NULL; 115 116 h = calloc(1, sizeof(struct html)); 117 if (NULL == h) { 118 perror(NULL); 119 exit(EXIT_FAILURE); 120 } 121 122 h->type = type; 123 h->tags.head = NULL; 124 h->ords.head = NULL; 125 h->symtab = chars_init(CHARS_HTML); 126 127 while (outopts && *outopts) 128 switch (getsubopt(&outopts, UNCONST(toks), &v)) { 129 case (0): 130 h->style = v; 131 break; 132 case (1): 133 h->base_man = v; 134 break; 135 case (2): 136 h->base_includes = v; 137 break; 138 default: 139 break; 140 } 141 142 return(h); 143 } 144 145 void * 146 html_alloc(char *outopts) 147 { 148 149 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT)); 150 } 151 152 153 void * 154 xhtml_alloc(char *outopts) 155 { 156 157 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT)); 158 } 159 160 161 void 162 html_free(void *p) 163 { 164 struct tag *tag; 165 struct ord *ord; 166 struct html *h; 167 168 h = (struct html *)p; 169 170 while ((ord = h->ords.head) != NULL) { 171 h->ords.head = ord->next; 172 free(ord); 173 } 174 175 while ((tag = h->tags.head) != NULL) { 176 h->tags.head = tag->next; 177 free(tag); 178 } 179 180 if (h->symtab) 181 chars_free(h->symtab); 182 183 free(h); 184 } 185 186 187 void 188 print_gen_head(struct html *h) 189 { 190 struct htmlpair tag[4]; 191 192 tag[0].key = ATTR_HTTPEQUIV; 193 tag[0].val = "Content-Type"; 194 tag[1].key = ATTR_CONTENT; 195 tag[1].val = "text/html; charset=utf-8"; 196 print_otag(h, TAG_META, 2, tag); 197 198 tag[0].key = ATTR_NAME; 199 tag[0].val = "resource-type"; 200 tag[1].key = ATTR_CONTENT; 201 tag[1].val = "document"; 202 print_otag(h, TAG_META, 2, tag); 203 204 if (h->style) { 205 tag[0].key = ATTR_REL; 206 tag[0].val = "stylesheet"; 207 tag[1].key = ATTR_HREF; 208 tag[1].val = h->style; 209 tag[2].key = ATTR_TYPE; 210 tag[2].val = "text/css"; 211 tag[3].key = ATTR_MEDIA; 212 tag[3].val = "all"; 213 print_otag(h, TAG_LINK, 4, tag); 214 } 215 } 216 217 218 static void 219 print_spec(struct html *h, enum roffdeco d, const char *p, size_t len) 220 { 221 int cp; 222 const char *rhs; 223 size_t sz; 224 225 if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) { 226 printf("&#%d;", cp); 227 return; 228 } else if (-1 == cp && DECO_SSPECIAL == d) { 229 fwrite(p, 1, len, stdout); 230 return; 231 } else if (-1 == cp) 232 return; 233 234 if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz))) 235 fwrite(rhs, 1, sz, stdout); 236 } 237 238 239 static void 240 print_res(struct html *h, const char *p, size_t len) 241 { 242 int cp; 243 const char *rhs; 244 size_t sz; 245 246 if ((cp = chars_res2cp(h->symtab, p, len)) > 0) { 247 printf("&#%d;", cp); 248 return; 249 } else if (-1 == cp) 250 return; 251 252 if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz))) 253 fwrite(rhs, 1, sz, stdout); 254 } 255 256 257 struct tag * 258 print_ofont(struct html *h, enum htmlfont font) 259 { 260 struct htmlpair tag; 261 262 h->metal = h->metac; 263 h->metac = font; 264 265 /* FIXME: DECO_ROMAN should just close out preexisting. */ 266 267 if (h->metaf && h->tags.head == h->metaf) 268 print_tagq(h, h->metaf); 269 270 PAIR_CLASS_INIT(&tag, htmlfonts[font]); 271 h->metaf = print_otag(h, TAG_SPAN, 1, &tag); 272 return(h->metaf); 273 } 274 275 276 static void 277 print_metaf(struct html *h, enum roffdeco deco) 278 { 279 enum htmlfont font; 280 281 switch (deco) { 282 case (DECO_PREVIOUS): 283 font = h->metal; 284 break; 285 case (DECO_ITALIC): 286 font = HTMLFONT_ITALIC; 287 break; 288 case (DECO_BOLD): 289 font = HTMLFONT_BOLD; 290 break; 291 case (DECO_ROMAN): 292 font = HTMLFONT_NONE; 293 break; 294 default: 295 abort(); 296 /* NOTREACHED */ 297 } 298 299 (void)print_ofont(h, font); 300 } 301 302 303 static int 304 print_encode(struct html *h, const char *p, int norecurse) 305 { 306 size_t sz; 307 int len, nospace; 308 const char *seq; 309 enum roffdeco deco; 310 static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; 311 312 nospace = 0; 313 314 for (; *p; p++) { 315 sz = strcspn(p, rejs); 316 317 fwrite(p, 1, sz, stdout); 318 p += /* LINTED */ 319 sz; 320 321 if ('<' == *p) { 322 printf("<"); 323 continue; 324 } else if ('>' == *p) { 325 printf(">"); 326 continue; 327 } else if ('&' == *p) { 328 printf("&"); 329 continue; 330 } else if (ASCII_HYPH == *p) { 331 /* 332 * Note: "soft hyphens" aren't graphically 333 * displayed when not breaking the text; we want 334 * them to be displayed. 335 */ 336 /*printf("­");*/ 337 putchar('-'); 338 continue; 339 } else if ('\0' == *p) 340 break; 341 342 seq = ++p; 343 len = a2roffdeco(&deco, &seq, &sz); 344 345 switch (deco) { 346 case (DECO_RESERVED): 347 print_res(h, seq, sz); 348 break; 349 case (DECO_SSPECIAL): 350 /* FALLTHROUGH */ 351 case (DECO_SPECIAL): 352 print_spec(h, deco, seq, sz); 353 break; 354 case (DECO_PREVIOUS): 355 /* FALLTHROUGH */ 356 case (DECO_BOLD): 357 /* FALLTHROUGH */ 358 case (DECO_ITALIC): 359 /* FALLTHROUGH */ 360 case (DECO_ROMAN): 361 if (norecurse) 362 break; 363 print_metaf(h, deco); 364 break; 365 default: 366 break; 367 } 368 369 p += len - 1; 370 371 if (DECO_NOSPACE == deco && '\0' == *(p + 1)) 372 nospace = 1; 373 } 374 375 return(nospace); 376 } 377 378 379 static void 380 print_attr(struct html *h, const char *key, const char *val) 381 { 382 printf(" %s=\"", key); 383 (void)print_encode(h, val, 1); 384 putchar('\"'); 385 } 386 387 388 struct tag * 389 print_otag(struct html *h, enum htmltag tag, 390 int sz, const struct htmlpair *p) 391 { 392 int i; 393 struct tag *t; 394 395 /* Push this tags onto the stack of open scopes. */ 396 397 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { 398 t = malloc(sizeof(struct tag)); 399 if (NULL == t) { 400 perror(NULL); 401 exit(EXIT_FAILURE); 402 } 403 t->tag = tag; 404 t->next = h->tags.head; 405 h->tags.head = t; 406 } else 407 t = NULL; 408 409 if ( ! (HTML_NOSPACE & h->flags)) 410 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) { 411 /* Manage keeps! */ 412 if ( ! (HTML_KEEP & h->flags)) { 413 if (HTML_PREKEEP & h->flags) 414 h->flags |= HTML_KEEP; 415 putchar(' '); 416 } else 417 printf(" "); 418 } 419 420 if ( ! (h->flags & HTML_NONOSPACE)) 421 h->flags &= ~HTML_NOSPACE; 422 else 423 h->flags |= HTML_NOSPACE; 424 425 /* Print out the tag name and attributes. */ 426 427 printf("<%s", htmltags[tag].name); 428 for (i = 0; i < sz; i++) 429 print_attr(h, htmlattrs[p[i].key], p[i].val); 430 431 /* Add non-overridable attributes. */ 432 433 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) { 434 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml"); 435 print_attr(h, "xml:lang", "en"); 436 print_attr(h, "lang", "en"); 437 } 438 439 /* Accomodate for XML "well-formed" singleton escaping. */ 440 441 if (HTML_AUTOCLOSE & htmltags[tag].flags) 442 switch (h->type) { 443 case (HTML_XHTML_1_0_STRICT): 444 putchar('/'); 445 break; 446 default: 447 break; 448 } 449 450 putchar('>'); 451 452 h->flags |= HTML_NOSPACE; 453 return(t); 454 } 455 456 457 static void 458 print_ctag(struct html *h, enum htmltag tag) 459 { 460 461 printf("</%s>", htmltags[tag].name); 462 if (HTML_CLRLINE & htmltags[tag].flags) { 463 h->flags |= HTML_NOSPACE; 464 putchar('\n'); 465 } 466 } 467 468 469 void 470 print_gen_decls(struct html *h) 471 { 472 473 print_xmltype(h); 474 print_doctype(h); 475 } 476 477 478 static void 479 print_xmltype(struct html *h) 480 { 481 482 if (HTML_XHTML_1_0_STRICT == h->type) 483 printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); 484 } 485 486 487 static void 488 print_doctype(struct html *h) 489 { 490 const char *doctype; 491 const char *dtd; 492 const char *name; 493 494 switch (h->type) { 495 case (HTML_HTML_4_01_STRICT): 496 name = "HTML"; 497 doctype = "-//W3C//DTD HTML 4.01//EN"; 498 dtd = "http://www.w3.org/TR/html4/strict.dtd"; 499 break; 500 default: 501 name = "html"; 502 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN"; 503 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; 504 break; 505 } 506 507 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", 508 name, doctype, dtd); 509 } 510 511 512 void 513 print_text(struct html *h, const char *word) 514 { 515 516 if (word[0] && '\0' == word[1]) 517 switch (word[0]) { 518 case('.'): 519 /* FALLTHROUGH */ 520 case(','): 521 /* FALLTHROUGH */ 522 case(';'): 523 /* FALLTHROUGH */ 524 case(':'): 525 /* FALLTHROUGH */ 526 case('?'): 527 /* FALLTHROUGH */ 528 case('!'): 529 /* FALLTHROUGH */ 530 case(')'): 531 /* FALLTHROUGH */ 532 case(']'): 533 if ( ! (HTML_IGNDELIM & h->flags)) 534 h->flags |= HTML_NOSPACE; 535 break; 536 default: 537 break; 538 } 539 540 if ( ! (HTML_NOSPACE & h->flags)) { 541 /* Manage keeps! */ 542 if ( ! (HTML_KEEP & h->flags)) { 543 if (HTML_PREKEEP & h->flags) 544 h->flags |= HTML_KEEP; 545 putchar(' '); 546 } else 547 printf(" "); 548 } 549 550 assert(word); 551 if ( ! print_encode(h, word, 0)) 552 if ( ! (h->flags & HTML_NONOSPACE)) 553 h->flags &= ~HTML_NOSPACE; 554 555 /* 556 * Note that we don't process the pipe: the parser sees it as 557 * punctuation, but we don't in terms of typography. 558 */ 559 if (word[0] && '\0' == word[1]) 560 switch (word[0]) { 561 case('('): 562 /* FALLTHROUGH */ 563 case('['): 564 h->flags |= HTML_NOSPACE; 565 break; 566 default: 567 break; 568 } 569 } 570 571 572 void 573 print_tagq(struct html *h, const struct tag *until) 574 { 575 struct tag *tag; 576 577 while ((tag = h->tags.head) != NULL) { 578 if (tag == h->metaf) 579 h->metaf = NULL; 580 print_ctag(h, tag->tag); 581 h->tags.head = tag->next; 582 free(tag); 583 if (until && tag == until) 584 return; 585 } 586 } 587 588 589 void 590 print_stagq(struct html *h, const struct tag *suntil) 591 { 592 struct tag *tag; 593 594 while ((tag = h->tags.head) != NULL) { 595 if (suntil && tag == suntil) 596 return; 597 if (tag == h->metaf) 598 h->metaf = NULL; 599 print_ctag(h, tag->tag); 600 h->tags.head = tag->next; 601 free(tag); 602 } 603 } 604 605 606 void 607 bufinit(struct html *h) 608 { 609 610 h->buf[0] = '\0'; 611 h->buflen = 0; 612 } 613 614 615 void 616 bufcat_style(struct html *h, const char *key, const char *val) 617 { 618 619 bufcat(h, key); 620 bufncat(h, ":", 1); 621 bufcat(h, val); 622 bufncat(h, ";", 1); 623 } 624 625 626 void 627 bufcat(struct html *h, const char *p) 628 { 629 630 bufncat(h, p, strlen(p)); 631 } 632 633 634 void 635 buffmt(struct html *h, const char *fmt, ...) 636 { 637 va_list ap; 638 639 va_start(ap, fmt); 640 (void)vsnprintf(h->buf + (int)h->buflen, 641 BUFSIZ - h->buflen - 1, fmt, ap); 642 va_end(ap); 643 h->buflen = strlen(h->buf); 644 } 645 646 647 void 648 bufncat(struct html *h, const char *p, size_t sz) 649 { 650 651 if (h->buflen + sz > BUFSIZ - 1) 652 sz = BUFSIZ - 1 - h->buflen; 653 654 (void)strncat(h->buf, p, sz); 655 h->buflen += sz; 656 } 657 658 659 void 660 buffmt_includes(struct html *h, const char *name) 661 { 662 const char *p, *pp; 663 664 pp = h->base_includes; 665 666 while (NULL != (p = strchr(pp, '%'))) { 667 bufncat(h, pp, (size_t)(p - pp)); 668 switch (*(p + 1)) { 669 case('I'): 670 bufcat(h, name); 671 break; 672 default: 673 bufncat(h, p, 2); 674 break; 675 } 676 pp = p + 2; 677 } 678 if (pp) 679 bufcat(h, pp); 680 } 681 682 683 void 684 buffmt_man(struct html *h, 685 const char *name, const char *sec) 686 { 687 const char *p, *pp; 688 689 pp = h->base_man; 690 691 /* LINTED */ 692 while (NULL != (p = strchr(pp, '%'))) { 693 bufncat(h, pp, (size_t)(p - pp)); 694 switch (*(p + 1)) { 695 case('S'): 696 bufcat(h, sec ? sec : "1"); 697 break; 698 case('N'): 699 buffmt(h, name); 700 break; 701 default: 702 bufncat(h, p, 2); 703 break; 704 } 705 pp = p + 2; 706 } 707 if (pp) 708 bufcat(h, pp); 709 } 710 711 712 void 713 bufcat_su(struct html *h, const char *p, const struct roffsu *su) 714 { 715 double v; 716 const char *u; 717 718 v = su->scale; 719 720 switch (su->unit) { 721 case (SCALE_CM): 722 u = "cm"; 723 break; 724 case (SCALE_IN): 725 u = "in"; 726 break; 727 case (SCALE_PC): 728 u = "pc"; 729 break; 730 case (SCALE_PT): 731 u = "pt"; 732 break; 733 case (SCALE_EM): 734 u = "em"; 735 break; 736 case (SCALE_MM): 737 if (0 == (v /= 100)) 738 v = 1; 739 u = "em"; 740 break; 741 case (SCALE_EN): 742 u = "ex"; 743 break; 744 case (SCALE_BU): 745 u = "ex"; 746 break; 747 case (SCALE_VS): 748 u = "em"; 749 break; 750 default: 751 u = "ex"; 752 break; 753 } 754 755 /* 756 * XXX: the CSS spec isn't clear as to which types accept 757 * integer or real numbers, so we just make them all decimals. 758 */ 759 buffmt(h, "%s: %.2f%s;", p, v, u); 760 } 761 762 763 void 764 html_idcat(char *dst, const char *src, int sz) 765 { 766 int ssz; 767 768 assert(sz); 769 770 /* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */ 771 772 for ( ; *dst != '\0' && sz; dst++, sz--) 773 /* Jump to end. */ ; 774 775 assert(sz > 2); 776 777 /* We can't start with a number (bah). */ 778 779 *dst++ = 'x'; 780 *dst = '\0'; 781 sz--; 782 783 for ( ; *src != '\0' && sz > 1; src++) { 784 ssz = snprintf(dst, (size_t)sz, "%.2x", *src); 785 sz -= ssz; 786 dst += ssz; 787 } 788 } 789