1 /* $Id: html.c,v 1.7 2010/04/07 23:15:05 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <sys/types.h> 18 19 #include <assert.h> 20 #include <ctype.h> 21 #include <stdarg.h> 22 #include <stdio.h> 23 #include <stdint.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <unistd.h> 27 28 #include "out.h" 29 #include "chars.h" 30 #include "html.h" 31 #include "main.h" 32 33 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a)) 34 35 struct htmldata { 36 const char *name; 37 int flags; 38 #define HTML_CLRLINE (1 << 0) 39 #define HTML_NOSTACK (1 << 1) 40 #define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */ 41 }; 42 43 static const struct htmldata htmltags[TAG_MAX] = { 44 {"html", HTML_CLRLINE}, /* TAG_HTML */ 45 {"head", HTML_CLRLINE}, /* TAG_HEAD */ 46 {"body", HTML_CLRLINE}, /* TAG_BODY */ 47 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */ 48 {"title", HTML_CLRLINE}, /* TAG_TITLE */ 49 {"div", HTML_CLRLINE}, /* TAG_DIV */ 50 {"h1", 0}, /* TAG_H1 */ 51 {"h2", 0}, /* TAG_H2 */ 52 {"span", 0}, /* TAG_SPAN */ 53 {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */ 54 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */ 55 {"a", 0}, /* TAG_A */ 56 {"table", HTML_CLRLINE}, /* TAG_TABLE */ 57 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */ 58 {"tr", HTML_CLRLINE}, /* TAG_TR */ 59 {"td", HTML_CLRLINE}, /* TAG_TD */ 60 {"li", HTML_CLRLINE}, /* TAG_LI */ 61 {"ul", HTML_CLRLINE}, /* TAG_UL */ 62 {"ol", HTML_CLRLINE}, /* TAG_OL */ 63 }; 64 65 static const char *const htmlfonts[HTMLFONT_MAX] = { 66 "roman", 67 "bold", 68 "italic" 69 }; 70 71 static const char *const htmlattrs[ATTR_MAX] = { 72 "http-equiv", 73 "content", 74 "name", 75 "rel", 76 "href", 77 "type", 78 "media", 79 "class", 80 "style", 81 "width", 82 "valign", 83 "target", 84 "id", 85 "summary", 86 }; 87 88 static void print_spec(struct html *, const char *, size_t); 89 static void print_res(struct html *, const char *, size_t); 90 static void print_ctag(struct html *, enum htmltag); 91 static void print_doctype(struct html *); 92 static void print_xmltype(struct html *); 93 static int print_encode(struct html *, const char *, int); 94 static void print_metaf(struct html *, enum roffdeco); 95 static void print_attr(struct html *, 96 const char *, const char *); 97 static void *ml_alloc(char *, enum htmltype); 98 99 100 static void * 101 ml_alloc(char *outopts, enum htmltype type) 102 { 103 struct html *h; 104 const char *toks[4]; 105 char *v; 106 107 toks[0] = "style"; 108 toks[1] = "man"; 109 toks[2] = "includes"; 110 toks[3] = NULL; 111 112 h = calloc(1, sizeof(struct html)); 113 if (NULL == h) { 114 perror(NULL); 115 exit(EXIT_FAILURE); 116 } 117 118 h->type = type; 119 h->tags.head = NULL; 120 h->ords.head = NULL; 121 h->symtab = chars_init(CHARS_HTML); 122 123 while (outopts && *outopts) 124 switch (getsubopt(&outopts, UNCONST(toks), &v)) { 125 case (0): 126 h->style = v; 127 break; 128 case (1): 129 h->base_man = v; 130 break; 131 case (2): 132 h->base_includes = v; 133 break; 134 default: 135 break; 136 } 137 138 return(h); 139 } 140 141 void * 142 html_alloc(char *outopts) 143 { 144 145 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT)); 146 } 147 148 149 void * 150 xhtml_alloc(char *outopts) 151 { 152 153 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT)); 154 } 155 156 157 void 158 html_free(void *p) 159 { 160 struct tag *tag; 161 struct ord *ord; 162 struct html *h; 163 164 h = (struct html *)p; 165 166 while ((ord = h->ords.head) != NULL) { 167 h->ords.head = ord->next; 168 free(ord); 169 } 170 171 while ((tag = h->tags.head) != NULL) { 172 h->tags.head = tag->next; 173 free(tag); 174 } 175 176 if (h->symtab) 177 chars_free(h->symtab); 178 179 free(h); 180 } 181 182 183 void 184 print_gen_head(struct html *h) 185 { 186 struct htmlpair tag[4]; 187 188 tag[0].key = ATTR_HTTPEQUIV; 189 tag[0].val = "Content-Type"; 190 tag[1].key = ATTR_CONTENT; 191 tag[1].val = "text/html; charset=utf-8"; 192 print_otag(h, TAG_META, 2, tag); 193 194 tag[0].key = ATTR_NAME; 195 tag[0].val = "resource-type"; 196 tag[1].key = ATTR_CONTENT; 197 tag[1].val = "document"; 198 print_otag(h, TAG_META, 2, tag); 199 200 if (h->style) { 201 tag[0].key = ATTR_REL; 202 tag[0].val = "stylesheet"; 203 tag[1].key = ATTR_HREF; 204 tag[1].val = h->style; 205 tag[2].key = ATTR_TYPE; 206 tag[2].val = "text/css"; 207 tag[3].key = ATTR_MEDIA; 208 tag[3].val = "all"; 209 print_otag(h, TAG_LINK, 4, tag); 210 } 211 } 212 213 214 static void 215 print_spec(struct html *h, const char *p, size_t len) 216 { 217 const char *rhs; 218 size_t sz; 219 220 rhs = chars_a2ascii(h->symtab, p, len, &sz); 221 222 if (NULL == rhs) 223 return; 224 fwrite(rhs, 1, sz, stdout); 225 } 226 227 228 static void 229 print_res(struct html *h, const char *p, size_t len) 230 { 231 const char *rhs; 232 size_t sz; 233 234 rhs = chars_a2res(h->symtab, p, len, &sz); 235 236 if (NULL == rhs) 237 return; 238 fwrite(rhs, 1, sz, stdout); 239 } 240 241 242 struct tag * 243 print_ofont(struct html *h, enum htmlfont font) 244 { 245 struct htmlpair tag; 246 247 h->metal = h->metac; 248 h->metac = font; 249 250 /* FIXME: DECO_ROMAN should just close out preexisting. */ 251 252 if (h->metaf && h->tags.head == h->metaf) 253 print_tagq(h, h->metaf); 254 255 PAIR_CLASS_INIT(&tag, htmlfonts[font]); 256 h->metaf = print_otag(h, TAG_SPAN, 1, &tag); 257 return(h->metaf); 258 } 259 260 261 static void 262 print_metaf(struct html *h, enum roffdeco deco) 263 { 264 enum htmlfont font; 265 266 switch (deco) { 267 case (DECO_PREVIOUS): 268 font = h->metal; 269 break; 270 case (DECO_ITALIC): 271 font = HTMLFONT_ITALIC; 272 break; 273 case (DECO_BOLD): 274 font = HTMLFONT_BOLD; 275 break; 276 case (DECO_ROMAN): 277 font = HTMLFONT_NONE; 278 break; 279 default: 280 abort(); 281 /* NOTREACHED */ 282 } 283 284 (void)print_ofont(h, font); 285 } 286 287 288 static int 289 print_encode(struct html *h, const char *p, int norecurse) 290 { 291 size_t sz; 292 int len, nospace; 293 const char *seq; 294 enum roffdeco deco; 295 296 nospace = 0; 297 298 for (; *p; p++) { 299 sz = strcspn(p, "\\<>&"); 300 301 fwrite(p, 1, sz, stdout); 302 p += /* LINTED */ 303 sz; 304 305 if ('<' == *p) { 306 printf("<"); 307 continue; 308 } else if ('>' == *p) { 309 printf(">"); 310 continue; 311 } else if ('&' == *p) { 312 printf("&"); 313 continue; 314 } else if ('\0' == *p) 315 break; 316 317 seq = ++p; 318 len = a2roffdeco(&deco, &seq, &sz); 319 320 switch (deco) { 321 case (DECO_RESERVED): 322 print_res(h, seq, sz); 323 break; 324 case (DECO_SPECIAL): 325 print_spec(h, seq, sz); 326 break; 327 case (DECO_PREVIOUS): 328 /* FALLTHROUGH */ 329 case (DECO_BOLD): 330 /* FALLTHROUGH */ 331 case (DECO_ITALIC): 332 /* FALLTHROUGH */ 333 case (DECO_ROMAN): 334 if (norecurse) 335 break; 336 print_metaf(h, deco); 337 break; 338 default: 339 break; 340 } 341 342 p += len - 1; 343 344 if (DECO_NOSPACE == deco && '\0' == *(p + 1)) 345 nospace = 1; 346 } 347 348 return(nospace); 349 } 350 351 352 static void 353 print_attr(struct html *h, const char *key, const char *val) 354 { 355 printf(" %s=\"", key); 356 (void)print_encode(h, val, 1); 357 putchar('\"'); 358 } 359 360 361 struct tag * 362 print_otag(struct html *h, enum htmltag tag, 363 int sz, const struct htmlpair *p) 364 { 365 int i; 366 struct tag *t; 367 368 /* Push this tags onto the stack of open scopes. */ 369 370 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { 371 t = malloc(sizeof(struct tag)); 372 if (NULL == t) { 373 perror(NULL); 374 exit(EXIT_FAILURE); 375 } 376 t->tag = tag; 377 t->next = h->tags.head; 378 h->tags.head = t; 379 } else 380 t = NULL; 381 382 if ( ! (HTML_NOSPACE & h->flags)) 383 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) 384 putchar(' '); 385 386 /* Print out the tag name and attributes. */ 387 388 printf("<%s", htmltags[tag].name); 389 for (i = 0; i < sz; i++) 390 print_attr(h, htmlattrs[p[i].key], p[i].val); 391 392 /* Add non-overridable attributes. */ 393 394 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) { 395 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml"); 396 print_attr(h, "xml:lang", "en"); 397 print_attr(h, "lang", "en"); 398 } 399 400 /* Accomodate for XML "well-formed" singleton escaping. */ 401 402 if (HTML_AUTOCLOSE & htmltags[tag].flags) 403 switch (h->type) { 404 case (HTML_XHTML_1_0_STRICT): 405 putchar('/'); 406 break; 407 default: 408 break; 409 } 410 411 putchar('>'); 412 413 h->flags |= HTML_NOSPACE; 414 return(t); 415 } 416 417 418 static void 419 print_ctag(struct html *h, enum htmltag tag) 420 { 421 422 printf("</%s>", htmltags[tag].name); 423 if (HTML_CLRLINE & htmltags[tag].flags) { 424 h->flags |= HTML_NOSPACE; 425 putchar('\n'); 426 } 427 } 428 429 430 void 431 print_gen_decls(struct html *h) 432 { 433 434 print_xmltype(h); 435 print_doctype(h); 436 } 437 438 439 static void 440 print_xmltype(struct html *h) 441 { 442 const char *decl; 443 444 switch (h->type) { 445 case (HTML_XHTML_1_0_STRICT): 446 decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; 447 break; 448 default: 449 decl = NULL; 450 break; 451 } 452 453 if (NULL == decl) 454 return; 455 456 printf("%s\n", decl); 457 } 458 459 460 static void 461 print_doctype(struct html *h) 462 { 463 const char *doctype; 464 const char *dtd; 465 const char *name; 466 467 switch (h->type) { 468 case (HTML_HTML_4_01_STRICT): 469 name = "HTML"; 470 doctype = "-//W3C//DTD HTML 4.01//EN"; 471 dtd = "http://www.w3.org/TR/html4/strict.dtd"; 472 break; 473 default: 474 name = "html"; 475 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN"; 476 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; 477 break; 478 } 479 480 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", 481 name, doctype, dtd); 482 } 483 484 485 void 486 print_text(struct html *h, const char *p) 487 { 488 489 if (*p && 0 == *(p + 1)) 490 switch (*p) { 491 case('.'): 492 /* FALLTHROUGH */ 493 case(','): 494 /* FALLTHROUGH */ 495 case(';'): 496 /* FALLTHROUGH */ 497 case(':'): 498 /* FALLTHROUGH */ 499 case('?'): 500 /* FALLTHROUGH */ 501 case('!'): 502 /* FALLTHROUGH */ 503 case(')'): 504 /* FALLTHROUGH */ 505 case(']'): 506 if ( ! (HTML_IGNDELIM & h->flags)) 507 h->flags |= HTML_NOSPACE; 508 break; 509 default: 510 break; 511 } 512 513 if ( ! (h->flags & HTML_NOSPACE)) 514 putchar(' '); 515 516 assert(p); 517 if ( ! print_encode(h, p, 0)) 518 h->flags &= ~HTML_NOSPACE; 519 520 if (*p && 0 == *(p + 1)) 521 switch (*p) { 522 case('('): 523 /* FALLTHROUGH */ 524 case('['): 525 h->flags |= HTML_NOSPACE; 526 break; 527 default: 528 break; 529 } 530 } 531 532 533 void 534 print_tagq(struct html *h, const struct tag *until) 535 { 536 struct tag *tag; 537 538 while ((tag = h->tags.head) != NULL) { 539 if (tag == h->metaf) 540 h->metaf = NULL; 541 print_ctag(h, tag->tag); 542 h->tags.head = tag->next; 543 free(tag); 544 if (until && tag == until) 545 return; 546 } 547 } 548 549 550 void 551 print_stagq(struct html *h, const struct tag *suntil) 552 { 553 struct tag *tag; 554 555 while ((tag = h->tags.head) != NULL) { 556 if (suntil && tag == suntil) 557 return; 558 if (tag == h->metaf) 559 h->metaf = NULL; 560 print_ctag(h, tag->tag); 561 h->tags.head = tag->next; 562 free(tag); 563 } 564 } 565 566 567 void 568 bufinit(struct html *h) 569 { 570 571 h->buf[0] = '\0'; 572 h->buflen = 0; 573 } 574 575 576 void 577 bufcat_style(struct html *h, const char *key, const char *val) 578 { 579 580 bufcat(h, key); 581 bufncat(h, ":", 1); 582 bufcat(h, val); 583 bufncat(h, ";", 1); 584 } 585 586 587 void 588 bufcat(struct html *h, const char *p) 589 { 590 591 bufncat(h, p, strlen(p)); 592 } 593 594 595 void 596 buffmt(struct html *h, const char *fmt, ...) 597 { 598 va_list ap; 599 600 va_start(ap, fmt); 601 (void)vsnprintf(h->buf + (int)h->buflen, 602 BUFSIZ - h->buflen - 1, fmt, ap); 603 va_end(ap); 604 h->buflen = strlen(h->buf); 605 } 606 607 608 void 609 bufncat(struct html *h, const char *p, size_t sz) 610 { 611 612 if (h->buflen + sz > BUFSIZ - 1) 613 sz = BUFSIZ - 1 - h->buflen; 614 615 (void)strncat(h->buf, p, sz); 616 h->buflen += sz; 617 } 618 619 620 void 621 buffmt_includes(struct html *h, const char *name) 622 { 623 const char *p, *pp; 624 625 pp = h->base_includes; 626 627 while (NULL != (p = strchr(pp, '%'))) { 628 bufncat(h, pp, (size_t)(p - pp)); 629 switch (*(p + 1)) { 630 case('I'): 631 bufcat(h, name); 632 break; 633 default: 634 bufncat(h, p, 2); 635 break; 636 } 637 pp = p + 2; 638 } 639 if (pp) 640 bufcat(h, pp); 641 } 642 643 644 void 645 buffmt_man(struct html *h, 646 const char *name, const char *sec) 647 { 648 const char *p, *pp; 649 650 pp = h->base_man; 651 652 /* LINTED */ 653 while (NULL != (p = strchr(pp, '%'))) { 654 bufncat(h, pp, (size_t)(p - pp)); 655 switch (*(p + 1)) { 656 case('S'): 657 bufcat(h, sec ? sec : "1"); 658 break; 659 case('N'): 660 buffmt(h, name); 661 break; 662 default: 663 bufncat(h, p, 2); 664 break; 665 } 666 pp = p + 2; 667 } 668 if (pp) 669 bufcat(h, pp); 670 } 671 672 673 void 674 bufcat_su(struct html *h, const char *p, const struct roffsu *su) 675 { 676 double v; 677 const char *u; 678 679 v = su->scale; 680 681 switch (su->unit) { 682 case (SCALE_CM): 683 u = "cm"; 684 break; 685 case (SCALE_IN): 686 u = "in"; 687 break; 688 case (SCALE_PC): 689 u = "pc"; 690 break; 691 case (SCALE_PT): 692 u = "pt"; 693 break; 694 case (SCALE_EM): 695 u = "em"; 696 break; 697 case (SCALE_MM): 698 if (0 == (v /= 100)) 699 v = 1; 700 u = "em"; 701 break; 702 case (SCALE_EN): 703 u = "ex"; 704 break; 705 case (SCALE_BU): 706 u = "ex"; 707 break; 708 case (SCALE_VS): 709 u = "em"; 710 break; 711 default: 712 u = "ex"; 713 break; 714 } 715 716 if (su->pt) 717 buffmt(h, "%s: %f%s;", p, v, u); 718 else 719 /* LINTED */ 720 buffmt(h, "%s: %d%s;", p, (int)v, u); 721 } 722 723 724 void 725 html_idcat(char *dst, const char *src, int sz) 726 { 727 int ssz; 728 729 assert(sz); 730 731 /* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */ 732 733 for ( ; *dst != '\0' && sz; dst++, sz--) 734 /* Jump to end. */ ; 735 736 assert(sz > 2); 737 738 /* We can't start with a number (bah). */ 739 740 *dst++ = 'x'; 741 *dst = '\0'; 742 sz--; 743 744 for ( ; *src != '\0' && sz > 1; src++) { 745 ssz = snprintf(dst, (size_t)sz, "%.2x", *src); 746 sz -= ssz; 747 dst += ssz; 748 } 749 } 750