1 /* $Id: html.c,v 1.2 2009/10/27 21:40:07 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <sys/types.h> 18 19 #include <assert.h> 20 #include <err.h> 21 #include <stdio.h> 22 #include <stdarg.h> 23 #include <stdint.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <unistd.h> 27 28 #include "out.h" 29 #include "chars.h" 30 #include "html.h" 31 #include "main.h" 32 33 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a)) 34 35 #define DOCTYPE "-//W3C//DTD HTML 4.01//EN" 36 #define DTD "http://www.w3.org/TR/html4/strict.dtd" 37 38 struct htmldata { 39 const char *name; 40 int flags; 41 #define HTML_CLRLINE (1 << 0) 42 #define HTML_NOSTACK (1 << 1) 43 }; 44 45 static const struct htmldata htmltags[TAG_MAX] = { 46 {"html", HTML_CLRLINE}, /* TAG_HTML */ 47 {"head", HTML_CLRLINE}, /* TAG_HEAD */ 48 {"body", HTML_CLRLINE}, /* TAG_BODY */ 49 {"meta", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */ 50 {"title", HTML_CLRLINE}, /* TAG_TITLE */ 51 {"div", HTML_CLRLINE}, /* TAG_DIV */ 52 {"h1", 0}, /* TAG_H1 */ 53 {"h2", 0}, /* TAG_H2 */ 54 {"p", HTML_CLRLINE}, /* TAG_P */ 55 {"span", 0}, /* TAG_SPAN */ 56 {"link", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */ 57 {"br", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */ 58 {"a", 0}, /* TAG_A */ 59 {"table", HTML_CLRLINE}, /* TAG_TABLE */ 60 {"col", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */ 61 {"tr", HTML_CLRLINE}, /* TAG_TR */ 62 {"td", HTML_CLRLINE}, /* TAG_TD */ 63 {"li", HTML_CLRLINE}, /* TAG_LI */ 64 {"ul", HTML_CLRLINE}, /* TAG_UL */ 65 {"ol", HTML_CLRLINE}, /* TAG_OL */ 66 {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */ 67 }; 68 69 static const char *const htmlattrs[ATTR_MAX] = { 70 "http-equiv", 71 "content", 72 "name", 73 "rel", 74 "href", 75 "type", 76 "media", 77 "class", 78 "style", 79 "width", 80 "valign", 81 "target", 82 "id", 83 }; 84 85 void * 86 html_alloc(char *outopts) 87 { 88 struct html *h; 89 const char *toks[4]; 90 char *v; 91 92 toks[0] = "style"; 93 toks[1] = "man"; 94 toks[2] = "includes"; 95 toks[3] = NULL; 96 97 if (NULL == (h = calloc(1, sizeof(struct html)))) 98 return(NULL); 99 100 h->tags.head = NULL; 101 h->ords.head = NULL; 102 103 if (NULL == (h->symtab = chars_init(CHARS_HTML))) { 104 free(h); 105 return(NULL); 106 } 107 108 while (outopts && *outopts) 109 switch (getsubopt(&outopts, UNCONST(toks), &v)) { 110 case (0): 111 h->style = v; 112 break; 113 case (1): 114 h->base_man = v; 115 break; 116 case (2): 117 h->base_includes = v; 118 break; 119 default: 120 break; 121 } 122 123 return(h); 124 } 125 126 127 void 128 html_free(void *p) 129 { 130 struct tag *tag; 131 struct ord *ord; 132 struct html *h; 133 134 h = (struct html *)p; 135 136 while ((ord = h->ords.head) != NULL) { 137 h->ords.head = ord->next; 138 free(ord); 139 } 140 141 while ((tag = h->tags.head) != NULL) { 142 h->tags.head = tag->next; 143 free(tag); 144 } 145 146 if (h->symtab) 147 chars_free(h->symtab); 148 149 free(h); 150 } 151 152 153 void 154 print_gen_head(struct html *h) 155 { 156 struct htmlpair tag[4]; 157 158 tag[0].key = ATTR_HTTPEQUIV; 159 tag[0].val = "Content-Type"; 160 tag[1].key = ATTR_CONTENT; 161 tag[1].val = "text/html; charset=utf-8"; 162 print_otag(h, TAG_META, 2, tag); 163 164 tag[0].key = ATTR_NAME; 165 tag[0].val = "resource-type"; 166 tag[1].key = ATTR_CONTENT; 167 tag[1].val = "document"; 168 print_otag(h, TAG_META, 2, tag); 169 170 if (h->style) { 171 tag[0].key = ATTR_REL; 172 tag[0].val = "stylesheet"; 173 tag[1].key = ATTR_HREF; 174 tag[1].val = h->style; 175 tag[2].key = ATTR_TYPE; 176 tag[2].val = "text/css"; 177 tag[3].key = ATTR_MEDIA; 178 tag[3].val = "all"; 179 print_otag(h, TAG_LINK, 4, tag); 180 } 181 } 182 183 184 static void 185 print_spec(struct html *h, const char *p, int len) 186 { 187 const char *rhs; 188 int i; 189 size_t sz; 190 191 rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz); 192 193 if (NULL == rhs) 194 return; 195 for (i = 0; i < (int)sz; i++) 196 putchar(rhs[i]); 197 } 198 199 200 static void 201 print_res(struct html *h, const char *p, int len) 202 { 203 const char *rhs; 204 int i; 205 size_t sz; 206 207 rhs = chars_a2res(h->symtab, p, (size_t)len, &sz); 208 209 if (NULL == rhs) 210 return; 211 for (i = 0; i < (int)sz; i++) 212 putchar(rhs[i]); 213 } 214 215 216 static void 217 print_escape(struct html *h, const char **p) 218 { 219 int j, type; 220 const char *wp; 221 222 wp = *p; 223 type = 1; 224 225 if (0 == *(++wp)) { 226 *p = wp; 227 return; 228 } 229 230 if ('(' == *wp) { 231 wp++; 232 if (0 == *wp || 0 == *(wp + 1)) { 233 *p = 0 == *wp ? wp : wp + 1; 234 return; 235 } 236 237 print_spec(h, wp, 2); 238 *p = ++wp; 239 return; 240 241 } else if ('*' == *wp) { 242 if (0 == *(++wp)) { 243 *p = wp; 244 return; 245 } 246 247 switch (*wp) { 248 case ('('): 249 wp++; 250 if (0 == *wp || 0 == *(wp + 1)) { 251 *p = 0 == *wp ? wp : wp + 1; 252 return; 253 } 254 255 print_res(h, wp, 2); 256 *p = ++wp; 257 return; 258 case ('['): 259 type = 0; 260 break; 261 default: 262 print_res(h, wp, 1); 263 *p = wp; 264 return; 265 } 266 267 } else if ('f' == *wp) { 268 if (0 == *(++wp)) { 269 *p = wp; 270 return; 271 } 272 273 switch (*wp) { 274 case ('B'): 275 /* TODO */ 276 break; 277 case ('I'): 278 /* TODO */ 279 break; 280 case ('P'): 281 /* FALLTHROUGH */ 282 case ('R'): 283 /* TODO */ 284 break; 285 default: 286 break; 287 } 288 289 *p = wp; 290 return; 291 292 } else if ('[' != *wp) { 293 print_spec(h, wp, 1); 294 *p = wp; 295 return; 296 } 297 298 wp++; 299 for (j = 0; *wp && ']' != *wp; wp++, j++) 300 /* Loop... */ ; 301 302 if (0 == *wp) { 303 *p = wp; 304 return; 305 } 306 307 if (type) 308 print_spec(h, wp - j, j); 309 else 310 print_res(h, wp - j, j); 311 312 *p = wp; 313 } 314 315 316 static void 317 print_encode(struct html *h, const char *p) 318 { 319 320 for (; *p; p++) { 321 if ('\\' == *p) { 322 print_escape(h, &p); 323 continue; 324 } 325 switch (*p) { 326 case ('<'): 327 printf("<"); 328 break; 329 case ('>'): 330 printf(">"); 331 break; 332 case ('&'): 333 printf("&"); 334 break; 335 default: 336 putchar(*p); 337 break; 338 } 339 } 340 } 341 342 343 struct tag * 344 print_otag(struct html *h, enum htmltag tag, 345 int sz, const struct htmlpair *p) 346 { 347 int i; 348 struct tag *t; 349 350 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { 351 if (NULL == (t = malloc(sizeof(struct tag)))) 352 err(EXIT_FAILURE, "malloc"); 353 t->tag = tag; 354 t->next = h->tags.head; 355 h->tags.head = t; 356 } else 357 t = NULL; 358 359 if ( ! (HTML_NOSPACE & h->flags)) 360 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) 361 printf(" "); 362 363 printf("<%s", htmltags[tag].name); 364 for (i = 0; i < sz; i++) { 365 printf(" %s=\"", htmlattrs[p[i].key]); 366 assert(p->val); 367 print_encode(h, p[i].val); 368 printf("\""); 369 } 370 printf(">"); 371 372 h->flags |= HTML_NOSPACE; 373 if (HTML_CLRLINE & htmltags[tag].flags) 374 h->flags |= HTML_NEWLINE; 375 else 376 h->flags &= ~HTML_NEWLINE; 377 378 return(t); 379 } 380 381 382 /* ARGSUSED */ 383 static void 384 print_ctag(struct html *h, enum htmltag tag) 385 { 386 387 printf("</%s>", htmltags[tag].name); 388 if (HTML_CLRLINE & htmltags[tag].flags) 389 h->flags |= HTML_NOSPACE; 390 if (HTML_CLRLINE & htmltags[tag].flags) 391 h->flags |= HTML_NEWLINE; 392 else 393 h->flags &= ~HTML_NEWLINE; 394 } 395 396 397 /* ARGSUSED */ 398 void 399 print_gen_doctype(struct html *h) 400 { 401 402 printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD); 403 } 404 405 406 void 407 print_text(struct html *h, const char *p) 408 { 409 410 if (*p && 0 == *(p + 1)) 411 switch (*p) { 412 case('.'): 413 /* FALLTHROUGH */ 414 case(','): 415 /* FALLTHROUGH */ 416 case(';'): 417 /* FALLTHROUGH */ 418 case(':'): 419 /* FALLTHROUGH */ 420 case('?'): 421 /* FALLTHROUGH */ 422 case('!'): 423 /* FALLTHROUGH */ 424 case(')'): 425 /* FALLTHROUGH */ 426 case(']'): 427 /* FALLTHROUGH */ 428 case('}'): 429 if ( ! (HTML_IGNDELIM & h->flags)) 430 h->flags |= HTML_NOSPACE; 431 break; 432 default: 433 break; 434 } 435 436 if ( ! (h->flags & HTML_NOSPACE)) 437 printf(" "); 438 439 h->flags &= ~HTML_NOSPACE; 440 h->flags &= ~HTML_NEWLINE; 441 442 if (p) 443 print_encode(h, p); 444 445 if (*p && 0 == *(p + 1)) 446 switch (*p) { 447 case('('): 448 /* FALLTHROUGH */ 449 case('['): 450 /* FALLTHROUGH */ 451 case('{'): 452 h->flags |= HTML_NOSPACE; 453 break; 454 default: 455 break; 456 } 457 } 458 459 460 void 461 print_tagq(struct html *h, const struct tag *until) 462 { 463 struct tag *tag; 464 465 while ((tag = h->tags.head) != NULL) { 466 print_ctag(h, tag->tag); 467 h->tags.head = tag->next; 468 free(tag); 469 if (until && tag == until) 470 return; 471 } 472 } 473 474 475 void 476 print_stagq(struct html *h, const struct tag *suntil) 477 { 478 struct tag *tag; 479 480 while ((tag = h->tags.head) != NULL) { 481 if (suntil && tag == suntil) 482 return; 483 print_ctag(h, tag->tag); 484 h->tags.head = tag->next; 485 free(tag); 486 } 487 } 488 489 490 void 491 bufinit(struct html *h) 492 { 493 494 h->buf[0] = '\0'; 495 h->buflen = 0; 496 } 497 498 499 void 500 bufcat_style(struct html *h, const char *key, const char *val) 501 { 502 503 bufcat(h, key); 504 bufncat(h, ":", 1); 505 bufcat(h, val); 506 bufncat(h, ";", 1); 507 } 508 509 510 void 511 bufcat(struct html *h, const char *p) 512 { 513 514 bufncat(h, p, strlen(p)); 515 } 516 517 518 void 519 buffmt(struct html *h, const char *fmt, ...) 520 { 521 va_list ap; 522 523 va_start(ap, fmt); 524 (void)vsnprintf(h->buf + (int)h->buflen, 525 BUFSIZ - h->buflen - 1, fmt, ap); 526 va_end(ap); 527 h->buflen = strlen(h->buf); 528 } 529 530 531 void 532 bufncat(struct html *h, const char *p, size_t sz) 533 { 534 535 if (h->buflen + sz > BUFSIZ - 1) 536 sz = BUFSIZ - 1 - h->buflen; 537 538 (void)strncat(h->buf, p, sz); 539 h->buflen += sz; 540 } 541 542 543 void 544 buffmt_includes(struct html *h, const char *name) 545 { 546 const char *p, *pp; 547 548 pp = h->base_includes; 549 550 while (NULL != (p = strchr(pp, '%'))) { 551 bufncat(h, pp, (size_t)(p - pp)); 552 switch (*(p + 1)) { 553 case('I'): 554 bufcat(h, name); 555 break; 556 default: 557 bufncat(h, p, 2); 558 break; 559 } 560 pp = p + 2; 561 } 562 if (pp) 563 bufcat(h, pp); 564 } 565 566 567 void 568 buffmt_man(struct html *h, 569 const char *name, const char *sec) 570 { 571 const char *p, *pp; 572 573 pp = h->base_man; 574 575 /* LINTED */ 576 while (NULL != (p = strchr(pp, '%'))) { 577 bufncat(h, pp, (size_t)(p - pp)); 578 switch (*(p + 1)) { 579 case('S'): 580 bufcat(h, sec ? sec : "1"); 581 break; 582 case('N'): 583 buffmt(h, name); 584 break; 585 default: 586 bufncat(h, p, 2); 587 break; 588 } 589 pp = p + 2; 590 } 591 if (pp) 592 bufcat(h, pp); 593 } 594 595 596 void 597 bufcat_su(struct html *h, const char *p, const struct roffsu *su) 598 { 599 double v; 600 const char *u; 601 602 v = su->scale; 603 604 switch (su->unit) { 605 case (SCALE_CM): 606 u = "cm"; 607 break; 608 case (SCALE_IN): 609 u = "in"; 610 break; 611 case (SCALE_PC): 612 u = "pc"; 613 break; 614 case (SCALE_PT): 615 u = "pt"; 616 break; 617 case (SCALE_EM): 618 u = "em"; 619 break; 620 case (SCALE_MM): 621 if (0 == (v /= 100)) 622 v = 1; 623 u = "em"; 624 break; 625 case (SCALE_EN): 626 u = "ex"; 627 break; 628 case (SCALE_BU): 629 u = "ex"; 630 break; 631 case (SCALE_VS): 632 u = "em"; 633 break; 634 default: 635 u = "ex"; 636 break; 637 } 638 639 if (su->pt) 640 buffmt(h, "%s: %f%s;", p, v, u); 641 else 642 /* LINTED */ 643 buffmt(h, "%s: %d%s;", p, (int)v, u); 644 } 645 646