xref: /openbsd-src/usr.bin/mandoc/html.c (revision cd1eb269cafb12c415be1749cd4a4b5422710415)
1 /*	$Id: html.c,v 1.7 2010/04/07 23:15:05 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/types.h>
18 
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 
28 #include "out.h"
29 #include "chars.h"
30 #include "html.h"
31 #include "main.h"
32 
/*
 * Strip the const qualifier from a pointer by round-tripping it through
 * uintptr_t.  Used below to hand the const suboption token table to
 * getsubopt(), which takes a non-const pointer type.
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))
34 
/*
 * Static description of one element type: its name as written in the
 * output and a set of behaviour flags consulted by print_otag() and
 * print_ctag().
 */
struct	htmldata {
	const char	 *name;	/* Element name printed inside "<...>". */
	int		  flags;	/* Bitwise OR of the HTML_* flags below. */
/* No separating space before the open tag; newline after the close tag. */
#define	HTML_CLRLINE	 (1 << 0)
/* print_otag() does not push the tag onto the open-scope stack. */
#define	HTML_NOSTACK	 (1 << 1)
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */
};
42 
43 static	const struct htmldata htmltags[TAG_MAX] = {
44 	{"html",	HTML_CLRLINE}, /* TAG_HTML */
45 	{"head",	HTML_CLRLINE}, /* TAG_HEAD */
46 	{"body",	HTML_CLRLINE}, /* TAG_BODY */
47 	{"meta",	HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
48 	{"title",	HTML_CLRLINE}, /* TAG_TITLE */
49 	{"div",		HTML_CLRLINE}, /* TAG_DIV */
50 	{"h1",		0}, /* TAG_H1 */
51 	{"h2",		0}, /* TAG_H2 */
52 	{"span",	0}, /* TAG_SPAN */
53 	{"link",	HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
54 	{"br",		HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
55 	{"a",		0}, /* TAG_A */
56 	{"table",	HTML_CLRLINE}, /* TAG_TABLE */
57 	{"col",		HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
58 	{"tr",		HTML_CLRLINE}, /* TAG_TR */
59 	{"td",		HTML_CLRLINE}, /* TAG_TD */
60 	{"li",		HTML_CLRLINE}, /* TAG_LI */
61 	{"ul",		HTML_CLRLINE}, /* TAG_UL */
62 	{"ol",		HTML_CLRLINE}, /* TAG_OL */
63 };
64 
/*
 * Class names for font spans, indexed by enum htmlfont; print_ofont()
 * uses the selected entry as the "class" attribute of the span it opens.
 * NOTE(review): the order must match the enum declaration, which is not
 * visible in this file -- confirm against html.h.
 */
static	const char	*const htmlfonts[HTMLFONT_MAX] = {
	"roman",
	"bold",
	"italic"
};
70 
/*
 * Attribute names as written in the output, indexed by the key member
 * of struct htmlpair (see print_otag()).
 * NOTE(review): the order must match the ATTR_* enumeration, which is
 * not visible in this file -- confirm against html.h.
 */
static	const char	*const htmlattrs[ATTR_MAX] = {
	"http-equiv",
	"content",
	"name",
	"rel",
	"href",
	"type",
	"media",
	"class",
	"style",
	"width",
	"valign",
	"target",
	"id",
	"summary",
};
87 
/* Forward declarations of the file-local helpers defined below. */
static	void		  print_spec(struct html *, const char *, size_t);
static	void		  print_res(struct html *, const char *, size_t);
static	void		  print_ctag(struct html *, enum htmltag);
static	void		  print_doctype(struct html *);
static	void		  print_xmltype(struct html *);
static	int		  print_encode(struct html *, const char *, int);
static	void		  print_metaf(struct html *, enum roffdeco);
static	void		  print_attr(struct html *,
				const char *, const char *);
static	void		 *ml_alloc(char *, enum htmltype);
98 
99 
100 static void *
101 ml_alloc(char *outopts, enum htmltype type)
102 {
103 	struct html	*h;
104 	const char	*toks[4];
105 	char		*v;
106 
107 	toks[0] = "style";
108 	toks[1] = "man";
109 	toks[2] = "includes";
110 	toks[3] = NULL;
111 
112 	h = calloc(1, sizeof(struct html));
113 	if (NULL == h) {
114 		perror(NULL);
115 		exit(EXIT_FAILURE);
116 	}
117 
118 	h->type = type;
119 	h->tags.head = NULL;
120 	h->ords.head = NULL;
121 	h->symtab = chars_init(CHARS_HTML);
122 
123 	while (outopts && *outopts)
124 		switch (getsubopt(&outopts, UNCONST(toks), &v)) {
125 		case (0):
126 			h->style = v;
127 			break;
128 		case (1):
129 			h->base_man = v;
130 			break;
131 		case (2):
132 			h->base_includes = v;
133 			break;
134 		default:
135 			break;
136 		}
137 
138 	return(h);
139 }
140 
141 void *
142 html_alloc(char *outopts)
143 {
144 
145 	return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
146 }
147 
148 
149 void *
150 xhtml_alloc(char *outopts)
151 {
152 
153 	return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
154 }
155 
156 
157 void
158 html_free(void *p)
159 {
160 	struct tag	*tag;
161 	struct ord	*ord;
162 	struct html	*h;
163 
164 	h = (struct html *)p;
165 
166 	while ((ord = h->ords.head) != NULL) {
167 		h->ords.head = ord->next;
168 		free(ord);
169 	}
170 
171 	while ((tag = h->tags.head) != NULL) {
172 		h->tags.head = tag->next;
173 		free(tag);
174 	}
175 
176 	if (h->symtab)
177 		chars_free(h->symtab);
178 
179 	free(h);
180 }
181 
182 
183 void
184 print_gen_head(struct html *h)
185 {
186 	struct htmlpair	 tag[4];
187 
188 	tag[0].key = ATTR_HTTPEQUIV;
189 	tag[0].val = "Content-Type";
190 	tag[1].key = ATTR_CONTENT;
191 	tag[1].val = "text/html; charset=utf-8";
192 	print_otag(h, TAG_META, 2, tag);
193 
194 	tag[0].key = ATTR_NAME;
195 	tag[0].val = "resource-type";
196 	tag[1].key = ATTR_CONTENT;
197 	tag[1].val = "document";
198 	print_otag(h, TAG_META, 2, tag);
199 
200 	if (h->style) {
201 		tag[0].key = ATTR_REL;
202 		tag[0].val = "stylesheet";
203 		tag[1].key = ATTR_HREF;
204 		tag[1].val = h->style;
205 		tag[2].key = ATTR_TYPE;
206 		tag[2].val = "text/css";
207 		tag[3].key = ATTR_MEDIA;
208 		tag[3].val = "all";
209 		print_otag(h, TAG_LINK, 4, tag);
210 	}
211 }
212 
213 
214 static void
215 print_spec(struct html *h, const char *p, size_t len)
216 {
217 	const char	*rhs;
218 	size_t		 sz;
219 
220 	rhs = chars_a2ascii(h->symtab, p, len, &sz);
221 
222 	if (NULL == rhs)
223 		return;
224 	fwrite(rhs, 1, sz, stdout);
225 }
226 
227 
228 static void
229 print_res(struct html *h, const char *p, size_t len)
230 {
231 	const char	*rhs;
232 	size_t		 sz;
233 
234 	rhs = chars_a2res(h->symtab, p, len, &sz);
235 
236 	if (NULL == rhs)
237 		return;
238 	fwrite(rhs, 1, sz, stdout);
239 }
240 
241 
242 struct tag *
243 print_ofont(struct html *h, enum htmlfont font)
244 {
245 	struct htmlpair	 tag;
246 
247 	h->metal = h->metac;
248 	h->metac = font;
249 
250 	/* FIXME: DECO_ROMAN should just close out preexisting. */
251 
252 	if (h->metaf && h->tags.head == h->metaf)
253 		print_tagq(h, h->metaf);
254 
255 	PAIR_CLASS_INIT(&tag, htmlfonts[font]);
256 	h->metaf = print_otag(h, TAG_SPAN, 1, &tag);
257 	return(h->metaf);
258 }
259 
260 
261 static void
262 print_metaf(struct html *h, enum roffdeco deco)
263 {
264 	enum htmlfont	 font;
265 
266 	switch (deco) {
267 	case (DECO_PREVIOUS):
268 		font = h->metal;
269 		break;
270 	case (DECO_ITALIC):
271 		font = HTMLFONT_ITALIC;
272 		break;
273 	case (DECO_BOLD):
274 		font = HTMLFONT_BOLD;
275 		break;
276 	case (DECO_ROMAN):
277 		font = HTMLFONT_NONE;
278 		break;
279 	default:
280 		abort();
281 		/* NOTREACHED */
282 	}
283 
284 	(void)print_ofont(h, font);
285 }
286 
287 
/*
 * Write the string `p' to stdout, replacing '<', '>' and '&' by their
 * character entities and interpreting backslash escape sequences with
 * a2roffdeco().  Reserved and special-character escapes are replaced by
 * their table entries; font escapes switch the current font unless
 * `norecurse' is non-zero (print_attr() passes 1 so that no elements
 * are opened inside an attribute value).
 *
 * Returns non-zero if a DECO_NOSPACE escape is the last thing in the
 * string, zero otherwise.
 */
static int
print_encode(struct html *h, const char *p, int norecurse)
{
	size_t		 sz;
	int		 len, nospace;
	const char	*seq;
	enum roffdeco	 deco;

	nospace = 0;

	for (; *p; p++) {
		/* Copy the run of ordinary characters verbatim. */
		sz = strcspn(p, "\\<>&");

		fwrite(p, 1, sz, stdout);
		p += /* LINTED */
			sz;

		/* p now rests on a character needing treatment, or on NUL. */
		if ('<' == *p) {
			printf("&lt;");
			continue;
		} else if ('>' == *p) {
			printf("&gt;");
			continue;
		} else if ('&' == *p) {
			printf("&amp;");
			continue;
		} else if ('\0' == *p)
			break;

		/* Only a backslash remains: parse the escape following it. */
		seq = ++p;
		len = a2roffdeco(&deco, &seq, &sz);

		switch (deco) {
		case (DECO_RESERVED):
			print_res(h, seq, sz);
			break;
		case (DECO_SPECIAL):
			print_spec(h, seq, sz);
			break;
		case (DECO_PREVIOUS):
			/* FALLTHROUGH */
		case (DECO_BOLD):
			/* FALLTHROUGH */
		case (DECO_ITALIC):
			/* FALLTHROUGH */
		case (DECO_ROMAN):
			if (norecurse)
				break;
			print_metaf(h, deco);
			break;
		default:
			break;
		}

		/*
		 * Presumably `len' is the number of bytes the escape
		 * occupies after the backslash (TODO confirm against
		 * a2roffdeco()); stop one short because the loop header
		 * advances p once more.
		 */
		p += len - 1;

		if (DECO_NOSPACE == deco && '\0' == *(p + 1))
			nospace = 1;
	}

	return(nospace);
}
350 
351 
/*
 * Print one attribute as ` key="val"'.  The value is entity-encoded;
 * font escapes inside it are not acted upon.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{
	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('"');
}
359 
360 
361 struct tag *
362 print_otag(struct html *h, enum htmltag tag,
363 		int sz, const struct htmlpair *p)
364 {
365 	int		 i;
366 	struct tag	*t;
367 
368 	/* Push this tags onto the stack of open scopes. */
369 
370 	if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
371 		t = malloc(sizeof(struct tag));
372 		if (NULL == t) {
373 			perror(NULL);
374 			exit(EXIT_FAILURE);
375 		}
376 		t->tag = tag;
377 		t->next = h->tags.head;
378 		h->tags.head = t;
379 	} else
380 		t = NULL;
381 
382 	if ( ! (HTML_NOSPACE & h->flags))
383 		if ( ! (HTML_CLRLINE & htmltags[tag].flags))
384 			putchar(' ');
385 
386 	/* Print out the tag name and attributes. */
387 
388 	printf("<%s", htmltags[tag].name);
389 	for (i = 0; i < sz; i++)
390 		print_attr(h, htmlattrs[p[i].key], p[i].val);
391 
392 	/* Add non-overridable attributes. */
393 
394 	if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
395 		print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
396 		print_attr(h, "xml:lang", "en");
397 		print_attr(h, "lang", "en");
398 	}
399 
400 	/* Accomodate for XML "well-formed" singleton escaping. */
401 
402 	if (HTML_AUTOCLOSE & htmltags[tag].flags)
403 		switch (h->type) {
404 		case (HTML_XHTML_1_0_STRICT):
405 			putchar('/');
406 			break;
407 		default:
408 			break;
409 		}
410 
411 	putchar('>');
412 
413 	h->flags |= HTML_NOSPACE;
414 	return(t);
415 }
416 
417 
418 static void
419 print_ctag(struct html *h, enum htmltag tag)
420 {
421 
422 	printf("</%s>", htmltags[tag].name);
423 	if (HTML_CLRLINE & htmltags[tag].flags) {
424 		h->flags |= HTML_NOSPACE;
425 		putchar('\n');
426 	}
427 }
428 
429 
/*
 * Print the declarations that precede the root element: the XML
 * declaration (where the output type has one), then the DOCTYPE.
 */
void
print_gen_decls(struct html *h)
{
	print_xmltype(h);	/* Must come first in the document. */
	print_doctype(h);
}
437 
438 
439 static void
440 print_xmltype(struct html *h)
441 {
442 	const char	*decl;
443 
444 	switch (h->type) {
445 	case (HTML_XHTML_1_0_STRICT):
446 		decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
447 		break;
448 	default:
449 		decl = NULL;
450 		break;
451 	}
452 
453 	if (NULL == decl)
454 		return;
455 
456 	printf("%s\n", decl);
457 }
458 
459 
460 static void
461 print_doctype(struct html *h)
462 {
463 	const char	*doctype;
464 	const char	*dtd;
465 	const char	*name;
466 
467 	switch (h->type) {
468 	case (HTML_HTML_4_01_STRICT):
469 		name = "HTML";
470 		doctype = "-//W3C//DTD HTML 4.01//EN";
471 		dtd = "http://www.w3.org/TR/html4/strict.dtd";
472 		break;
473 	default:
474 		name = "html";
475 		doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
476 		dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
477 		break;
478 	}
479 
480 	printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
481 			name, doctype, dtd);
482 }
483 
484 
485 void
486 print_text(struct html *h, const char *p)
487 {
488 
489 	if (*p && 0 == *(p + 1))
490 		switch (*p) {
491 		case('.'):
492 			/* FALLTHROUGH */
493 		case(','):
494 			/* FALLTHROUGH */
495 		case(';'):
496 			/* FALLTHROUGH */
497 		case(':'):
498 			/* FALLTHROUGH */
499 		case('?'):
500 			/* FALLTHROUGH */
501 		case('!'):
502 			/* FALLTHROUGH */
503 		case(')'):
504 			/* FALLTHROUGH */
505 		case(']'):
506 			if ( ! (HTML_IGNDELIM & h->flags))
507 				h->flags |= HTML_NOSPACE;
508 			break;
509 		default:
510 			break;
511 		}
512 
513 	if ( ! (h->flags & HTML_NOSPACE))
514 		putchar(' ');
515 
516 	assert(p);
517 	if ( ! print_encode(h, p, 0))
518 		h->flags &= ~HTML_NOSPACE;
519 
520 	if (*p && 0 == *(p + 1))
521 		switch (*p) {
522 		case('('):
523 			/* FALLTHROUGH */
524 		case('['):
525 			h->flags |= HTML_NOSPACE;
526 			break;
527 		default:
528 			break;
529 		}
530 }
531 
532 
533 void
534 print_tagq(struct html *h, const struct tag *until)
535 {
536 	struct tag	*tag;
537 
538 	while ((tag = h->tags.head) != NULL) {
539 		if (tag == h->metaf)
540 			h->metaf = NULL;
541 		print_ctag(h, tag->tag);
542 		h->tags.head = tag->next;
543 		free(tag);
544 		if (until && tag == until)
545 			return;
546 	}
547 }
548 
549 
550 void
551 print_stagq(struct html *h, const struct tag *suntil)
552 {
553 	struct tag	*tag;
554 
555 	while ((tag = h->tags.head) != NULL) {
556 		if (suntil && tag == suntil)
557 			return;
558 		if (tag == h->metaf)
559 			h->metaf = NULL;
560 		print_ctag(h, tag->tag);
561 		h->tags.head = tag->next;
562 		free(tag);
563 	}
564 }
565 
566 
567 void
568 bufinit(struct html *h)
569 {
570 
571 	h->buf[0] = '\0';
572 	h->buflen = 0;
573 }
574 
575 
/* Append one style declaration of the form "key:val;" to the buffer. */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
585 
586 
/* Append the whole NUL-terminated string `p' to the scratch buffer. */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
593 
594 
595 void
596 buffmt(struct html *h, const char *fmt, ...)
597 {
598 	va_list		 ap;
599 
600 	va_start(ap, fmt);
601 	(void)vsnprintf(h->buf + (int)h->buflen,
602 			BUFSIZ - h->buflen - 1, fmt, ap);
603 	va_end(ap);
604 	h->buflen = strlen(h->buf);
605 }
606 
607 
608 void
609 bufncat(struct html *h, const char *p, size_t sz)
610 {
611 
612 	if (h->buflen + sz > BUFSIZ - 1)
613 		sz = BUFSIZ - 1 - h->buflen;
614 
615 	(void)strncat(h->buf, p, sz);
616 	h->buflen += sz;
617 }
618 
619 
620 void
621 buffmt_includes(struct html *h, const char *name)
622 {
623 	const char	*p, *pp;
624 
625 	pp = h->base_includes;
626 
627 	while (NULL != (p = strchr(pp, '%'))) {
628 		bufncat(h, pp, (size_t)(p - pp));
629 		switch (*(p + 1)) {
630 		case('I'):
631 			bufcat(h, name);
632 			break;
633 		default:
634 			bufncat(h, p, 2);
635 			break;
636 		}
637 		pp = p + 2;
638 	}
639 	if (pp)
640 		bufcat(h, pp);
641 }
642 
643 
644 void
645 buffmt_man(struct html *h,
646 		const char *name, const char *sec)
647 {
648 	const char	*p, *pp;
649 
650 	pp = h->base_man;
651 
652 	/* LINTED */
653 	while (NULL != (p = strchr(pp, '%'))) {
654 		bufncat(h, pp, (size_t)(p - pp));
655 		switch (*(p + 1)) {
656 		case('S'):
657 			bufcat(h, sec ? sec : "1");
658 			break;
659 		case('N'):
660 			buffmt(h, name);
661 			break;
662 		default:
663 			bufncat(h, p, 2);
664 			break;
665 		}
666 		pp = p + 2;
667 	}
668 	if (pp)
669 		bufcat(h, pp);
670 }
671 
672 
673 void
674 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
675 {
676 	double		 v;
677 	const char	*u;
678 
679 	v = su->scale;
680 
681 	switch (su->unit) {
682 	case (SCALE_CM):
683 		u = "cm";
684 		break;
685 	case (SCALE_IN):
686 		u = "in";
687 		break;
688 	case (SCALE_PC):
689 		u = "pc";
690 		break;
691 	case (SCALE_PT):
692 		u = "pt";
693 		break;
694 	case (SCALE_EM):
695 		u = "em";
696 		break;
697 	case (SCALE_MM):
698 		if (0 == (v /= 100))
699 			v = 1;
700 		u = "em";
701 		break;
702 	case (SCALE_EN):
703 		u = "ex";
704 		break;
705 	case (SCALE_BU):
706 		u = "ex";
707 		break;
708 	case (SCALE_VS):
709 		u = "em";
710 		break;
711 	default:
712 		u = "ex";
713 		break;
714 	}
715 
716 	if (su->pt)
717 		buffmt(h, "%s: %f%s;", p, v, u);
718 	else
719 		/* LINTED */
720 		buffmt(h, "%s: %d%s;", p, (int)v, u);
721 }
722 
723 
/*
 * Append an identifier derived from `src' to the NUL-terminated string
 * already in `dst': the letter 'x' followed by two lower-case hex
 * digits for every byte of `src'.  `sz' is the total size of the `dst'
 * array, including the part that is already in use.
 *
 * Bytes are encoded as unsigned values, so each one always yields
 * exactly two digits.  When space runs out the result is cut off after
 * the last byte that fits completely; the result is always
 * NUL-terminated.
 *
 * At least three bytes must remain free in `dst' on entry.
 */
void
html_idcat(char *dst, const char *src, int sz)
{

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	/* Each byte needs room for two digits and the terminator. */

	for ( ; *src != '\0' && sz > 2; src++) {
		(void)snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		sz -= 2;
		dst += 2;
	}
}
750