xref: /netbsd-src/external/bsd/mdocml/dist/html.c (revision 4e6df137e8e14049b5a701d249962c480449c141)
1 /*	$Vendor-Id: html.c,v 1.96 2010/02/17 19:48:33 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "out.h"
33 #include "chars.h"
34 #include "html.h"
35 #include "main.h"
36 
/*
 * Strip the const qualifier from a pointer by round-tripping it through
 * uintptr_t; the result is a plain "void *" to the same address.
 */
#define	UNCONST(ptr)	((void *)(uintptr_t)(const void *)(ptr))
38 
/*
 * Bits for htmldata.flags:
 *   HTML_CLRLINE   - no separating space is printed before the opening
 *                    tag, and a newline follows the closing tag.
 *   HTML_NOSTACK   - the tag is not pushed onto the stack of open tags.
 *   HTML_AUTOCLOSE - the tag has auto-closure ("/>" in XHTML output).
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)
#define	HTML_AUTOCLOSE	 (1 << 2)

/* Static description of one element: its name and behaviour flags. */
struct	htmldata {
	const char	 *name;		/* element name as printed */
	int		  flags;	/* bitwise OR of the HTML_* bits above */
};
46 
47 static	const struct htmldata htmltags[TAG_MAX] = {
48 	{"html",	HTML_CLRLINE}, /* TAG_HTML */
49 	{"head",	HTML_CLRLINE}, /* TAG_HEAD */
50 	{"body",	HTML_CLRLINE}, /* TAG_BODY */
51 	{"meta",	HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
52 	{"title",	HTML_CLRLINE}, /* TAG_TITLE */
53 	{"div",		HTML_CLRLINE}, /* TAG_DIV */
54 	{"h1",		0}, /* TAG_H1 */
55 	{"h2",		0}, /* TAG_H2 */
56 	{"span",	0}, /* TAG_SPAN */
57 	{"link",	HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
58 	{"br",		HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
59 	{"a",		0}, /* TAG_A */
60 	{"table",	HTML_CLRLINE}, /* TAG_TABLE */
61 	{"col",		HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62 	{"tr",		HTML_CLRLINE}, /* TAG_TR */
63 	{"td",		HTML_CLRLINE}, /* TAG_TD */
64 	{"li",		HTML_CLRLINE}, /* TAG_LI */
65 	{"ul",		HTML_CLRLINE}, /* TAG_UL */
66 	{"ol",		HTML_CLRLINE}, /* TAG_OL */
67 };
68 
69 static	const char	*const htmlfonts[HTMLFONT_MAX] = {
70 	"roman",
71 	"bold",
72 	"italic"
73 };
74 
75 static	const char	*const htmlattrs[ATTR_MAX] = {
76 	"http-equiv",
77 	"content",
78 	"name",
79 	"rel",
80 	"href",
81 	"type",
82 	"media",
83 	"class",
84 	"style",
85 	"width",
86 	"valign",
87 	"target",
88 	"id",
89 	"summary",
90 };
91 
92 static	void		  print_spec(struct html *, const char *, size_t);
93 static	void		  print_res(struct html *, const char *, size_t);
94 static	void		  print_ctag(struct html *, enum htmltag);
95 static	void		  print_doctype(struct html *);
96 static	void		  print_xmltype(struct html *);
97 static	int		  print_encode(struct html *, const char *, int);
98 static	void		  print_metaf(struct html *, enum roffdeco);
99 static	void		  print_attr(struct html *,
100 				const char *, const char *);
101 static	void		 *ml_alloc(char *, enum htmltype);
102 
103 
104 static void *
105 ml_alloc(char *outopts, enum htmltype type)
106 {
107 	struct html	*h;
108 	const char	*toks[4];
109 	char		*v;
110 
111 	toks[0] = "style";
112 	toks[1] = "man";
113 	toks[2] = "includes";
114 	toks[3] = NULL;
115 
116 	h = calloc(1, sizeof(struct html));
117 	if (NULL == h) {
118 		perror(NULL);
119 		exit(EXIT_FAILURE);
120 	}
121 
122 	h->type = type;
123 	h->tags.head = NULL;
124 	h->ords.head = NULL;
125 	h->symtab = chars_init(CHARS_HTML);
126 
127 	while (outopts && *outopts)
128 		switch (getsubopt(&outopts, UNCONST(toks), &v)) {
129 		case (0):
130 			h->style = v;
131 			break;
132 		case (1):
133 			h->base_man = v;
134 			break;
135 		case (2):
136 			h->base_includes = v;
137 			break;
138 		default:
139 			break;
140 		}
141 
142 	return(h);
143 }
144 
145 void *
146 html_alloc(char *outopts)
147 {
148 
149 	return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
150 }
151 
152 
153 void *
154 xhtml_alloc(char *outopts)
155 {
156 
157 	return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
158 }
159 
160 
161 void
162 html_free(void *p)
163 {
164 	struct tag	*tag;
165 	struct ord	*ord;
166 	struct html	*h;
167 
168 	h = (struct html *)p;
169 
170 	while ((ord = h->ords.head) != NULL) {
171 		h->ords.head = ord->next;
172 		free(ord);
173 	}
174 
175 	while ((tag = h->tags.head) != NULL) {
176 		h->tags.head = tag->next;
177 		free(tag);
178 	}
179 
180 	if (h->symtab)
181 		chars_free(h->symtab);
182 
183 	free(h);
184 }
185 
186 
187 void
188 print_gen_head(struct html *h)
189 {
190 	struct htmlpair	 tag[4];
191 
192 	tag[0].key = ATTR_HTTPEQUIV;
193 	tag[0].val = "Content-Type";
194 	tag[1].key = ATTR_CONTENT;
195 	tag[1].val = "text/html; charset=utf-8";
196 	print_otag(h, TAG_META, 2, tag);
197 
198 	tag[0].key = ATTR_NAME;
199 	tag[0].val = "resource-type";
200 	tag[1].key = ATTR_CONTENT;
201 	tag[1].val = "document";
202 	print_otag(h, TAG_META, 2, tag);
203 
204 	if (h->style) {
205 		tag[0].key = ATTR_REL;
206 		tag[0].val = "stylesheet";
207 		tag[1].key = ATTR_HREF;
208 		tag[1].val = h->style;
209 		tag[2].key = ATTR_TYPE;
210 		tag[2].val = "text/css";
211 		tag[3].key = ATTR_MEDIA;
212 		tag[3].val = "all";
213 		print_otag(h, TAG_LINK, 4, tag);
214 	}
215 }
216 
217 
218 static void
219 print_spec(struct html *h, const char *p, size_t len)
220 {
221 	const char	*rhs;
222 	size_t		 sz;
223 
224 	rhs = chars_a2ascii(h->symtab, p, len, &sz);
225 
226 	if (NULL == rhs)
227 		return;
228 	fwrite(rhs, 1, sz, stdout);
229 }
230 
231 
232 static void
233 print_res(struct html *h, const char *p, size_t len)
234 {
235 	const char	*rhs;
236 	size_t		 sz;
237 
238 	rhs = chars_a2res(h->symtab, p, len, &sz);
239 
240 	if (NULL == rhs)
241 		return;
242 	fwrite(rhs, 1, sz, stdout);
243 }
244 
245 
246 struct tag *
247 print_ofont(struct html *h, enum htmlfont font)
248 {
249 	struct htmlpair	 tag;
250 
251 	h->metal = h->metac;
252 	h->metac = font;
253 
254 	/* FIXME: DECO_ROMAN should just close out preexisting. */
255 
256 	if (h->metaf && h->tags.head == h->metaf)
257 		print_tagq(h, h->metaf);
258 
259 	PAIR_CLASS_INIT(&tag, htmlfonts[font]);
260 	h->metaf = print_otag(h, TAG_SPAN, 1, &tag);
261 	return(h->metaf);
262 }
263 
264 
265 static void
266 print_metaf(struct html *h, enum roffdeco deco)
267 {
268 	enum htmlfont	 font;
269 
270 	switch (deco) {
271 	case (DECO_PREVIOUS):
272 		font = h->metal;
273 		break;
274 	case (DECO_ITALIC):
275 		font = HTMLFONT_ITALIC;
276 		break;
277 	case (DECO_BOLD):
278 		font = HTMLFONT_BOLD;
279 		break;
280 	case (DECO_ROMAN):
281 		font = HTMLFONT_NONE;
282 		break;
283 	default:
284 		abort();
285 		/* NOTREACHED */
286 	}
287 
288 	(void)print_ofont(h, font);
289 }
290 
291 
292 static int
293 print_encode(struct html *h, const char *p, int norecurse)
294 {
295 	size_t		 sz;
296 	int		 len, nospace;
297 	const char	*seq;
298 	enum roffdeco	 deco;
299 
300 	nospace = 0;
301 
302 	for (; *p; p++) {
303 		sz = strcspn(p, "\\<>&");
304 
305 		fwrite(p, 1, sz, stdout);
306 		p += /* LINTED */
307 			sz;
308 
309 		if ('<' == *p) {
310 			printf("&lt;");
311 			continue;
312 		} else if ('>' == *p) {
313 			printf("&gt;");
314 			continue;
315 		} else if ('&' == *p) {
316 			printf("&amp;");
317 			continue;
318 		} else if ('\0' == *p)
319 			break;
320 
321 		seq = ++p;
322 		len = a2roffdeco(&deco, &seq, &sz);
323 
324 		switch (deco) {
325 		case (DECO_RESERVED):
326 			print_res(h, seq, sz);
327 			break;
328 		case (DECO_SPECIAL):
329 			print_spec(h, seq, sz);
330 			break;
331 		case (DECO_PREVIOUS):
332 			/* FALLTHROUGH */
333 		case (DECO_BOLD):
334 			/* FALLTHROUGH */
335 		case (DECO_ITALIC):
336 			/* FALLTHROUGH */
337 		case (DECO_ROMAN):
338 			if (norecurse)
339 				break;
340 			print_metaf(h, deco);
341 			break;
342 		default:
343 			break;
344 		}
345 
346 		p += len - 1;
347 
348 		if (DECO_NOSPACE == deco && '\0' == *(p + 1))
349 			nospace = 1;
350 	}
351 
352 	return(nospace);
353 }
354 
355 
/*
 * Print one attribute as ' key="val"'.  The value is run through
 * print_encode() with font escapes disabled.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	printf(" %s=\"", key);
	(void)print_encode(h, val, 1);
	putchar('"');
}
363 
364 
365 struct tag *
366 print_otag(struct html *h, enum htmltag tag,
367 		int sz, const struct htmlpair *p)
368 {
369 	int		 i;
370 	struct tag	*t;
371 
372 	/* Push this tags onto the stack of open scopes. */
373 
374 	if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
375 		t = malloc(sizeof(struct tag));
376 		if (NULL == t) {
377 			perror(NULL);
378 			exit(EXIT_FAILURE);
379 		}
380 		t->tag = tag;
381 		t->next = h->tags.head;
382 		h->tags.head = t;
383 	} else
384 		t = NULL;
385 
386 	if ( ! (HTML_NOSPACE & h->flags))
387 		if ( ! (HTML_CLRLINE & htmltags[tag].flags))
388 			putchar(' ');
389 
390 	/* Print out the tag name and attributes. */
391 
392 	printf("<%s", htmltags[tag].name);
393 	for (i = 0; i < sz; i++)
394 		print_attr(h, htmlattrs[p[i].key], p[i].val);
395 
396 	/* Add non-overridable attributes. */
397 
398 	if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
399 		print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
400 		print_attr(h, "xml:lang", "en");
401 		print_attr(h, "lang", "en");
402 	}
403 
404 	/* Accomodate for XML "well-formed" singleton escaping. */
405 
406 	if (HTML_AUTOCLOSE & htmltags[tag].flags)
407 		switch (h->type) {
408 		case (HTML_XHTML_1_0_STRICT):
409 			putchar('/');
410 			break;
411 		default:
412 			break;
413 		}
414 
415 	putchar('>');
416 
417 	h->flags |= HTML_NOSPACE;
418 	return(t);
419 }
420 
421 
422 static void
423 print_ctag(struct html *h, enum htmltag tag)
424 {
425 
426 	printf("</%s>", htmltags[tag].name);
427 	if (HTML_CLRLINE & htmltags[tag].flags) {
428 		h->flags |= HTML_NOSPACE;
429 		putchar('\n');
430 	}
431 }
432 
433 
/*
 * Print the document prologue: the XML declaration (XHTML only)
 * followed by the DOCTYPE line.
 */
void
print_gen_decls(struct html *h)
{
	print_xmltype(h);	/* prints nothing for plain HTML */
	print_doctype(h);
}
441 
442 
443 static void
444 print_xmltype(struct html *h)
445 {
446 	const char	*decl;
447 
448 	switch (h->type) {
449 	case (HTML_XHTML_1_0_STRICT):
450 		decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
451 		break;
452 	default:
453 		decl = NULL;
454 		break;
455 	}
456 
457 	if (NULL == decl)
458 		return;
459 
460 	printf("%s\n", decl);
461 }
462 
463 
464 static void
465 print_doctype(struct html *h)
466 {
467 	const char	*doctype;
468 	const char	*dtd;
469 	const char	*name;
470 
471 	switch (h->type) {
472 	case (HTML_HTML_4_01_STRICT):
473 		name = "HTML";
474 		doctype = "-//W3C//DTD HTML 4.01//EN";
475 		dtd = "http://www.w3.org/TR/html4/strict.dtd";
476 		break;
477 	default:
478 		name = "html";
479 		doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
480 		dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
481 		break;
482 	}
483 
484 	printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
485 			name, doctype, dtd);
486 }
487 
488 
489 void
490 print_text(struct html *h, const char *p)
491 {
492 
493 	if (*p && 0 == *(p + 1))
494 		switch (*p) {
495 		case('.'):
496 			/* FALLTHROUGH */
497 		case(','):
498 			/* FALLTHROUGH */
499 		case(';'):
500 			/* FALLTHROUGH */
501 		case(':'):
502 			/* FALLTHROUGH */
503 		case('?'):
504 			/* FALLTHROUGH */
505 		case('!'):
506 			/* FALLTHROUGH */
507 		case(')'):
508 			/* FALLTHROUGH */
509 		case(']'):
510 			/* FALLTHROUGH */
511 		case('}'):
512 			if ( ! (HTML_IGNDELIM & h->flags))
513 				h->flags |= HTML_NOSPACE;
514 			break;
515 		default:
516 			break;
517 		}
518 
519 	if ( ! (h->flags & HTML_NOSPACE))
520 		putchar(' ');
521 
522 	assert(p);
523 	if ( ! print_encode(h, p, 0))
524 		h->flags &= ~HTML_NOSPACE;
525 
526 	if (*p && 0 == *(p + 1))
527 		switch (*p) {
528 		case('('):
529 			/* FALLTHROUGH */
530 		case('['):
531 			/* FALLTHROUGH */
532 		case('{'):
533 			h->flags |= HTML_NOSPACE;
534 			break;
535 		default:
536 			break;
537 		}
538 }
539 
540 
541 void
542 print_tagq(struct html *h, const struct tag *until)
543 {
544 	struct tag	*tag;
545 
546 	while ((tag = h->tags.head) != NULL) {
547 		if (tag == h->metaf)
548 			h->metaf = NULL;
549 		print_ctag(h, tag->tag);
550 		h->tags.head = tag->next;
551 		free(tag);
552 		if (until && tag == until)
553 			return;
554 	}
555 }
556 
557 
558 void
559 print_stagq(struct html *h, const struct tag *suntil)
560 {
561 	struct tag	*tag;
562 
563 	while ((tag = h->tags.head) != NULL) {
564 		if (suntil && tag == suntil)
565 			return;
566 		if (tag == h->metaf)
567 			h->metaf = NULL;
568 		print_ctag(h, tag->tag);
569 		h->tags.head = tag->next;
570 		free(tag);
571 	}
572 }
573 
574 
575 void
576 bufinit(struct html *h)
577 {
578 
579 	h->buf[0] = '\0';
580 	h->buflen = 0;
581 }
582 
583 
/* Append "key:val;" to the scratch buffer. */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
593 
594 
/* Append the whole NUL-terminated string "p" to the scratch buffer. */
void
bufcat(struct html *h, const char *p)
{
	size_t	 len;

	len = strlen(p);
	bufncat(h, p, len);
}
601 
602 
603 void
604 buffmt(struct html *h, const char *fmt, ...)
605 {
606 	va_list		 ap;
607 
608 	va_start(ap, fmt);
609 	(void)vsnprintf(h->buf + (int)h->buflen,
610 			BUFSIZ - h->buflen - 1, fmt, ap);
611 	va_end(ap);
612 	h->buflen = strlen(h->buf);
613 }
614 
615 
616 void
617 bufncat(struct html *h, const char *p, size_t sz)
618 {
619 
620 	if (h->buflen + sz > BUFSIZ - 1)
621 		sz = BUFSIZ - 1 - h->buflen;
622 
623 	(void)strncat(h->buf, p, sz);
624 	h->buflen += sz;
625 }
626 
627 
628 void
629 buffmt_includes(struct html *h, const char *name)
630 {
631 	const char	*p, *pp;
632 
633 	pp = h->base_includes;
634 
635 	while (NULL != (p = strchr(pp, '%'))) {
636 		bufncat(h, pp, (size_t)(p - pp));
637 		switch (*(p + 1)) {
638 		case('I'):
639 			bufcat(h, name);
640 			break;
641 		default:
642 			bufncat(h, p, 2);
643 			break;
644 		}
645 		pp = p + 2;
646 	}
647 	if (pp)
648 		bufcat(h, pp);
649 }
650 
651 
652 void
653 buffmt_man(struct html *h,
654 		const char *name, const char *sec)
655 {
656 	const char	*p, *pp;
657 
658 	pp = h->base_man;
659 
660 	/* LINTED */
661 	while (NULL != (p = strchr(pp, '%'))) {
662 		bufncat(h, pp, (size_t)(p - pp));
663 		switch (*(p + 1)) {
664 		case('S'):
665 			bufcat(h, sec ? sec : "1");
666 			break;
667 		case('N'):
668 			buffmt(h, name);
669 			break;
670 		default:
671 			bufncat(h, p, 2);
672 			break;
673 		}
674 		pp = p + 2;
675 	}
676 	if (pp)
677 		bufcat(h, pp);
678 }
679 
680 
681 void
682 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
683 {
684 	double		 v;
685 	const char	*u;
686 
687 	v = su->scale;
688 
689 	switch (su->unit) {
690 	case (SCALE_CM):
691 		u = "cm";
692 		break;
693 	case (SCALE_IN):
694 		u = "in";
695 		break;
696 	case (SCALE_PC):
697 		u = "pc";
698 		break;
699 	case (SCALE_PT):
700 		u = "pt";
701 		break;
702 	case (SCALE_EM):
703 		u = "em";
704 		break;
705 	case (SCALE_MM):
706 		if (0 == (v /= 100))
707 			v = 1;
708 		u = "em";
709 		break;
710 	case (SCALE_EN):
711 		u = "ex";
712 		break;
713 	case (SCALE_BU):
714 		u = "ex";
715 		break;
716 	case (SCALE_VS):
717 		u = "em";
718 		break;
719 	default:
720 		u = "ex";
721 		break;
722 	}
723 
724 	if (su->pt)
725 		buffmt(h, "%s: %f%s;", p, v, u);
726 	else
727 		/* LINTED */
728 		buffmt(h, "%s: %d%s;", p, (int)v, u);
729 }
730 
731 
/*
 * Append an identifier derived from "src" to the string in "dst".
 *
 * "sz" is the total size of the "dst" buffer.  The appended text is
 * the letter 'x' (an id may not start with a digit) followed by each
 * byte of "src" as two lowercase hex digits, truncated to fit.  At
 * least three bytes must remain free in "dst" after its current
 * contents; this is asserted.
 */
void
html_idcat(char *dst, const char *src, int sz)
{
	int		 ssz;

	assert(sz);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	assert(sz > 2);

	/* We can't start with a number (bah). */

	*dst++ = 'x';
	*dst = '\0';
	sz--;

	for ( ; *src != '\0' && sz > 1; src++) {
		/*
		 * Go through unsigned char: where plain char is signed,
		 * a byte >= 0x80 would otherwise be sign-extended and
		 * printed as eight hex digits instead of two.
		 */
		ssz = snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);

		/* Stop on error or once the output has been truncated. */
		if (ssz < 0 || ssz >= sz)
			break;

		sz -= ssz;
		dst += ssz;
	}
}
758