xref: /openbsd-src/usr.bin/mandoc/html.c (revision 7bbe964f6b7d22ad07ca46292495604f942eba4e)
1 /*	$Id: html.c,v 1.2 2009/10/27 21:40:07 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/types.h>
18 
19 #include <assert.h>
20 #include <err.h>
21 #include <stdio.h>
22 #include <stdarg.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 
28 #include "out.h"
29 #include "chars.h"
30 #include "html.h"
31 #include "main.h"
32 
/*
 * Strip the const qualifier from a pointer by round-tripping it through
 * uintptr_t.  Used in html_alloc() to hand the const token table to
 * getsubopt().
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/* The two quoted strings printed by print_gen_doctype(). */
#define	DOCTYPE		"-//W3C//DTD HTML 4.01//EN"
#define	DTD		"http://www.w3.org/TR/html4/strict.dtd"
37 
/*
 * Static description of one HTML element: the name printed inside the
 * angle brackets and a set of output flags.
 */
struct	htmldata {
	const char	 *name;
	int		  flags;
/*
 * HTML_CLRLINE: print_otag() omits the separating space before the tag,
 * and both print_otag() and print_ctag() set HTML_NEWLINE afterwards.
 * HTML_NOSTACK: print_otag() does not push the element onto h->tags, so
 * no closing tag is ever emitted for it.
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)
};
44 
/*
 * Element names and flags, looked up as htmltags[tag] by print_otag()
 * and print_ctag().  The comment on each entry names the enumerator
 * expected at that index; the enum itself is declared outside this
 * file (presumably html.h), so the order here must be kept in step
 * with it.
 */
static	const struct htmldata htmltags[TAG_MAX] = {
	{"html",	HTML_CLRLINE}, /* TAG_HTML */
	{"head",	HTML_CLRLINE}, /* TAG_HEAD */
	{"body",	HTML_CLRLINE}, /* TAG_BODY */
	{"meta",	HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
	{"title",	HTML_CLRLINE}, /* TAG_TITLE */
	{"div",		HTML_CLRLINE}, /* TAG_DIV */
	{"h1",		0}, /* TAG_H1 */
	{"h2",		0}, /* TAG_H2 */
	{"p",		HTML_CLRLINE}, /* TAG_P */
	{"span",	0}, /* TAG_SPAN */
	{"link",	HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
	{"br",		HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BR */
	{"a",		0}, /* TAG_A */
	{"table",	HTML_CLRLINE}, /* TAG_TABLE */
	{"col",		HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
	{"tr",		HTML_CLRLINE}, /* TAG_TR */
	{"td",		HTML_CLRLINE}, /* TAG_TD */
	{"li",		HTML_CLRLINE}, /* TAG_LI */
	{"ul",		HTML_CLRLINE}, /* TAG_UL */
	{"ol",		HTML_CLRLINE}, /* TAG_OL */
	{"base",	HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
};
68 
/*
 * Attribute names, looked up as htmlattrs[pair.key] by print_otag().
 * The index is the key of a struct htmlpair; the key enumeration is
 * declared outside this file, so the order here must be kept in step
 * with it.
 */
static	const char	 *const htmlattrs[ATTR_MAX] = {
	"http-equiv",
	"content",
	"name",
	"rel",
	"href",
	"type",
	"media",
	"class",
	"style",
	"width",
	"valign",
	"target",
	"id",
};
84 
85 void *
86 html_alloc(char *outopts)
87 {
88 	struct html	*h;
89 	const char	*toks[4];
90 	char		*v;
91 
92 	toks[0] = "style";
93 	toks[1] = "man";
94 	toks[2] = "includes";
95 	toks[3] = NULL;
96 
97 	if (NULL == (h = calloc(1, sizeof(struct html))))
98 		return(NULL);
99 
100 	h->tags.head = NULL;
101 	h->ords.head = NULL;
102 
103 	if (NULL == (h->symtab = chars_init(CHARS_HTML))) {
104 		free(h);
105 		return(NULL);
106 	}
107 
108 	while (outopts && *outopts)
109 		switch (getsubopt(&outopts, UNCONST(toks), &v)) {
110 		case (0):
111 			h->style = v;
112 			break;
113 		case (1):
114 			h->base_man = v;
115 			break;
116 		case (2):
117 			h->base_includes = v;
118 			break;
119 		default:
120 			break;
121 		}
122 
123 	return(h);
124 }
125 
126 
127 void
128 html_free(void *p)
129 {
130 	struct tag	*tag;
131 	struct ord	*ord;
132 	struct html	*h;
133 
134 	h = (struct html *)p;
135 
136 	while ((ord = h->ords.head) != NULL) {
137 		h->ords.head = ord->next;
138 		free(ord);
139 	}
140 
141 	while ((tag = h->tags.head) != NULL) {
142 		h->tags.head = tag->next;
143 		free(tag);
144 	}
145 
146 	if (h->symtab)
147 		chars_free(h->symtab);
148 
149 	free(h);
150 }
151 
152 
153 void
154 print_gen_head(struct html *h)
155 {
156 	struct htmlpair	 tag[4];
157 
158 	tag[0].key = ATTR_HTTPEQUIV;
159 	tag[0].val = "Content-Type";
160 	tag[1].key = ATTR_CONTENT;
161 	tag[1].val = "text/html; charset=utf-8";
162 	print_otag(h, TAG_META, 2, tag);
163 
164 	tag[0].key = ATTR_NAME;
165 	tag[0].val = "resource-type";
166 	tag[1].key = ATTR_CONTENT;
167 	tag[1].val = "document";
168 	print_otag(h, TAG_META, 2, tag);
169 
170 	if (h->style) {
171 		tag[0].key = ATTR_REL;
172 		tag[0].val = "stylesheet";
173 		tag[1].key = ATTR_HREF;
174 		tag[1].val = h->style;
175 		tag[2].key = ATTR_TYPE;
176 		tag[2].val = "text/css";
177 		tag[3].key = ATTR_MEDIA;
178 		tag[3].val = "all";
179 		print_otag(h, TAG_LINK, 4, tag);
180 	}
181 }
182 
183 
184 static void
185 print_spec(struct html *h, const char *p, int len)
186 {
187 	const char	*rhs;
188 	int		 i;
189 	size_t		 sz;
190 
191 	rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz);
192 
193 	if (NULL == rhs)
194 		return;
195 	for (i = 0; i < (int)sz; i++)
196 		putchar(rhs[i]);
197 }
198 
199 
200 static void
201 print_res(struct html *h, const char *p, int len)
202 {
203 	const char	*rhs;
204 	int		 i;
205 	size_t		 sz;
206 
207 	rhs = chars_a2res(h->symtab, p, (size_t)len, &sz);
208 
209 	if (NULL == rhs)
210 		return;
211 	for (i = 0; i < (int)sz; i++)
212 		putchar(rhs[i]);
213 }
214 
215 
/*
 * Handle one backslash escape sequence.
 *
 * On entry *p points at the backslash.  On return *p points at the last
 * character consumed by the escape, so that a caller advancing by one
 * lands on the first character after it.  If the string ends in the
 * middle of the escape, *p is instead left on the terminating NUL and
 * nothing is printed for it; a caller must therefore check for NUL
 * before advancing any further.
 *
 * Forms handled:
 *   \(xx      two-character name, printed via print_spec()
 *   \[name]   bracketed name, printed via print_spec()
 *   \*(xx     two-character name, printed via print_res()
 *   \*[name]  bracketed name, printed via print_res()
 *   \*x       one-character name, printed via print_res()
 *   \fX       consumed, currently produces no output
 *   \x        any other single character, printed via print_spec()
 */
static void
print_escape(struct html *h, const char **p)
{
	int		 j, type;
	const char	*wp;

	wp = *p;
	/* type selects the lookup for the bracketed form: 1 spec, 0 res. */
	type = 1;

	/* Step past the backslash; a lone trailing backslash ends here. */
	if (0 == *(++wp)) {
		*p = wp;
		return;
	}

	if ('(' == *wp) {
		wp++;
		/* Need two name characters; stop on whichever NUL comes first. */
		if (0 == *wp || 0 == *(wp + 1)) {
			*p = 0 == *wp ? wp : wp + 1;
			return;
		}

		print_spec(h, wp, 2);
		/* Leave *p on the second name character. */
		*p = ++wp;
		return;

	} else if ('*' == *wp) {
		if (0 == *(++wp)) {
			*p = wp;
			return;
		}

		switch (*wp) {
		case ('('):
			wp++;
			/* Same two-character rule as above. */
			if (0 == *wp || 0 == *(wp + 1)) {
				*p = 0 == *wp ? wp : wp + 1;
				return;
			}

			print_res(h, wp, 2);
			*p = ++wp;
			return;
		case ('['):
			/* Fall out to the shared bracket scanner below. */
			type = 0;
			break;
		default:
			print_res(h, wp, 1);
			*p = wp;
			return;
		}

	} else if ('f' == *wp) {
		if (0 == *(++wp)) {
			*p = wp;
			return;
		}

		/* The character after 'f' is consumed but not acted upon yet. */
		switch (*wp) {
		case ('B'):
			/* TODO */
			break;
		case ('I'):
			/* TODO */
			break;
		case ('P'):
			/* FALLTHROUGH */
		case ('R'):
			/* TODO */
			break;
		default:
			break;
		}

		*p = wp;
		return;

	} else if ('[' != *wp) {
		print_spec(h, wp, 1);
		*p = wp;
		return;
	}

	/* wp is on '[': skip it and count the name length up to ']'. */
	wp++;
	for (j = 0; *wp && ']' != *wp; wp++, j++)
		/* Loop... */ ;

	/* Unterminated bracket: print nothing, leave *p on the NUL. */
	if (0 == *wp) {
		*p = wp;
		return;
	}

	/* wp - j is the first character of the name. */
	if (type)
		print_spec(h, wp - j, j);
	else
		print_res(h, wp - j, j);

	/* Leave *p on the closing ']'. */
	*p = wp;
}
314 
315 
/*
 * Write the string p to standard output as HTML text: backslash
 * escapes are expanded by print_escape(), the characters '<', '>' and
 * '&' are replaced by their entities, and everything else is copied
 * through unchanged.
 */
static void
print_encode(struct html *h, const char *p)
{

	for (; *p; p++) {
		if ('\\' == *p) {
			print_escape(h, &p);
			/*
			 * A truncated escape leaves p on the terminating
			 * NUL.  Stop here: the loop increment would
			 * otherwise step past the end of the string.
			 */
			if ('\0' == *p)
				break;
			continue;
		}
		switch (*p) {
		case ('<'):
			fputs("&lt;", stdout);
			break;
		case ('>'):
			fputs("&gt;", stdout);
			break;
		case ('&'):
			fputs("&amp;", stdout);
			break;
		default:
			putchar(*p);
			break;
		}
	}
}
341 
342 
343 struct tag *
344 print_otag(struct html *h, enum htmltag tag,
345 		int sz, const struct htmlpair *p)
346 {
347 	int		 i;
348 	struct tag	*t;
349 
350 	if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
351 		if (NULL == (t = malloc(sizeof(struct tag))))
352 			err(EXIT_FAILURE, "malloc");
353 		t->tag = tag;
354 		t->next = h->tags.head;
355 		h->tags.head = t;
356 	} else
357 		t = NULL;
358 
359 	if ( ! (HTML_NOSPACE & h->flags))
360 		if ( ! (HTML_CLRLINE & htmltags[tag].flags))
361 			printf(" ");
362 
363 	printf("<%s", htmltags[tag].name);
364 	for (i = 0; i < sz; i++) {
365 		printf(" %s=\"", htmlattrs[p[i].key]);
366 		assert(p->val);
367 		print_encode(h, p[i].val);
368 		printf("\"");
369 	}
370 	printf(">");
371 
372 	h->flags |= HTML_NOSPACE;
373 	if (HTML_CLRLINE & htmltags[tag].flags)
374 		h->flags |= HTML_NEWLINE;
375 	else
376 		h->flags &= ~HTML_NEWLINE;
377 
378 	return(t);
379 }
380 
381 
382 /* ARGSUSED */
383 static void
384 print_ctag(struct html *h, enum htmltag tag)
385 {
386 
387 	printf("</%s>", htmltags[tag].name);
388 	if (HTML_CLRLINE & htmltags[tag].flags)
389 		h->flags |= HTML_NOSPACE;
390 	if (HTML_CLRLINE & htmltags[tag].flags)
391 		h->flags |= HTML_NEWLINE;
392 	else
393 		h->flags &= ~HTML_NEWLINE;
394 }
395 
396 
397 /* ARGSUSED */
398 void
399 print_gen_doctype(struct html *h)
400 {
401 
402 	printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
403 }
404 
405 
406 void
407 print_text(struct html *h, const char *p)
408 {
409 
410 	if (*p && 0 == *(p + 1))
411 		switch (*p) {
412 		case('.'):
413 			/* FALLTHROUGH */
414 		case(','):
415 			/* FALLTHROUGH */
416 		case(';'):
417 			/* FALLTHROUGH */
418 		case(':'):
419 			/* FALLTHROUGH */
420 		case('?'):
421 			/* FALLTHROUGH */
422 		case('!'):
423 			/* FALLTHROUGH */
424 		case(')'):
425 			/* FALLTHROUGH */
426 		case(']'):
427 			/* FALLTHROUGH */
428 		case('}'):
429 			if ( ! (HTML_IGNDELIM & h->flags))
430 				h->flags |= HTML_NOSPACE;
431 			break;
432 		default:
433 			break;
434 		}
435 
436 	if ( ! (h->flags & HTML_NOSPACE))
437 		printf(" ");
438 
439 	h->flags &= ~HTML_NOSPACE;
440 	h->flags &= ~HTML_NEWLINE;
441 
442 	if (p)
443 		print_encode(h, p);
444 
445 	if (*p && 0 == *(p + 1))
446 		switch (*p) {
447 		case('('):
448 			/* FALLTHROUGH */
449 		case('['):
450 			/* FALLTHROUGH */
451 		case('{'):
452 			h->flags |= HTML_NOSPACE;
453 			break;
454 		default:
455 			break;
456 		}
457 }
458 
459 
460 void
461 print_tagq(struct html *h, const struct tag *until)
462 {
463 	struct tag	*tag;
464 
465 	while ((tag = h->tags.head) != NULL) {
466 		print_ctag(h, tag->tag);
467 		h->tags.head = tag->next;
468 		free(tag);
469 		if (until && tag == until)
470 			return;
471 	}
472 }
473 
474 
475 void
476 print_stagq(struct html *h, const struct tag *suntil)
477 {
478 	struct tag	*tag;
479 
480 	while ((tag = h->tags.head) != NULL) {
481 		if (suntil && tag == suntil)
482 			return;
483 		print_ctag(h, tag->tag);
484 		h->tags.head = tag->next;
485 		free(tag);
486 	}
487 }
488 
489 
490 void
491 bufinit(struct html *h)
492 {
493 
494 	h->buf[0] = '\0';
495 	h->buflen = 0;
496 }
497 
498 
/*
 * Append "key:val;" to the scratch buffer in h.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
508 
509 
/*
 * Append the whole NUL-terminated string p to the scratch buffer in h,
 * by way of bufncat().
 */
void
bufcat(struct html *h, const char *p)
{
	size_t		 len;

	len = strlen(p);
	bufncat(h, p, len);
}
516 
517 
518 void
519 buffmt(struct html *h, const char *fmt, ...)
520 {
521 	va_list		 ap;
522 
523 	va_start(ap, fmt);
524 	(void)vsnprintf(h->buf + (int)h->buflen,
525 			BUFSIZ - h->buflen - 1, fmt, ap);
526 	va_end(ap);
527 	h->buflen = strlen(h->buf);
528 }
529 
530 
531 void
532 bufncat(struct html *h, const char *p, size_t sz)
533 {
534 
535 	if (h->buflen + sz > BUFSIZ - 1)
536 		sz = BUFSIZ - 1 - h->buflen;
537 
538 	(void)strncat(h->buf, p, sz);
539 	h->buflen += sz;
540 }
541 
542 
543 void
544 buffmt_includes(struct html *h, const char *name)
545 {
546 	const char	*p, *pp;
547 
548 	pp = h->base_includes;
549 
550 	while (NULL != (p = strchr(pp, '%'))) {
551 		bufncat(h, pp, (size_t)(p - pp));
552 		switch (*(p + 1)) {
553 		case('I'):
554 			bufcat(h, name);
555 			break;
556 		default:
557 			bufncat(h, p, 2);
558 			break;
559 		}
560 		pp = p + 2;
561 	}
562 	if (pp)
563 		bufcat(h, pp);
564 }
565 
566 
567 void
568 buffmt_man(struct html *h,
569 		const char *name, const char *sec)
570 {
571 	const char	*p, *pp;
572 
573 	pp = h->base_man;
574 
575 	/* LINTED */
576 	while (NULL != (p = strchr(pp, '%'))) {
577 		bufncat(h, pp, (size_t)(p - pp));
578 		switch (*(p + 1)) {
579 		case('S'):
580 			bufcat(h, sec ? sec : "1");
581 			break;
582 		case('N'):
583 			buffmt(h, name);
584 			break;
585 		default:
586 			bufncat(h, p, 2);
587 			break;
588 		}
589 		pp = p + 2;
590 	}
591 	if (pp)
592 		bufcat(h, pp);
593 }
594 
595 
596 void
597 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
598 {
599 	double		 v;
600 	const char	*u;
601 
602 	v = su->scale;
603 
604 	switch (su->unit) {
605 	case (SCALE_CM):
606 		u = "cm";
607 		break;
608 	case (SCALE_IN):
609 		u = "in";
610 		break;
611 	case (SCALE_PC):
612 		u = "pc";
613 		break;
614 	case (SCALE_PT):
615 		u = "pt";
616 		break;
617 	case (SCALE_EM):
618 		u = "em";
619 		break;
620 	case (SCALE_MM):
621 		if (0 == (v /= 100))
622 			v = 1;
623 		u = "em";
624 		break;
625 	case (SCALE_EN):
626 		u = "ex";
627 		break;
628 	case (SCALE_BU):
629 		u = "ex";
630 		break;
631 	case (SCALE_VS):
632 		u = "em";
633 		break;
634 	default:
635 		u = "ex";
636 		break;
637 	}
638 
639 	if (su->pt)
640 		buffmt(h, "%s: %f%s;", p, v, u);
641 	else
642 		/* LINTED */
643 		buffmt(h, "%s: %d%s;", p, (int)v, u);
644 }
645 
646