xref: /openbsd-src/usr.bin/mandoc/html.c (revision 2584ca0b0c079044b412124fefd2e9be6e9a2447)
1 /*	$OpenBSD: html.c,v 1.124 2019/03/03 13:01:47 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stddef.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "out.h"
36 #include "html.h"
37 #include "manconf.h"
38 #include "main.h"
39 
/*
 * Flags describing how print_otag() and print_ctag()
 * write an HTML element.
 */
#define	HTML_NOSTACK	 (1 << 0)  /* not pushed onto the stack of open tags */
#define	HTML_AUTOCLOSE	 (1 << 1)  /* opening tag is written as <name/> */
#define	HTML_NLBEFORE	 (1 << 2)  /* end the output line before the opening tag */
#define	HTML_NLBEGIN	 (1 << 3)  /* end the output line after the opening tag */
#define	HTML_NLEND	 (1 << 4)  /* end the output line before the closing tag */
#define	HTML_NLAFTER	 (1 << 5)  /* end the output line after the closing tag */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 (1 << 6)  /* indent level is raised while open */
#define	HTML_NOINDENT	 (1 << 7)  /* indentation is switched off while open */

/* Static properties of one kind of HTML element. */
struct	htmldata {
	const char	 *name;		/* element name written to the output */
	int		  flags;	/* bitwise OR of the HTML_* flags above */
};
55 
56 static	const struct htmldata htmltags[TAG_MAX] = {
57 	{"html",	HTML_NLALL},
58 	{"head",	HTML_NLALL | HTML_INDENT},
59 	{"body",	HTML_NLALL},
60 	{"meta",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
61 	{"title",	HTML_NLAROUND},
62 	{"div",		HTML_NLAROUND},
63 	{"div",		0},
64 	{"section",	HTML_NLALL},
65 	{"h1",		HTML_NLAROUND},
66 	{"h2",		HTML_NLAROUND},
67 	{"span",	0},
68 	{"link",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
69 	{"br",		HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
70 	{"a",		0},
71 	{"table",	HTML_NLALL | HTML_INDENT},
72 	{"tr",		HTML_NLALL | HTML_INDENT},
73 	{"td",		HTML_NLAROUND},
74 	{"li",		HTML_NLAROUND | HTML_INDENT},
75 	{"ul",		HTML_NLALL | HTML_INDENT},
76 	{"ol",		HTML_NLALL | HTML_INDENT},
77 	{"dl",		HTML_NLALL | HTML_INDENT},
78 	{"dt",		HTML_NLAROUND},
79 	{"dd",		HTML_NLAROUND | HTML_INDENT},
80 	{"p",		HTML_NLAROUND | HTML_INDENT},
81 	{"pre",		HTML_NLALL | HTML_NOINDENT},
82 	{"var",		0},
83 	{"cite",	0},
84 	{"b",		0},
85 	{"i",		0},
86 	{"code",	0},
87 	{"small",	0},
88 	{"style",	HTML_NLALL | HTML_INDENT},
89 	{"math",	HTML_NLALL | HTML_INDENT},
90 	{"mrow",	0},
91 	{"mi",		0},
92 	{"mn",		0},
93 	{"mo",		0},
94 	{"msup",	0},
95 	{"msub",	0},
96 	{"msubsup",	0},
97 	{"mfrac",	0},
98 	{"msqrt",	0},
99 	{"mfenced",	0},
100 	{"mtable",	0},
101 	{"mtr",		0},
102 	{"mtd",		0},
103 	{"munderover",	0},
104 	{"munder",	0},
105 	{"mover",	0},
106 };
107 
108 /* Avoid duplicate HTML id= attributes. */
109 static	struct ohash	 id_unique;
110 
111 static	void	 html_reset_internal(struct html *);
112 static	void	 print_byte(struct html *, char);
113 static	void	 print_endword(struct html *);
114 static	void	 print_indent(struct html *);
115 static	void	 print_word(struct html *, const char *);
116 
117 static	void	 print_ctag(struct html *, struct tag *);
118 static	int	 print_escape(struct html *, char);
119 static	int	 print_encode(struct html *, const char *, const char *, int);
120 static	void	 print_href(struct html *, const char *, const char *, int);
121 
122 
123 void *
124 html_alloc(const struct manoutput *outopts)
125 {
126 	struct html	*h;
127 
128 	h = mandoc_calloc(1, sizeof(struct html));
129 
130 	h->tag = NULL;
131 	h->style = outopts->style;
132 	if ((h->base_man1 = outopts->man) == NULL)
133 		h->base_man2 = NULL;
134 	else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
135 		*h->base_man2++ = '\0';
136 	h->base_includes = outopts->includes;
137 	if (outopts->fragment)
138 		h->oflags |= HTML_FRAGMENT;
139 	if (outopts->toc)
140 		h->oflags |= HTML_TOC;
141 
142 	mandoc_ohash_init(&id_unique, 4, 0);
143 
144 	return h;
145 }
146 
147 static void
148 html_reset_internal(struct html *h)
149 {
150 	struct tag	*tag;
151 	char		*cp;
152 	unsigned int	 slot;
153 
154 	while ((tag = h->tag) != NULL) {
155 		h->tag = tag->next;
156 		free(tag);
157 	}
158 	cp = ohash_first(&id_unique, &slot);
159 	while (cp != NULL) {
160 		free(cp);
161 		cp = ohash_next(&id_unique, &slot);
162 	}
163 	ohash_delete(&id_unique);
164 }
165 
166 void
167 html_reset(void *p)
168 {
169 	html_reset_internal(p);
170 	mandoc_ohash_init(&id_unique, 4, 0);
171 }
172 
/*
 * Dispose of a formatter obtained from html_alloc():
 * release the per-document state, then the structure itself.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
179 
180 void
181 print_gen_head(struct html *h)
182 {
183 	struct tag	*t;
184 
185 	print_otag(h, TAG_META, "?", "charset", "utf-8");
186 	if (h->style != NULL) {
187 		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
188 		    h->style, "type", "text/css", "media", "all");
189 		return;
190 	}
191 
192 	/*
193 	 * Print a minimal embedded style sheet.
194 	 */
195 
196 	t = print_otag(h, TAG_STYLE, "");
197 	print_text(h, "table.head, table.foot { width: 100%; }");
198 	print_endline(h);
199 	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
200 	print_endline(h);
201 	print_text(h, "td.head-vol { text-align: center; }");
202 	print_endline(h);
203 	print_text(h, "div.Pp { margin: 1ex 0ex; }");
204 	print_endline(h);
205 	print_text(h, "div.Nd, div.Bf, div.Op { display: inline; }");
206 	print_endline(h);
207 	print_text(h, "span.Pa, span.Ad { font-style: italic; }");
208 	print_endline(h);
209 	print_text(h, "span.Ms { font-weight: bold; }");
210 	print_endline(h);
211 	print_text(h, "dl.Bl-diag ");
212 	print_byte(h, '>');
213 	print_text(h, " dt { font-weight: bold; }");
214 	print_endline(h);
215 	print_text(h, "code.Nm, code.Fl, code.Cm, code.Ic, "
216 	    "code.In, code.Fd, code.Fn,");
217 	print_endline(h);
218 	print_text(h, "code.Cd { font-weight: bold; "
219 	    "font-family: inherit; }");
220 	print_tagq(h, t);
221 }
222 
223 void
224 print_metaf(struct html *h, enum mandoc_esc deco)
225 {
226 	enum htmlfont	 font;
227 
228 	switch (deco) {
229 	case ESCAPE_FONTPREV:
230 		font = h->metal;
231 		break;
232 	case ESCAPE_FONTITALIC:
233 		font = HTMLFONT_ITALIC;
234 		break;
235 	case ESCAPE_FONTBOLD:
236 		font = HTMLFONT_BOLD;
237 		break;
238 	case ESCAPE_FONTBI:
239 		font = HTMLFONT_BI;
240 		break;
241 	case ESCAPE_FONTCW:
242 		font = HTMLFONT_CW;
243 		break;
244 	case ESCAPE_FONT:
245 	case ESCAPE_FONTROMAN:
246 		font = HTMLFONT_NONE;
247 		break;
248 	default:
249 		return;
250 	}
251 
252 	if (h->metaf) {
253 		print_tagq(h, h->metaf);
254 		h->metaf = NULL;
255 	}
256 
257 	h->metal = h->metac;
258 	h->metac = font;
259 
260 	switch (font) {
261 	case HTMLFONT_ITALIC:
262 		h->metaf = print_otag(h, TAG_I, "");
263 		break;
264 	case HTMLFONT_BOLD:
265 		h->metaf = print_otag(h, TAG_B, "");
266 		break;
267 	case HTMLFONT_BI:
268 		h->metaf = print_otag(h, TAG_B, "");
269 		print_otag(h, TAG_I, "");
270 		break;
271 	case HTMLFONT_CW:
272 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
273 		break;
274 	default:
275 		break;
276 	}
277 }
278 
279 void
280 html_close_paragraph(struct html *h)
281 {
282 	struct tag	*t;
283 
284 	for (t = h->tag; t != NULL && t->closed == 0; t = t->next) {
285 		switch(t->tag) {
286 		case TAG_P:
287 		case TAG_PRE:
288 			print_tagq(h, t);
289 			break;
290 		case TAG_A:
291 			print_tagq(h, t);
292 			continue;
293 		default:
294 			continue;
295 		}
296 		break;
297 	}
298 }
299 
300 /*
301  * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
302  * TOKEN_NONE does not switch.  The old mode is returned.
303  */
304 enum roff_tok
305 html_fillmode(struct html *h, enum roff_tok want)
306 {
307 	struct tag	*t;
308 	enum roff_tok	 had;
309 
310 	for (t = h->tag; t != NULL; t = t->next)
311 		if (t->tag == TAG_PRE)
312 			break;
313 
314 	had = t == NULL ? ROFF_fi : ROFF_nf;
315 
316 	if (want != had) {
317 		switch (want) {
318 		case ROFF_fi:
319 			print_tagq(h, t);
320 			break;
321 		case ROFF_nf:
322 			html_close_paragraph(h);
323 			print_otag(h, TAG_PRE, "");
324 			break;
325 		case TOKEN_NONE:
326 			break;
327 		default:
328 			abort();
329 		}
330 	}
331 	return had;
332 }
333 
334 char *
335 html_make_id(const struct roff_node *n, int unique)
336 {
337 	const struct roff_node	*nch;
338 	char			*buf, *bufs, *cp;
339 	unsigned int		 slot;
340 	int			 suffix;
341 
342 	for (nch = n->child; nch != NULL; nch = nch->next)
343 		if (nch->type != ROFFT_TEXT)
344 			return NULL;
345 
346 	buf = NULL;
347 	deroff(&buf, n);
348 	if (buf == NULL)
349 		return NULL;
350 
351 	/*
352 	 * In ID attributes, only use ASCII characters that are
353 	 * permitted in URL-fragment strings according to the
354 	 * explicit list at:
355 	 * https://url.spec.whatwg.org/#url-fragment-string
356 	 */
357 
358 	for (cp = buf; *cp != '\0'; cp++)
359 		if (isalnum((unsigned char)*cp) == 0 &&
360 		    strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
361 			*cp = '_';
362 
363 	if (unique == 0)
364 		return buf;
365 
366 	/* Avoid duplicate HTML id= attributes. */
367 
368 	bufs = NULL;
369 	suffix = 1;
370 	slot = ohash_qlookup(&id_unique, buf);
371 	cp = ohash_find(&id_unique, slot);
372 	if (cp != NULL) {
373 		while (cp != NULL) {
374 			free(bufs);
375 			if (++suffix > 127) {
376 				free(buf);
377 				return NULL;
378 			}
379 			mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
380 			slot = ohash_qlookup(&id_unique, bufs);
381 			cp = ohash_find(&id_unique, slot);
382 		}
383 		free(buf);
384 		buf = bufs;
385 	}
386 	ohash_insert(&id_unique, slot, buf);
387 	return buf;
388 }
389 
390 static int
391 print_escape(struct html *h, char c)
392 {
393 
394 	switch (c) {
395 	case '<':
396 		print_word(h, "&lt;");
397 		break;
398 	case '>':
399 		print_word(h, "&gt;");
400 		break;
401 	case '&':
402 		print_word(h, "&amp;");
403 		break;
404 	case '"':
405 		print_word(h, "&quot;");
406 		break;
407 	case ASCII_NBRSP:
408 		print_word(h, "&nbsp;");
409 		break;
410 	case ASCII_HYPH:
411 		print_byte(h, '-');
412 		break;
413 	case ASCII_BREAK:
414 		break;
415 	default:
416 		return 0;
417 	}
418 	return 1;
419 }
420 
421 static int
422 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
423 {
424 	char		 numbuf[16];
425 	const char	*seq;
426 	size_t		 sz;
427 	int		 c, len, breakline, nospace;
428 	enum mandoc_esc	 esc;
429 	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
430 		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
431 
432 	if (pend == NULL)
433 		pend = strchr(p, '\0');
434 
435 	breakline = 0;
436 	nospace = 0;
437 
438 	while (p < pend) {
439 		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
440 			h->flags &= ~HTML_SKIPCHAR;
441 			p++;
442 			continue;
443 		}
444 
445 		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
446 			print_byte(h, *p);
447 
448 		if (breakline &&
449 		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
450 			print_otag(h, TAG_BR, "");
451 			breakline = 0;
452 			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
453 				p++;
454 			continue;
455 		}
456 
457 		if (p >= pend)
458 			break;
459 
460 		if (*p == ' ') {
461 			print_endword(h);
462 			p++;
463 			continue;
464 		}
465 
466 		if (print_escape(h, *p++))
467 			continue;
468 
469 		esc = mandoc_escape(&p, &seq, &len);
470 		switch (esc) {
471 		case ESCAPE_FONT:
472 		case ESCAPE_FONTPREV:
473 		case ESCAPE_FONTBOLD:
474 		case ESCAPE_FONTITALIC:
475 		case ESCAPE_FONTBI:
476 		case ESCAPE_FONTCW:
477 		case ESCAPE_FONTROMAN:
478 			if (0 == norecurse) {
479 				h->flags |= HTML_NOSPACE;
480 				print_metaf(h, esc);
481 				h->flags &= ~HTML_NOSPACE;
482 			}
483 			continue;
484 		case ESCAPE_SKIPCHAR:
485 			h->flags |= HTML_SKIPCHAR;
486 			continue;
487 		case ESCAPE_ERROR:
488 			continue;
489 		default:
490 			break;
491 		}
492 
493 		if (h->flags & HTML_SKIPCHAR) {
494 			h->flags &= ~HTML_SKIPCHAR;
495 			continue;
496 		}
497 
498 		switch (esc) {
499 		case ESCAPE_UNICODE:
500 			/* Skip past "u" header. */
501 			c = mchars_num2uc(seq + 1, len - 1);
502 			break;
503 		case ESCAPE_NUMBERED:
504 			c = mchars_num2char(seq, len);
505 			if (c < 0)
506 				continue;
507 			break;
508 		case ESCAPE_SPECIAL:
509 			c = mchars_spec2cp(seq, len);
510 			if (c <= 0)
511 				continue;
512 			break;
513 		case ESCAPE_UNDEF:
514 			c = *seq;
515 			break;
516 		case ESCAPE_DEVICE:
517 			print_word(h, "html");
518 			continue;
519 		case ESCAPE_BREAK:
520 			breakline = 1;
521 			continue;
522 		case ESCAPE_NOSPACE:
523 			if ('\0' == *p)
524 				nospace = 1;
525 			continue;
526 		case ESCAPE_OVERSTRIKE:
527 			if (len == 0)
528 				continue;
529 			c = seq[len - 1];
530 			break;
531 		default:
532 			continue;
533 		}
534 		if ((c < 0x20 && c != 0x09) ||
535 		    (c > 0x7E && c < 0xA0))
536 			c = 0xFFFD;
537 		if (c > 0x7E) {
538 			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
539 			print_word(h, numbuf);
540 		} else if (print_escape(h, c) == 0)
541 			print_byte(h, c);
542 	}
543 
544 	return nospace;
545 }
546 
547 static void
548 print_href(struct html *h, const char *name, const char *sec, int man)
549 {
550 	struct stat	 sb;
551 	const char	*p, *pp;
552 	char		*filename;
553 
554 	if (man) {
555 		pp = h->base_man1;
556 		if (h->base_man2 != NULL) {
557 			mandoc_asprintf(&filename, "%s.%s", name, sec);
558 			if (stat(filename, &sb) == -1)
559 				pp = h->base_man2;
560 			free(filename);
561 		}
562 	} else
563 		pp = h->base_includes;
564 
565 	while ((p = strchr(pp, '%')) != NULL) {
566 		print_encode(h, pp, p, 1);
567 		if (man && p[1] == 'S') {
568 			if (sec == NULL)
569 				print_byte(h, '1');
570 			else
571 				print_encode(h, sec, NULL, 1);
572 		} else if ((man && p[1] == 'N') ||
573 		    (man == 0 && p[1] == 'I'))
574 			print_encode(h, name, NULL, 1);
575 		else
576 			print_encode(h, p, p + 2, 1);
577 		pp = p + 2;
578 	}
579 	if (*pp != '\0')
580 		print_encode(h, pp, NULL, 1);
581 }
582 
583 struct tag *
584 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
585 {
586 	va_list		 ap;
587 	struct tag	*t;
588 	const char	*attr;
589 	char		*arg1, *arg2;
590 	int		 style_written, tflags;
591 
592 	tflags = htmltags[tag].flags;
593 
594 	/* Push this tag onto the stack of open scopes. */
595 
596 	if ((tflags & HTML_NOSTACK) == 0) {
597 		t = mandoc_malloc(sizeof(struct tag));
598 		t->tag = tag;
599 		t->next = h->tag;
600 		t->refcnt = 0;
601 		t->closed = 0;
602 		h->tag = t;
603 	} else
604 		t = NULL;
605 
606 	if (tflags & HTML_NLBEFORE)
607 		print_endline(h);
608 	if (h->col == 0)
609 		print_indent(h);
610 	else if ((h->flags & HTML_NOSPACE) == 0) {
611 		if (h->flags & HTML_KEEP)
612 			print_word(h, "&#x00A0;");
613 		else {
614 			if (h->flags & HTML_PREKEEP)
615 				h->flags |= HTML_KEEP;
616 			print_endword(h);
617 		}
618 	}
619 
620 	if ( ! (h->flags & HTML_NONOSPACE))
621 		h->flags &= ~HTML_NOSPACE;
622 	else
623 		h->flags |= HTML_NOSPACE;
624 
625 	/* Print out the tag name and attributes. */
626 
627 	print_byte(h, '<');
628 	print_word(h, htmltags[tag].name);
629 
630 	va_start(ap, fmt);
631 
632 	while (*fmt != '\0' && *fmt != 's') {
633 
634 		/* Parse attributes and arguments. */
635 
636 		arg1 = va_arg(ap, char *);
637 		arg2 = NULL;
638 		switch (*fmt++) {
639 		case 'c':
640 			attr = "class";
641 			break;
642 		case 'h':
643 			attr = "href";
644 			break;
645 		case 'i':
646 			attr = "id";
647 			break;
648 		case '?':
649 			attr = arg1;
650 			arg1 = va_arg(ap, char *);
651 			break;
652 		default:
653 			abort();
654 		}
655 		if (*fmt == 'M')
656 			arg2 = va_arg(ap, char *);
657 		if (arg1 == NULL)
658 			continue;
659 
660 		/* Print the attributes. */
661 
662 		print_byte(h, ' ');
663 		print_word(h, attr);
664 		print_byte(h, '=');
665 		print_byte(h, '"');
666 		switch (*fmt) {
667 		case 'I':
668 			print_href(h, arg1, NULL, 0);
669 			fmt++;
670 			break;
671 		case 'M':
672 			print_href(h, arg1, arg2, 1);
673 			fmt++;
674 			break;
675 		case 'R':
676 			print_byte(h, '#');
677 			print_encode(h, arg1, NULL, 1);
678 			fmt++;
679 			break;
680 		default:
681 			print_encode(h, arg1, NULL, 1);
682 			break;
683 		}
684 		print_byte(h, '"');
685 	}
686 
687 	style_written = 0;
688 	while (*fmt++ == 's') {
689 		arg1 = va_arg(ap, char *);
690 		arg2 = va_arg(ap, char *);
691 		if (arg2 == NULL)
692 			continue;
693 		print_byte(h, ' ');
694 		if (style_written == 0) {
695 			print_word(h, "style=\"");
696 			style_written = 1;
697 		}
698 		print_word(h, arg1);
699 		print_byte(h, ':');
700 		print_byte(h, ' ');
701 		print_word(h, arg2);
702 		print_byte(h, ';');
703 	}
704 	if (style_written)
705 		print_byte(h, '"');
706 
707 	va_end(ap);
708 
709 	/* Accommodate for "well-formed" singleton escaping. */
710 
711 	if (HTML_AUTOCLOSE & htmltags[tag].flags)
712 		print_byte(h, '/');
713 
714 	print_byte(h, '>');
715 
716 	if (tflags & HTML_NLBEGIN)
717 		print_endline(h);
718 	else
719 		h->flags |= HTML_NOSPACE;
720 
721 	if (tflags & HTML_INDENT)
722 		h->indent++;
723 	if (tflags & HTML_NOINDENT)
724 		h->noindent++;
725 
726 	return t;
727 }
728 
729 static void
730 print_ctag(struct html *h, struct tag *tag)
731 {
732 	int	 tflags;
733 
734 	if (tag->closed == 0) {
735 		tag->closed = 1;
736 		if (tag == h->metaf)
737 			h->metaf = NULL;
738 		if (tag == h->tblt)
739 			h->tblt = NULL;
740 
741 		tflags = htmltags[tag->tag].flags;
742 		if (tflags & HTML_INDENT)
743 			h->indent--;
744 		if (tflags & HTML_NOINDENT)
745 			h->noindent--;
746 		if (tflags & HTML_NLEND)
747 			print_endline(h);
748 		print_indent(h);
749 		print_byte(h, '<');
750 		print_byte(h, '/');
751 		print_word(h, htmltags[tag->tag].name);
752 		print_byte(h, '>');
753 		if (tflags & HTML_NLAFTER)
754 			print_endline(h);
755 	}
756 	if (tag->refcnt == 0) {
757 		h->tag = tag->next;
758 		free(tag);
759 	}
760 }
761 
/*
 * Write the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
768 
769 void
770 print_gen_comment(struct html *h, struct roff_node *n)
771 {
772 	int	 wantblank;
773 
774 	print_word(h, "<!-- This is an automatically generated file."
775 	    "  Do not edit.");
776 	h->indent = 1;
777 	wantblank = 0;
778 	while (n != NULL && n->type == ROFFT_COMMENT) {
779 		if (strstr(n->string, "-->") == NULL &&
780 		    (wantblank || *n->string != '\0')) {
781 			print_endline(h);
782 			print_indent(h);
783 			print_word(h, n->string);
784 			wantblank = *n->string != '\0';
785 		}
786 		n = n->next;
787 	}
788 	if (wantblank)
789 		print_endline(h);
790 	print_word(h, " -->");
791 	print_endline(h);
792 	h->indent = 0;
793 }
794 
795 void
796 print_text(struct html *h, const char *word)
797 {
798 	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
799 		if ( ! (HTML_KEEP & h->flags)) {
800 			if (HTML_PREKEEP & h->flags)
801 				h->flags |= HTML_KEEP;
802 			print_endword(h);
803 		} else
804 			print_word(h, "&#x00A0;");
805 	}
806 
807 	assert(NULL == h->metaf);
808 	switch (h->metac) {
809 	case HTMLFONT_ITALIC:
810 		h->metaf = print_otag(h, TAG_I, "");
811 		break;
812 	case HTMLFONT_BOLD:
813 		h->metaf = print_otag(h, TAG_B, "");
814 		break;
815 	case HTMLFONT_BI:
816 		h->metaf = print_otag(h, TAG_B, "");
817 		print_otag(h, TAG_I, "");
818 		break;
819 	case HTMLFONT_CW:
820 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
821 		break;
822 	default:
823 		print_indent(h);
824 		break;
825 	}
826 
827 	assert(word);
828 	if ( ! print_encode(h, word, NULL, 0)) {
829 		if ( ! (h->flags & HTML_NONOSPACE))
830 			h->flags &= ~HTML_NOSPACE;
831 		h->flags &= ~HTML_NONEWLINE;
832 	} else
833 		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
834 
835 	if (h->metaf) {
836 		print_tagq(h, h->metaf);
837 		h->metaf = NULL;
838 	}
839 
840 	h->flags &= ~HTML_IGNDELIM;
841 }
842 
843 void
844 print_tagq(struct html *h, const struct tag *until)
845 {
846 	struct tag	*this, *next;
847 
848 	for (this = h->tag; this != NULL; this = next) {
849 		next = this == until ? NULL : this->next;
850 		print_ctag(h, this);
851 	}
852 }
853 
854 /*
855  * Close out all open elements up to but excluding suntil.
856  * Note that a paragraph just inside stays open together with it
857  * because paragraphs include subsequent phrasing content.
858  */
859 void
860 print_stagq(struct html *h, const struct tag *suntil)
861 {
862 	struct tag	*this, *next;
863 
864 	for (this = h->tag; this != NULL; this = next) {
865 		next = this->next;
866 		if (this == suntil || (next == suntil &&
867 		    (this->tag == TAG_P || this->tag == TAG_PRE)))
868 			break;
869 		print_ctag(h, this);
870 	}
871 }
872 
873 
874 /***********************************************************************
875  * Low level output functions.
876  * They implement line breaking using a short static buffer.
877  ***********************************************************************/
878 
879 /*
880  * Buffer one HTML output byte.
881  * If the buffer is full, flush and deactivate it and start a new line.
882  * If the buffer is inactive, print directly.
883  */
884 static void
885 print_byte(struct html *h, char c)
886 {
887 	if ((h->flags & HTML_BUFFER) == 0) {
888 		putchar(c);
889 		h->col++;
890 		return;
891 	}
892 
893 	if (h->col + h->bufcol < sizeof(h->buf)) {
894 		h->buf[h->bufcol++] = c;
895 		return;
896 	}
897 
898 	putchar('\n');
899 	h->col = 0;
900 	print_indent(h);
901 	putchar(' ');
902 	putchar(' ');
903 	fwrite(h->buf, h->bufcol, 1, stdout);
904 	putchar(c);
905 	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
906 	h->bufcol = 0;
907 	h->flags &= ~HTML_BUFFER;
908 }
909 
910 /*
911  * If something was printed on the current output line, end it.
912  * Not to be called right after print_indent().
913  */
914 void
915 print_endline(struct html *h)
916 {
917 	if (h->col == 0)
918 		return;
919 
920 	if (h->bufcol) {
921 		putchar(' ');
922 		fwrite(h->buf, h->bufcol, 1, stdout);
923 		h->bufcol = 0;
924 	}
925 	putchar('\n');
926 	h->col = 0;
927 	h->flags |= HTML_NOSPACE;
928 	h->flags &= ~HTML_BUFFER;
929 }
930 
931 /*
932  * Flush the HTML output buffer.
933  * If it is inactive, activate it.
934  */
935 static void
936 print_endword(struct html *h)
937 {
938 	if (h->noindent) {
939 		print_byte(h, ' ');
940 		return;
941 	}
942 
943 	if ((h->flags & HTML_BUFFER) == 0) {
944 		h->col++;
945 		h->flags |= HTML_BUFFER;
946 	} else if (h->bufcol) {
947 		putchar(' ');
948 		fwrite(h->buf, h->bufcol, 1, stdout);
949 		h->col += h->bufcol + 1;
950 	}
951 	h->bufcol = 0;
952 }
953 
954 /*
955  * If at the beginning of a new output line,
956  * perform indentation and mark the line as containing output.
957  * Make sure to really produce some output right afterwards,
958  * but do not use print_otag() for producing it.
959  */
960 static void
961 print_indent(struct html *h)
962 {
963 	size_t	 i;
964 
965 	if (h->col)
966 		return;
967 
968 	if (h->noindent == 0) {
969 		h->col = h->indent * 2;
970 		for (i = 0; i < h->col; i++)
971 			putchar(' ');
972 	}
973 	h->flags &= ~HTML_NOSPACE;
974 }
975 
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.  No encoding is performed.
 */
static void
print_word(struct html *h, const char *cp)
{
	const char	*bp;

	for (bp = cp; *bp != '\0'; bp++)
		print_byte(h, *bp);
}
986