xref: /openbsd-src/usr.bin/mandoc/html.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: html.c,v 1.125 2019/04/30 15:52:42 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stddef.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "out.h"
36 #include "html.h"
37 #include "manconf.h"
38 #include "main.h"
39 
/*
 * Per-element output flags, combined by bitwise OR in htmldata.flags.
 */
#define	HTML_NOSTACK	 (1 << 0)  /* do not push onto the open-tag stack */
#define	HTML_AUTOCLOSE	 (1 << 1)  /* write '/' before the closing '>' */
#define	HTML_NLBEFORE	 (1 << 2)  /* end the line before the opening tag */
#define	HTML_NLBEGIN	 (1 << 3)  /* end the line after the opening tag */
#define	HTML_NLEND	 (1 << 4)  /* end the line before the closing tag */
#define	HTML_NLAFTER	 (1 << 5)  /* end the line after the closing tag */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 (1 << 6)  /* content raises the indent level */
#define	HTML_NOINDENT	 (1 << 7)  /* content raises the noindent level */

/*
 * Static description of one kind of HTML element.
 */
struct	htmldata {
	const char	 *name;   /* element name as written to the output */
	int		  flags;  /* bitwise OR of the HTML_* flags above */
};
55 
56 static	const struct htmldata htmltags[TAG_MAX] = {
57 	{"html",	HTML_NLALL},
58 	{"head",	HTML_NLALL | HTML_INDENT},
59 	{"body",	HTML_NLALL},
60 	{"meta",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
61 	{"title",	HTML_NLAROUND},
62 	{"div",		HTML_NLAROUND},
63 	{"div",		0},
64 	{"section",	HTML_NLALL},
65 	{"h1",		HTML_NLAROUND},
66 	{"h2",		HTML_NLAROUND},
67 	{"span",	0},
68 	{"link",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
69 	{"br",		HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
70 	{"a",		0},
71 	{"table",	HTML_NLALL | HTML_INDENT},
72 	{"tr",		HTML_NLALL | HTML_INDENT},
73 	{"td",		HTML_NLAROUND},
74 	{"li",		HTML_NLAROUND | HTML_INDENT},
75 	{"ul",		HTML_NLALL | HTML_INDENT},
76 	{"ol",		HTML_NLALL | HTML_INDENT},
77 	{"dl",		HTML_NLALL | HTML_INDENT},
78 	{"dt",		HTML_NLAROUND},
79 	{"dd",		HTML_NLAROUND | HTML_INDENT},
80 	{"p",		HTML_NLAROUND | HTML_INDENT},
81 	{"pre",		HTML_NLALL | HTML_NOINDENT},
82 	{"var",		0},
83 	{"cite",	0},
84 	{"b",		0},
85 	{"i",		0},
86 	{"code",	0},
87 	{"small",	0},
88 	{"style",	HTML_NLALL | HTML_INDENT},
89 	{"math",	HTML_NLALL | HTML_INDENT},
90 	{"mrow",	0},
91 	{"mi",		0},
92 	{"mn",		0},
93 	{"mo",		0},
94 	{"msup",	0},
95 	{"msub",	0},
96 	{"msubsup",	0},
97 	{"mfrac",	0},
98 	{"msqrt",	0},
99 	{"mfenced",	0},
100 	{"mtable",	0},
101 	{"mtr",		0},
102 	{"mtd",		0},
103 	{"munderover",	0},
104 	{"munder",	0},
105 	{"mover",	0},
106 };
107 
108 /* Avoid duplicate HTML id= attributes. */
109 static	struct ohash	 id_unique;
110 
111 static	void	 html_reset_internal(struct html *);
112 static	void	 print_byte(struct html *, char);
113 static	void	 print_endword(struct html *);
114 static	void	 print_indent(struct html *);
115 static	void	 print_word(struct html *, const char *);
116 
117 static	void	 print_ctag(struct html *, struct tag *);
118 static	int	 print_escape(struct html *, char);
119 static	int	 print_encode(struct html *, const char *, const char *, int);
120 static	void	 print_href(struct html *, const char *, const char *, int);
121 static	void	 print_metaf(struct html *);
122 
123 
124 void *
125 html_alloc(const struct manoutput *outopts)
126 {
127 	struct html	*h;
128 
129 	h = mandoc_calloc(1, sizeof(struct html));
130 
131 	h->tag = NULL;
132 	h->style = outopts->style;
133 	if ((h->base_man1 = outopts->man) == NULL)
134 		h->base_man2 = NULL;
135 	else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
136 		*h->base_man2++ = '\0';
137 	h->base_includes = outopts->includes;
138 	if (outopts->fragment)
139 		h->oflags |= HTML_FRAGMENT;
140 	if (outopts->toc)
141 		h->oflags |= HTML_TOC;
142 
143 	mandoc_ohash_init(&id_unique, 4, 0);
144 
145 	return h;
146 }
147 
148 static void
149 html_reset_internal(struct html *h)
150 {
151 	struct tag	*tag;
152 	char		*cp;
153 	unsigned int	 slot;
154 
155 	while ((tag = h->tag) != NULL) {
156 		h->tag = tag->next;
157 		free(tag);
158 	}
159 	cp = ohash_first(&id_unique, &slot);
160 	while (cp != NULL) {
161 		free(cp);
162 		cp = ohash_next(&id_unique, &slot);
163 	}
164 	ohash_delete(&id_unique);
165 }
166 
167 void
168 html_reset(void *p)
169 {
170 	html_reset_internal(p);
171 	mandoc_ohash_init(&id_unique, 4, 0);
172 }
173 
/*
 * Discard the per-document state of the formatter p (a struct html)
 * and then free the formatter itself.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
180 
181 void
182 print_gen_head(struct html *h)
183 {
184 	struct tag	*t;
185 
186 	print_otag(h, TAG_META, "?", "charset", "utf-8");
187 	if (h->style != NULL) {
188 		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
189 		    h->style, "type", "text/css", "media", "all");
190 		return;
191 	}
192 
193 	/*
194 	 * Print a minimal embedded style sheet.
195 	 */
196 
197 	t = print_otag(h, TAG_STYLE, "");
198 	print_text(h, "table.head, table.foot { width: 100%; }");
199 	print_endline(h);
200 	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
201 	print_endline(h);
202 	print_text(h, "td.head-vol { text-align: center; }");
203 	print_endline(h);
204 	print_text(h, "div.Pp { margin: 1ex 0ex; }");
205 	print_endline(h);
206 	print_text(h, "div.Nd, div.Bf, div.Op { display: inline; }");
207 	print_endline(h);
208 	print_text(h, "span.Pa, span.Ad { font-style: italic; }");
209 	print_endline(h);
210 	print_text(h, "span.Ms { font-weight: bold; }");
211 	print_endline(h);
212 	print_text(h, "dl.Bl-diag ");
213 	print_byte(h, '>');
214 	print_text(h, " dt { font-weight: bold; }");
215 	print_endline(h);
216 	print_text(h, "code.Nm, code.Fl, code.Cm, code.Ic, "
217 	    "code.In, code.Fd, code.Fn,");
218 	print_endline(h);
219 	print_text(h, "code.Cd { font-weight: bold; "
220 	    "font-family: inherit; }");
221 	print_tagq(h, t);
222 }
223 
224 int
225 html_setfont(struct html *h, enum mandoc_esc font)
226 {
227 	switch (font) {
228 	case ESCAPE_FONTPREV:
229 		font = h->metal;
230 		break;
231 	case ESCAPE_FONTITALIC:
232 	case ESCAPE_FONTBOLD:
233 	case ESCAPE_FONTBI:
234 	case ESCAPE_FONTCW:
235 	case ESCAPE_FONTROMAN:
236 		break;
237 	case ESCAPE_FONT:
238 		font = ESCAPE_FONTROMAN;
239 		break;
240 	default:
241 		return 0;
242 	}
243 	h->metal = h->metac;
244 	h->metac = font;
245 	return 1;
246 }
247 
248 static void
249 print_metaf(struct html *h)
250 {
251 	if (h->metaf) {
252 		print_tagq(h, h->metaf);
253 		h->metaf = NULL;
254 	}
255 	switch (h->metac) {
256 	case ESCAPE_FONTITALIC:
257 		h->metaf = print_otag(h, TAG_I, "");
258 		break;
259 	case ESCAPE_FONTBOLD:
260 		h->metaf = print_otag(h, TAG_B, "");
261 		break;
262 	case ESCAPE_FONTBI:
263 		h->metaf = print_otag(h, TAG_B, "");
264 		print_otag(h, TAG_I, "");
265 		break;
266 	case ESCAPE_FONTCW:
267 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
268 		break;
269 	default:
270 		break;
271 	}
272 }
273 
274 void
275 html_close_paragraph(struct html *h)
276 {
277 	struct tag	*t;
278 
279 	for (t = h->tag; t != NULL && t->closed == 0; t = t->next) {
280 		switch(t->tag) {
281 		case TAG_P:
282 		case TAG_PRE:
283 			print_tagq(h, t);
284 			break;
285 		case TAG_A:
286 			print_tagq(h, t);
287 			continue;
288 		default:
289 			continue;
290 		}
291 		break;
292 	}
293 }
294 
295 /*
296  * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
297  * TOKEN_NONE does not switch.  The old mode is returned.
298  */
299 enum roff_tok
300 html_fillmode(struct html *h, enum roff_tok want)
301 {
302 	struct tag	*t;
303 	enum roff_tok	 had;
304 
305 	for (t = h->tag; t != NULL; t = t->next)
306 		if (t->tag == TAG_PRE)
307 			break;
308 
309 	had = t == NULL ? ROFF_fi : ROFF_nf;
310 
311 	if (want != had) {
312 		switch (want) {
313 		case ROFF_fi:
314 			print_tagq(h, t);
315 			break;
316 		case ROFF_nf:
317 			html_close_paragraph(h);
318 			print_otag(h, TAG_PRE, "");
319 			break;
320 		case TOKEN_NONE:
321 			break;
322 		default:
323 			abort();
324 		}
325 	}
326 	return had;
327 }
328 
329 char *
330 html_make_id(const struct roff_node *n, int unique)
331 {
332 	const struct roff_node	*nch;
333 	char			*buf, *bufs, *cp;
334 	unsigned int		 slot;
335 	int			 suffix;
336 
337 	for (nch = n->child; nch != NULL; nch = nch->next)
338 		if (nch->type != ROFFT_TEXT)
339 			return NULL;
340 
341 	buf = NULL;
342 	deroff(&buf, n);
343 	if (buf == NULL)
344 		return NULL;
345 
346 	/*
347 	 * In ID attributes, only use ASCII characters that are
348 	 * permitted in URL-fragment strings according to the
349 	 * explicit list at:
350 	 * https://url.spec.whatwg.org/#url-fragment-string
351 	 */
352 
353 	for (cp = buf; *cp != '\0'; cp++)
354 		if (isalnum((unsigned char)*cp) == 0 &&
355 		    strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
356 			*cp = '_';
357 
358 	if (unique == 0)
359 		return buf;
360 
361 	/* Avoid duplicate HTML id= attributes. */
362 
363 	bufs = NULL;
364 	suffix = 1;
365 	slot = ohash_qlookup(&id_unique, buf);
366 	cp = ohash_find(&id_unique, slot);
367 	if (cp != NULL) {
368 		while (cp != NULL) {
369 			free(bufs);
370 			if (++suffix > 127) {
371 				free(buf);
372 				return NULL;
373 			}
374 			mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
375 			slot = ohash_qlookup(&id_unique, bufs);
376 			cp = ohash_find(&id_unique, slot);
377 		}
378 		free(buf);
379 		buf = bufs;
380 	}
381 	ohash_insert(&id_unique, slot, buf);
382 	return buf;
383 }
384 
385 static int
386 print_escape(struct html *h, char c)
387 {
388 
389 	switch (c) {
390 	case '<':
391 		print_word(h, "&lt;");
392 		break;
393 	case '>':
394 		print_word(h, "&gt;");
395 		break;
396 	case '&':
397 		print_word(h, "&amp;");
398 		break;
399 	case '"':
400 		print_word(h, "&quot;");
401 		break;
402 	case ASCII_NBRSP:
403 		print_word(h, "&nbsp;");
404 		break;
405 	case ASCII_HYPH:
406 		print_byte(h, '-');
407 		break;
408 	case ASCII_BREAK:
409 		break;
410 	default:
411 		return 0;
412 	}
413 	return 1;
414 }
415 
416 static int
417 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
418 {
419 	char		 numbuf[16];
420 	const char	*seq;
421 	size_t		 sz;
422 	int		 c, len, breakline, nospace;
423 	enum mandoc_esc	 esc;
424 	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
425 		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
426 
427 	if (pend == NULL)
428 		pend = strchr(p, '\0');
429 
430 	breakline = 0;
431 	nospace = 0;
432 
433 	while (p < pend) {
434 		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
435 			h->flags &= ~HTML_SKIPCHAR;
436 			p++;
437 			continue;
438 		}
439 
440 		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
441 			print_byte(h, *p);
442 
443 		if (breakline &&
444 		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
445 			print_otag(h, TAG_BR, "");
446 			breakline = 0;
447 			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
448 				p++;
449 			continue;
450 		}
451 
452 		if (p >= pend)
453 			break;
454 
455 		if (*p == ' ') {
456 			print_endword(h);
457 			p++;
458 			continue;
459 		}
460 
461 		if (print_escape(h, *p++))
462 			continue;
463 
464 		esc = mandoc_escape(&p, &seq, &len);
465 		switch (esc) {
466 		case ESCAPE_FONT:
467 		case ESCAPE_FONTPREV:
468 		case ESCAPE_FONTBOLD:
469 		case ESCAPE_FONTITALIC:
470 		case ESCAPE_FONTBI:
471 		case ESCAPE_FONTCW:
472 		case ESCAPE_FONTROMAN:
473 			if (0 == norecurse) {
474 				h->flags |= HTML_NOSPACE;
475 				if (html_setfont(h, esc))
476 					print_metaf(h);
477 				h->flags &= ~HTML_NOSPACE;
478 			}
479 			continue;
480 		case ESCAPE_SKIPCHAR:
481 			h->flags |= HTML_SKIPCHAR;
482 			continue;
483 		case ESCAPE_ERROR:
484 			continue;
485 		default:
486 			break;
487 		}
488 
489 		if (h->flags & HTML_SKIPCHAR) {
490 			h->flags &= ~HTML_SKIPCHAR;
491 			continue;
492 		}
493 
494 		switch (esc) {
495 		case ESCAPE_UNICODE:
496 			/* Skip past "u" header. */
497 			c = mchars_num2uc(seq + 1, len - 1);
498 			break;
499 		case ESCAPE_NUMBERED:
500 			c = mchars_num2char(seq, len);
501 			if (c < 0)
502 				continue;
503 			break;
504 		case ESCAPE_SPECIAL:
505 			c = mchars_spec2cp(seq, len);
506 			if (c <= 0)
507 				continue;
508 			break;
509 		case ESCAPE_UNDEF:
510 			c = *seq;
511 			break;
512 		case ESCAPE_DEVICE:
513 			print_word(h, "html");
514 			continue;
515 		case ESCAPE_BREAK:
516 			breakline = 1;
517 			continue;
518 		case ESCAPE_NOSPACE:
519 			if ('\0' == *p)
520 				nospace = 1;
521 			continue;
522 		case ESCAPE_OVERSTRIKE:
523 			if (len == 0)
524 				continue;
525 			c = seq[len - 1];
526 			break;
527 		default:
528 			continue;
529 		}
530 		if ((c < 0x20 && c != 0x09) ||
531 		    (c > 0x7E && c < 0xA0))
532 			c = 0xFFFD;
533 		if (c > 0x7E) {
534 			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
535 			print_word(h, numbuf);
536 		} else if (print_escape(h, c) == 0)
537 			print_byte(h, c);
538 	}
539 
540 	return nospace;
541 }
542 
543 static void
544 print_href(struct html *h, const char *name, const char *sec, int man)
545 {
546 	struct stat	 sb;
547 	const char	*p, *pp;
548 	char		*filename;
549 
550 	if (man) {
551 		pp = h->base_man1;
552 		if (h->base_man2 != NULL) {
553 			mandoc_asprintf(&filename, "%s.%s", name, sec);
554 			if (stat(filename, &sb) == -1)
555 				pp = h->base_man2;
556 			free(filename);
557 		}
558 	} else
559 		pp = h->base_includes;
560 
561 	while ((p = strchr(pp, '%')) != NULL) {
562 		print_encode(h, pp, p, 1);
563 		if (man && p[1] == 'S') {
564 			if (sec == NULL)
565 				print_byte(h, '1');
566 			else
567 				print_encode(h, sec, NULL, 1);
568 		} else if ((man && p[1] == 'N') ||
569 		    (man == 0 && p[1] == 'I'))
570 			print_encode(h, name, NULL, 1);
571 		else
572 			print_encode(h, p, p + 2, 1);
573 		pp = p + 2;
574 	}
575 	if (*pp != '\0')
576 		print_encode(h, pp, NULL, 1);
577 }
578 
579 struct tag *
580 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
581 {
582 	va_list		 ap;
583 	struct tag	*t;
584 	const char	*attr;
585 	char		*arg1, *arg2;
586 	int		 style_written, tflags;
587 
588 	tflags = htmltags[tag].flags;
589 
590 	/* Push this tag onto the stack of open scopes. */
591 
592 	if ((tflags & HTML_NOSTACK) == 0) {
593 		t = mandoc_malloc(sizeof(struct tag));
594 		t->tag = tag;
595 		t->next = h->tag;
596 		t->refcnt = 0;
597 		t->closed = 0;
598 		h->tag = t;
599 	} else
600 		t = NULL;
601 
602 	if (tflags & HTML_NLBEFORE)
603 		print_endline(h);
604 	if (h->col == 0)
605 		print_indent(h);
606 	else if ((h->flags & HTML_NOSPACE) == 0) {
607 		if (h->flags & HTML_KEEP)
608 			print_word(h, "&#x00A0;");
609 		else {
610 			if (h->flags & HTML_PREKEEP)
611 				h->flags |= HTML_KEEP;
612 			print_endword(h);
613 		}
614 	}
615 
616 	if ( ! (h->flags & HTML_NONOSPACE))
617 		h->flags &= ~HTML_NOSPACE;
618 	else
619 		h->flags |= HTML_NOSPACE;
620 
621 	/* Print out the tag name and attributes. */
622 
623 	print_byte(h, '<');
624 	print_word(h, htmltags[tag].name);
625 
626 	va_start(ap, fmt);
627 
628 	while (*fmt != '\0' && *fmt != 's') {
629 
630 		/* Parse attributes and arguments. */
631 
632 		arg1 = va_arg(ap, char *);
633 		arg2 = NULL;
634 		switch (*fmt++) {
635 		case 'c':
636 			attr = "class";
637 			break;
638 		case 'h':
639 			attr = "href";
640 			break;
641 		case 'i':
642 			attr = "id";
643 			break;
644 		case '?':
645 			attr = arg1;
646 			arg1 = va_arg(ap, char *);
647 			break;
648 		default:
649 			abort();
650 		}
651 		if (*fmt == 'M')
652 			arg2 = va_arg(ap, char *);
653 		if (arg1 == NULL)
654 			continue;
655 
656 		/* Print the attributes. */
657 
658 		print_byte(h, ' ');
659 		print_word(h, attr);
660 		print_byte(h, '=');
661 		print_byte(h, '"');
662 		switch (*fmt) {
663 		case 'I':
664 			print_href(h, arg1, NULL, 0);
665 			fmt++;
666 			break;
667 		case 'M':
668 			print_href(h, arg1, arg2, 1);
669 			fmt++;
670 			break;
671 		case 'R':
672 			print_byte(h, '#');
673 			print_encode(h, arg1, NULL, 1);
674 			fmt++;
675 			break;
676 		default:
677 			print_encode(h, arg1, NULL, 1);
678 			break;
679 		}
680 		print_byte(h, '"');
681 	}
682 
683 	style_written = 0;
684 	while (*fmt++ == 's') {
685 		arg1 = va_arg(ap, char *);
686 		arg2 = va_arg(ap, char *);
687 		if (arg2 == NULL)
688 			continue;
689 		print_byte(h, ' ');
690 		if (style_written == 0) {
691 			print_word(h, "style=\"");
692 			style_written = 1;
693 		}
694 		print_word(h, arg1);
695 		print_byte(h, ':');
696 		print_byte(h, ' ');
697 		print_word(h, arg2);
698 		print_byte(h, ';');
699 	}
700 	if (style_written)
701 		print_byte(h, '"');
702 
703 	va_end(ap);
704 
705 	/* Accommodate for "well-formed" singleton escaping. */
706 
707 	if (HTML_AUTOCLOSE & htmltags[tag].flags)
708 		print_byte(h, '/');
709 
710 	print_byte(h, '>');
711 
712 	if (tflags & HTML_NLBEGIN)
713 		print_endline(h);
714 	else
715 		h->flags |= HTML_NOSPACE;
716 
717 	if (tflags & HTML_INDENT)
718 		h->indent++;
719 	if (tflags & HTML_NOINDENT)
720 		h->noindent++;
721 
722 	return t;
723 }
724 
725 static void
726 print_ctag(struct html *h, struct tag *tag)
727 {
728 	int	 tflags;
729 
730 	if (tag->closed == 0) {
731 		tag->closed = 1;
732 		if (tag == h->metaf)
733 			h->metaf = NULL;
734 		if (tag == h->tblt)
735 			h->tblt = NULL;
736 
737 		tflags = htmltags[tag->tag].flags;
738 		if (tflags & HTML_INDENT)
739 			h->indent--;
740 		if (tflags & HTML_NOINDENT)
741 			h->noindent--;
742 		if (tflags & HTML_NLEND)
743 			print_endline(h);
744 		print_indent(h);
745 		print_byte(h, '<');
746 		print_byte(h, '/');
747 		print_word(h, htmltags[tag->tag].name);
748 		print_byte(h, '>');
749 		if (tflags & HTML_NLAFTER)
750 			print_endline(h);
751 	}
752 	if (tag->refcnt == 0) {
753 		h->tag = tag->next;
754 		free(tag);
755 	}
756 }
757 
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
764 
765 void
766 print_gen_comment(struct html *h, struct roff_node *n)
767 {
768 	int	 wantblank;
769 
770 	print_word(h, "<!-- This is an automatically generated file."
771 	    "  Do not edit.");
772 	h->indent = 1;
773 	wantblank = 0;
774 	while (n != NULL && n->type == ROFFT_COMMENT) {
775 		if (strstr(n->string, "-->") == NULL &&
776 		    (wantblank || *n->string != '\0')) {
777 			print_endline(h);
778 			print_indent(h);
779 			print_word(h, n->string);
780 			wantblank = *n->string != '\0';
781 		}
782 		n = n->next;
783 	}
784 	if (wantblank)
785 		print_endline(h);
786 	print_word(h, " -->");
787 	print_endline(h);
788 	h->indent = 0;
789 }
790 
791 void
792 print_text(struct html *h, const char *word)
793 {
794 	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
795 		if ( ! (HTML_KEEP & h->flags)) {
796 			if (HTML_PREKEEP & h->flags)
797 				h->flags |= HTML_KEEP;
798 			print_endword(h);
799 		} else
800 			print_word(h, "&#x00A0;");
801 	}
802 
803 	assert(h->metaf == NULL);
804 	print_metaf(h);
805 	print_indent(h);
806 	if ( ! print_encode(h, word, NULL, 0)) {
807 		if ( ! (h->flags & HTML_NONOSPACE))
808 			h->flags &= ~HTML_NOSPACE;
809 		h->flags &= ~HTML_NONEWLINE;
810 	} else
811 		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
812 
813 	if (h->metaf != NULL) {
814 		print_tagq(h, h->metaf);
815 		h->metaf = NULL;
816 	}
817 
818 	h->flags &= ~HTML_IGNDELIM;
819 }
820 
821 void
822 print_tagq(struct html *h, const struct tag *until)
823 {
824 	struct tag	*this, *next;
825 
826 	for (this = h->tag; this != NULL; this = next) {
827 		next = this == until ? NULL : this->next;
828 		print_ctag(h, this);
829 	}
830 }
831 
832 /*
833  * Close out all open elements up to but excluding suntil.
834  * Note that a paragraph just inside stays open together with it
835  * because paragraphs include subsequent phrasing content.
836  */
837 void
838 print_stagq(struct html *h, const struct tag *suntil)
839 {
840 	struct tag	*this, *next;
841 
842 	for (this = h->tag; this != NULL; this = next) {
843 		next = this->next;
844 		if (this == suntil || (next == suntil &&
845 		    (this->tag == TAG_P || this->tag == TAG_PRE)))
846 			break;
847 		print_ctag(h, this);
848 	}
849 }
850 
851 
852 /***********************************************************************
853  * Low level output functions.
854  * They implement line breaking using a short static buffer.
855  ***********************************************************************/
856 
857 /*
858  * Buffer one HTML output byte.
859  * If the buffer is full, flush and deactivate it and start a new line.
860  * If the buffer is inactive, print directly.
861  */
862 static void
863 print_byte(struct html *h, char c)
864 {
865 	if ((h->flags & HTML_BUFFER) == 0) {
866 		putchar(c);
867 		h->col++;
868 		return;
869 	}
870 
871 	if (h->col + h->bufcol < sizeof(h->buf)) {
872 		h->buf[h->bufcol++] = c;
873 		return;
874 	}
875 
876 	putchar('\n');
877 	h->col = 0;
878 	print_indent(h);
879 	putchar(' ');
880 	putchar(' ');
881 	fwrite(h->buf, h->bufcol, 1, stdout);
882 	putchar(c);
883 	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
884 	h->bufcol = 0;
885 	h->flags &= ~HTML_BUFFER;
886 }
887 
888 /*
889  * If something was printed on the current output line, end it.
890  * Not to be called right after print_indent().
891  */
892 void
893 print_endline(struct html *h)
894 {
895 	if (h->col == 0)
896 		return;
897 
898 	if (h->bufcol) {
899 		putchar(' ');
900 		fwrite(h->buf, h->bufcol, 1, stdout);
901 		h->bufcol = 0;
902 	}
903 	putchar('\n');
904 	h->col = 0;
905 	h->flags |= HTML_NOSPACE;
906 	h->flags &= ~HTML_BUFFER;
907 }
908 
909 /*
910  * Flush the HTML output buffer.
911  * If it is inactive, activate it.
912  */
913 static void
914 print_endword(struct html *h)
915 {
916 	if (h->noindent) {
917 		print_byte(h, ' ');
918 		return;
919 	}
920 
921 	if ((h->flags & HTML_BUFFER) == 0) {
922 		h->col++;
923 		h->flags |= HTML_BUFFER;
924 	} else if (h->bufcol) {
925 		putchar(' ');
926 		fwrite(h->buf, h->bufcol, 1, stdout);
927 		h->col += h->bufcol + 1;
928 	}
929 	h->bufcol = 0;
930 }
931 
932 /*
933  * If at the beginning of a new output line,
934  * perform indentation and mark the line as containing output.
935  * Make sure to really produce some output right afterwards,
936  * but do not use print_otag() for producing it.
937  */
938 static void
939 print_indent(struct html *h)
940 {
941 	size_t	 i;
942 
943 	if (h->col)
944 		return;
945 
946 	if (h->noindent == 0) {
947 		h->col = h->indent * 2;
948 		for (i = 0; i < h->col; i++)
949 			putchar(' ');
950 	}
951 	h->flags &= ~HTML_NOSPACE;
952 }
953 
/*
 * Pass a NUL-terminated string to print_byte() one byte at a
 * time, such that it is printed or buffered depending on the
 * current HTML output buffer state.  No encoding takes place.
 */
static void
print_word(struct html *h, const char *cp)
{
	const char	*bp;

	for (bp = cp; *bp != '\0'; bp++)
		print_byte(h, *bp);
}
964