xref: /openbsd-src/usr.bin/mandoc/html.c (revision 46035553bfdd96e63c94e32da0210227ec2e3cf1)
1 /* $OpenBSD: html.c,v 1.142 2020/10/16 17:22:38 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Common functions for mandoc(1) HTML formatters.
19  * For use by individual formatters and by the main program.
20  */
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdarg.h>
27 #include <stddef.h>
28 #include <stdio.h>
29 #include <stdint.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "mandoc_aux.h"
35 #include "mandoc_ohash.h"
36 #include "mandoc.h"
37 #include "roff.h"
38 #include "out.h"
39 #include "html.h"
40 #include "manconf.h"
41 #include "main.h"
42 
/*
 * Flag bits describing how an HTML element is treated on output.
 * They are combined in the "flags" member of struct htmldata.
 */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */

/* Convenience combinations of the line break bits. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

/* Static properties of one HTML element type. */
struct	htmldata {
	const char	 *name;   /* Element name printed inside the tags. */
	int		  flags;  /* Bitwise OR of the HTML_* bits above. */
};
59 
/*
 * Element names and output properties, looked up by tag number
 * (the code in this file indexes the table with values of
 * enum htmltag).
 * NOTE(review): the entry order presumably has to match the
 * declaration order of enum htmltag in html.h - confirm before
 * inserting or reordering entries.
 */
static	const struct htmldata htmltags[TAG_MAX] = {
	/* Document structure and flow content. */
	{"html",	HTML_NLALL},
	{"head",	HTML_NLALL | HTML_INDENT},
	{"meta",	HTML_NOSTACK | HTML_NLALL},
	{"link",	HTML_NOSTACK | HTML_NLALL},
	{"style",	HTML_NLALL | HTML_INDENT},
	{"title",	HTML_NLAROUND},
	{"body",	HTML_NLALL},
	{"div",		HTML_NLAROUND},
	{"section",	HTML_NLALL},
	{"table",	HTML_NLALL | HTML_INDENT},
	{"tr",		HTML_NLALL | HTML_INDENT},
	{"td",		HTML_NLAROUND},
	{"li",		HTML_NLAROUND | HTML_INDENT},
	{"ul",		HTML_NLALL | HTML_INDENT},
	{"ol",		HTML_NLALL | HTML_INDENT},
	{"dl",		HTML_NLALL | HTML_INDENT},
	{"dt",		HTML_NLAROUND},
	{"dd",		HTML_NLAROUND | HTML_INDENT},
	/* Flow elements that establish phrasing context. */
	{"h1",		HTML_TOPHRASE | HTML_NLAROUND},
	{"h2",		HTML_TOPHRASE | HTML_NLAROUND},
	{"p",		HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
	{"pre",		HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
	/* Phrasing elements. */
	{"a",		HTML_INPHRASE | HTML_TOPHRASE},
	{"b",		HTML_INPHRASE | HTML_TOPHRASE},
	{"cite",	HTML_INPHRASE | HTML_TOPHRASE},
	{"code",	HTML_INPHRASE | HTML_TOPHRASE},
	{"i",		HTML_INPHRASE | HTML_TOPHRASE},
	{"small",	HTML_INPHRASE | HTML_TOPHRASE},
	{"span",	HTML_INPHRASE | HTML_TOPHRASE},
	{"var",		HTML_INPHRASE | HTML_TOPHRASE},
	{"br",		HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
	{"mark",	HTML_INPHRASE },
	/* Mathematical markup; the inner elements carry no flags. */
	{"math",	HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
	{"mrow",	0},
	{"mi",		0},
	{"mn",		0},
	{"mo",		0},
	{"msup",	0},
	{"msub",	0},
	{"msubsup",	0},
	{"mfrac",	0},
	{"msqrt",	0},
	{"mfenced",	0},
	{"mtable",	0},
	{"mtr",		0},
	{"mtd",		0},
	{"munderover",	0},
	{"munder",	0},
	{"mover",	0},
};
111 
112 /* Avoid duplicate HTML id= attributes. */
113 
/*
 * One entry of the id_unique hash table.
 * The key is the trailing flexible array member, so entries are
 * allocated with room for the NUL-terminated string behind the struct.
 */
struct	id_entry {
	int	 ord;	/* Ordinal number of the latest occurrence. */
	char	 id[];	/* The id= attribute without any ordinal suffix. */
};
/* File-global table, set up in html_alloc() and html_reset(). */
static	struct ohash	 id_unique;
119 
/* Formatter state management and low level, line-breaking output. */
static	void	 html_reset_internal(struct html *);
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);

/* Tag closing, escaping and encoding, URI templates, font tags. */
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_escape(struct html *, char);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *);
131 
132 
133 void *
134 html_alloc(const struct manoutput *outopts)
135 {
136 	struct html	*h;
137 
138 	h = mandoc_calloc(1, sizeof(struct html));
139 
140 	h->tag = NULL;
141 	h->style = outopts->style;
142 	if ((h->base_man1 = outopts->man) == NULL)
143 		h->base_man2 = NULL;
144 	else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
145 		*h->base_man2++ = '\0';
146 	h->base_includes = outopts->includes;
147 	if (outopts->fragment)
148 		h->oflags |= HTML_FRAGMENT;
149 	if (outopts->toc)
150 		h->oflags |= HTML_TOC;
151 
152 	mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
153 
154 	return h;
155 }
156 
157 static void
158 html_reset_internal(struct html *h)
159 {
160 	struct tag	*tag;
161 	struct id_entry	*entry;
162 	unsigned int	 slot;
163 
164 	while ((tag = h->tag) != NULL) {
165 		h->tag = tag->next;
166 		free(tag);
167 	}
168 	entry = ohash_first(&id_unique, &slot);
169 	while (entry != NULL) {
170 		free(entry);
171 		entry = ohash_next(&id_unique, &slot);
172 	}
173 	ohash_delete(&id_unique);
174 }
175 
176 void
177 html_reset(void *p)
178 {
179 	html_reset_internal(p);
180 	mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
181 }
182 
/*
 * Discard all per-document data and the formatter state itself.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
189 
190 void
191 print_gen_head(struct html *h)
192 {
193 	struct tag	*t;
194 
195 	print_otag(h, TAG_META, "?", "charset", "utf-8");
196 	if (h->style != NULL) {
197 		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
198 		    h->style, "type", "text/css", "media", "all");
199 		return;
200 	}
201 
202 	/*
203 	 * Print a minimal embedded style sheet.
204 	 */
205 
206 	t = print_otag(h, TAG_STYLE, "");
207 	print_text(h, "table.head, table.foot { width: 100%; }");
208 	print_endline(h);
209 	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
210 	print_endline(h);
211 	print_text(h, "td.head-vol { text-align: center; }");
212 	print_endline(h);
213 	print_text(h, ".Nd, .Bf, .Op { display: inline; }");
214 	print_endline(h);
215 	print_text(h, ".Pa, .Ad { font-style: italic; }");
216 	print_endline(h);
217 	print_text(h, ".Ms { font-weight: bold; }");
218 	print_endline(h);
219 	print_text(h, ".Bl-diag ");
220 	print_byte(h, '>');
221 	print_text(h, " dt { font-weight: bold; }");
222 	print_endline(h);
223 	print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
224 	    "{ font-weight: bold; font-family: inherit; }");
225 	print_tagq(h, t);
226 }
227 
228 int
229 html_setfont(struct html *h, enum mandoc_esc font)
230 {
231 	switch (font) {
232 	case ESCAPE_FONTPREV:
233 		font = h->metal;
234 		break;
235 	case ESCAPE_FONTITALIC:
236 	case ESCAPE_FONTBOLD:
237 	case ESCAPE_FONTBI:
238 	case ESCAPE_FONTCW:
239 	case ESCAPE_FONTROMAN:
240 		break;
241 	case ESCAPE_FONT:
242 		font = ESCAPE_FONTROMAN;
243 		break;
244 	default:
245 		return 0;
246 	}
247 	h->metal = h->metac;
248 	h->metac = font;
249 	return 1;
250 }
251 
252 static void
253 print_metaf(struct html *h)
254 {
255 	if (h->metaf) {
256 		print_tagq(h, h->metaf);
257 		h->metaf = NULL;
258 	}
259 	switch (h->metac) {
260 	case ESCAPE_FONTITALIC:
261 		h->metaf = print_otag(h, TAG_I, "");
262 		break;
263 	case ESCAPE_FONTBOLD:
264 		h->metaf = print_otag(h, TAG_B, "");
265 		break;
266 	case ESCAPE_FONTBI:
267 		h->metaf = print_otag(h, TAG_B, "");
268 		print_otag(h, TAG_I, "");
269 		break;
270 	case ESCAPE_FONTCW:
271 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
272 		break;
273 	default:
274 		break;
275 	}
276 }
277 
278 void
279 html_close_paragraph(struct html *h)
280 {
281 	struct tag	*this, *next;
282 	int		 flags;
283 
284 	this = h->tag;
285 	for (;;) {
286 		next = this->next;
287 		flags = htmltags[this->tag].flags;
288 		if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
289 			print_ctag(h, this);
290 		if ((flags & HTML_INPHRASE) == 0)
291 			break;
292 		this = next;
293 	}
294 }
295 
296 /*
297  * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
298  * TOKEN_NONE does not switch.  The old mode is returned.
299  */
300 enum roff_tok
301 html_fillmode(struct html *h, enum roff_tok want)
302 {
303 	struct tag	*t;
304 	enum roff_tok	 had;
305 
306 	for (t = h->tag; t != NULL; t = t->next)
307 		if (t->tag == TAG_PRE)
308 			break;
309 
310 	had = t == NULL ? ROFF_fi : ROFF_nf;
311 
312 	if (want != had) {
313 		switch (want) {
314 		case ROFF_fi:
315 			print_tagq(h, t);
316 			break;
317 		case ROFF_nf:
318 			html_close_paragraph(h);
319 			print_otag(h, TAG_PRE, "");
320 			break;
321 		case TOKEN_NONE:
322 			break;
323 		default:
324 			abort();
325 		}
326 	}
327 	return had;
328 }
329 
330 /*
331  * Allocate a string to be used for the "id=" attribute of an HTML
332  * element and/or as a segment identifier for a URI in an <a> element.
333  * The function may fail and return NULL if the node lacks text data
334  * to create the attribute from.
335  * The caller is responsible for free(3)ing the returned string.
336  *
337  * If the "unique" argument is non-zero, the "id_unique" ohash table
338  * is used for de-duplication.  If the "unique" argument is 1,
339  * it is the first time the function is called for this tag and
340  * location, so if an ordinal suffix is needed, it is incremented.
341  * If the "unique" argument is 2, it is the second time the function
342  * is called for this tag and location, so the ordinal suffix
343  * remains unchanged.
344  */
345 char *
346 html_make_id(const struct roff_node *n, int unique)
347 {
348 	const struct roff_node	*nch;
349 	struct id_entry		*entry;
350 	char			*buf, *cp;
351 	size_t			 len;
352 	unsigned int		 slot;
353 
354 	if (n->tag != NULL)
355 		buf = mandoc_strdup(n->tag);
356 	else {
357 		switch (n->tok) {
358 		case MDOC_Sh:
359 		case MDOC_Ss:
360 		case MDOC_Sx:
361 		case MAN_SH:
362 		case MAN_SS:
363 			for (nch = n->child; nch != NULL; nch = nch->next)
364 				if (nch->type != ROFFT_TEXT)
365 					return NULL;
366 			buf = NULL;
367 			deroff(&buf, n);
368 			if (buf == NULL)
369 				return NULL;
370 			break;
371 		default:
372 			if (n->child == NULL || n->child->type != ROFFT_TEXT)
373 				return NULL;
374 			buf = mandoc_strdup(n->child->string);
375 			break;
376 		}
377 	}
378 
379 	/*
380 	 * In ID attributes, only use ASCII characters that are
381 	 * permitted in URL-fragment strings according to the
382 	 * explicit list at:
383 	 * https://url.spec.whatwg.org/#url-fragment-string
384 	 * In addition, reserve '~' for ordinal suffixes.
385 	 */
386 
387 	for (cp = buf; *cp != '\0'; cp++)
388 		if (isalnum((unsigned char)*cp) == 0 &&
389 		    strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
390 			*cp = '_';
391 
392 	if (unique == 0)
393 		return buf;
394 
395 	/* Avoid duplicate HTML id= attributes. */
396 
397 	slot = ohash_qlookup(&id_unique, buf);
398 	if ((entry = ohash_find(&id_unique, slot)) == NULL) {
399 		len = strlen(buf) + 1;
400 		entry = mandoc_malloc(sizeof(*entry) + len);
401 		entry->ord = 1;
402 		memcpy(entry->id, buf, len);
403 		ohash_insert(&id_unique, slot, entry);
404 	} else if (unique == 1)
405 		entry->ord++;
406 
407 	if (entry->ord > 1) {
408 		cp = buf;
409 		mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
410 		free(cp);
411 	}
412 	return buf;
413 }
414 
415 static int
416 print_escape(struct html *h, char c)
417 {
418 
419 	switch (c) {
420 	case '<':
421 		print_word(h, "&lt;");
422 		break;
423 	case '>':
424 		print_word(h, "&gt;");
425 		break;
426 	case '&':
427 		print_word(h, "&amp;");
428 		break;
429 	case '"':
430 		print_word(h, "&quot;");
431 		break;
432 	case ASCII_NBRSP:
433 		print_word(h, "&nbsp;");
434 		break;
435 	case ASCII_HYPH:
436 		print_byte(h, '-');
437 		break;
438 	case ASCII_BREAK:
439 		break;
440 	default:
441 		return 0;
442 	}
443 	return 1;
444 }
445 
/*
 * Print the bytes from p up to but excluding pend, or up to the
 * terminating NUL byte if pend is NULL, escaping HTML markup
 * characters and interpreting roff escape sequences.
 * If norecurse is non-zero, font escapes are skipped without
 * opening or closing any font elements.
 * Returns 1 if a no-space escape was found with nothing but the
 * terminating NUL byte after it, or 0 otherwise.
 */
static int
print_encode(struct html *h, const char *p, const char *pend, int norecurse)
{
	char		 numbuf[16];
	const char	*seq;
	size_t		 sz;
	int		 c, len, breakline, nospace;
	enum mandoc_esc	 esc;
	/* Bytes that end a run of ordinary bytes copied verbatim. */
	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };

	if (pend == NULL)
		pend = strchr(p, '\0');

	breakline = 0;
	nospace = 0;

	while (p < pend) {
		/* A pending skip request swallows one ordinary byte. */
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		/* Copy ordinary bytes up to the next special one. */
		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
			print_byte(h, *p);

		/*
		 * After a break escape, the <br/> element is printed
		 * at the next blank or at the end of the input,
		 * and the blanks following it are dropped.
		 */
		if (breakline &&
		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
			print_otag(h, TAG_BR, "");
			breakline = 0;
			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
				p++;
			continue;
		}

		if (p >= pend)
			break;

		/* A blank ends the current output word. */
		if (*p == ' ') {
			print_endword(h);
			p++;
			continue;
		}

		/* Markup characters and internal marker bytes. */
		if (print_escape(h, *p++))
			continue;

		/* What remains is a backslash: parse the escape sequence. */
		esc = mandoc_escape(&p, &seq, &len);
		switch (esc) {
		case ESCAPE_FONT:
		case ESCAPE_FONTPREV:
		case ESCAPE_FONTBOLD:
		case ESCAPE_FONTITALIC:
		case ESCAPE_FONTBI:
		case ESCAPE_FONTCW:
		case ESCAPE_FONTROMAN:
			if (0 == norecurse) {
				/* No blank before the font element. */
				h->flags |= HTML_NOSPACE;
				if (html_setfont(h, esc))
					print_metaf(h);
				h->flags &= ~HTML_NOSPACE;
			}
			continue;
		case ESCAPE_SKIPCHAR:
			h->flags |= HTML_SKIPCHAR;
			continue;
		case ESCAPE_ERROR:
			continue;
		default:
			break;
		}

		/* A pending skip request swallows a whole escape, too. */
		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		/* Translate the escape sequence to a codepoint. */
		switch (esc) {
		case ESCAPE_UNICODE:
			/* Skip past "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			break;
		case ESCAPE_NUMBERED:
			c = mchars_num2char(seq, len);
			if (c < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			c = mchars_spec2cp(seq, len);
			if (c <= 0)
				continue;
			break;
		case ESCAPE_UNDEF:
			c = *seq;
			break;
		case ESCAPE_DEVICE:
			print_word(h, "html");
			continue;
		case ESCAPE_BREAK:
			breakline = 1;
			continue;
		case ESCAPE_NOSPACE:
			if ('\0' == *p)
				nospace = 1;
			continue;
		case ESCAPE_OVERSTRIKE:
			if (len == 0)
				continue;
			/* Only the last character of the group is printed. */
			c = seq[len - 1];
			break;
		default:
			continue;
		}
		/* Replace control codes, except the tab, by U+FFFD. */
		if ((c < 0x20 && c != 0x09) ||
		    (c > 0x7E && c < 0xA0))
			c = 0xFFFD;
		/* Non-ASCII is printed as a numeric character reference. */
		if (c > 0x7E) {
			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
			print_word(h, numbuf);
		} else if (print_escape(h, c) == 0)
			print_byte(h, c);
	}

	return nospace;
}
572 
573 static void
574 print_href(struct html *h, const char *name, const char *sec, int man)
575 {
576 	struct stat	 sb;
577 	const char	*p, *pp;
578 	char		*filename;
579 
580 	if (man) {
581 		pp = h->base_man1;
582 		if (h->base_man2 != NULL) {
583 			mandoc_asprintf(&filename, "%s.%s", name, sec);
584 			if (stat(filename, &sb) == -1)
585 				pp = h->base_man2;
586 			free(filename);
587 		}
588 	} else
589 		pp = h->base_includes;
590 
591 	while ((p = strchr(pp, '%')) != NULL) {
592 		print_encode(h, pp, p, 1);
593 		if (man && p[1] == 'S') {
594 			if (sec == NULL)
595 				print_byte(h, '1');
596 			else
597 				print_encode(h, sec, NULL, 1);
598 		} else if ((man && p[1] == 'N') ||
599 		    (man == 0 && p[1] == 'I'))
600 			print_encode(h, name, NULL, 1);
601 		else
602 			print_encode(h, p, p + 2, 1);
603 		pp = p + 2;
604 	}
605 	if (*pp != '\0')
606 		print_encode(h, pp, NULL, 1);
607 }
608 
609 struct tag *
610 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
611 {
612 	va_list		 ap;
613 	struct tag	*t;
614 	const char	*attr;
615 	char		*arg1, *arg2;
616 	int		 style_written, tflags;
617 
618 	tflags = htmltags[tag].flags;
619 
620 	/* Flow content is not allowed in phrasing context. */
621 
622 	if ((tflags & HTML_INPHRASE) == 0) {
623 		for (t = h->tag; t != NULL; t = t->next) {
624 			if (t->closed)
625 				continue;
626 			assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
627 			break;
628 		}
629 
630 	/*
631 	 * Always wrap phrasing elements in a paragraph
632 	 * unless already contained in some flow container;
633 	 * never put them directly into a section.
634 	 */
635 
636 	} else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
637 		print_otag(h, TAG_P, "c", "Pp");
638 
639 	/* Push this tag onto the stack of open scopes. */
640 
641 	if ((tflags & HTML_NOSTACK) == 0) {
642 		t = mandoc_malloc(sizeof(struct tag));
643 		t->tag = tag;
644 		t->next = h->tag;
645 		t->refcnt = 0;
646 		t->closed = 0;
647 		h->tag = t;
648 	} else
649 		t = NULL;
650 
651 	if (tflags & HTML_NLBEFORE)
652 		print_endline(h);
653 	if (h->col == 0)
654 		print_indent(h);
655 	else if ((h->flags & HTML_NOSPACE) == 0) {
656 		if (h->flags & HTML_KEEP)
657 			print_word(h, "&#x00A0;");
658 		else {
659 			if (h->flags & HTML_PREKEEP)
660 				h->flags |= HTML_KEEP;
661 			print_endword(h);
662 		}
663 	}
664 
665 	if ( ! (h->flags & HTML_NONOSPACE))
666 		h->flags &= ~HTML_NOSPACE;
667 	else
668 		h->flags |= HTML_NOSPACE;
669 
670 	/* Print out the tag name and attributes. */
671 
672 	print_byte(h, '<');
673 	print_word(h, htmltags[tag].name);
674 
675 	va_start(ap, fmt);
676 
677 	while (*fmt != '\0' && *fmt != 's') {
678 
679 		/* Parse attributes and arguments. */
680 
681 		arg1 = va_arg(ap, char *);
682 		arg2 = NULL;
683 		switch (*fmt++) {
684 		case 'c':
685 			attr = "class";
686 			break;
687 		case 'h':
688 			attr = "href";
689 			break;
690 		case 'i':
691 			attr = "id";
692 			break;
693 		case '?':
694 			attr = arg1;
695 			arg1 = va_arg(ap, char *);
696 			break;
697 		default:
698 			abort();
699 		}
700 		if (*fmt == 'M')
701 			arg2 = va_arg(ap, char *);
702 		if (arg1 == NULL)
703 			continue;
704 
705 		/* Print the attributes. */
706 
707 		print_byte(h, ' ');
708 		print_word(h, attr);
709 		print_byte(h, '=');
710 		print_byte(h, '"');
711 		switch (*fmt) {
712 		case 'I':
713 			print_href(h, arg1, NULL, 0);
714 			fmt++;
715 			break;
716 		case 'M':
717 			print_href(h, arg1, arg2, 1);
718 			fmt++;
719 			break;
720 		case 'R':
721 			print_byte(h, '#');
722 			print_encode(h, arg1, NULL, 1);
723 			fmt++;
724 			break;
725 		default:
726 			print_encode(h, arg1, NULL, 1);
727 			break;
728 		}
729 		print_byte(h, '"');
730 	}
731 
732 	style_written = 0;
733 	while (*fmt++ == 's') {
734 		arg1 = va_arg(ap, char *);
735 		arg2 = va_arg(ap, char *);
736 		if (arg2 == NULL)
737 			continue;
738 		print_byte(h, ' ');
739 		if (style_written == 0) {
740 			print_word(h, "style=\"");
741 			style_written = 1;
742 		}
743 		print_word(h, arg1);
744 		print_byte(h, ':');
745 		print_byte(h, ' ');
746 		print_word(h, arg2);
747 		print_byte(h, ';');
748 	}
749 	if (style_written)
750 		print_byte(h, '"');
751 
752 	va_end(ap);
753 
754 	/* Accommodate for "well-formed" singleton escaping. */
755 
756 	if (htmltags[tag].flags & HTML_NOSTACK)
757 		print_byte(h, '/');
758 
759 	print_byte(h, '>');
760 
761 	if (tflags & HTML_NLBEGIN)
762 		print_endline(h);
763 	else
764 		h->flags |= HTML_NOSPACE;
765 
766 	if (tflags & HTML_INDENT)
767 		h->indent++;
768 	if (tflags & HTML_NOINDENT)
769 		h->noindent++;
770 
771 	return t;
772 }
773 
774 /*
775  * Print an element with an optional "id=" attribute.
776  * If the element has phrasing content and an "id=" attribute,
777  * also add a permalink: outside if it can be in phrasing context,
778  * inside otherwise.
779  */
780 struct tag *
781 print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
782     struct roff_node *n)
783 {
784 	struct roff_node *nch;
785 	struct tag	*ret, *t;
786 	char		*id, *href;
787 
788 	ret = NULL;
789 	id = href = NULL;
790 	if (n->flags & NODE_ID)
791 		id = html_make_id(n, 1);
792 	if (n->flags & NODE_HREF)
793 		href = id == NULL ? html_make_id(n, 2) : id;
794 	if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
795 		ret = print_otag(h, TAG_A, "chR", "permalink", href);
796 	t = print_otag(h, elemtype, "ci", cattr, id);
797 	if (ret == NULL) {
798 		ret = t;
799 		if (href != NULL && (nch = n->child) != NULL) {
800 			/* man(7) is safe, it tags phrasing content only. */
801 			if (n->tok > MDOC_MAX ||
802 			    htmltags[elemtype].flags & HTML_TOPHRASE)
803 				nch = NULL;
804 			else  /* For mdoc(7), beware of nested blocks. */
805 				while (nch != NULL && nch->type == ROFFT_TEXT)
806 					nch = nch->next;
807 			if (nch == NULL)
808 				print_otag(h, TAG_A, "chR", "permalink", href);
809 		}
810 	}
811 	free(id);
812 	if (id == NULL)
813 		free(href);
814 	return ret;
815 }
816 
817 static void
818 print_ctag(struct html *h, struct tag *tag)
819 {
820 	int	 tflags;
821 
822 	if (tag->closed == 0) {
823 		tag->closed = 1;
824 		if (tag == h->metaf)
825 			h->metaf = NULL;
826 		if (tag == h->tblt)
827 			h->tblt = NULL;
828 
829 		tflags = htmltags[tag->tag].flags;
830 		if (tflags & HTML_INDENT)
831 			h->indent--;
832 		if (tflags & HTML_NOINDENT)
833 			h->noindent--;
834 		if (tflags & HTML_NLEND)
835 			print_endline(h);
836 		print_indent(h);
837 		print_byte(h, '<');
838 		print_byte(h, '/');
839 		print_word(h, htmltags[tag->tag].name);
840 		print_byte(h, '>');
841 		if (tflags & HTML_NLAFTER)
842 			print_endline(h);
843 	}
844 	if (tag->refcnt == 0) {
845 		h->tag = tag->next;
846 		free(tag);
847 	}
848 }
849 
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
856 
857 void
858 print_gen_comment(struct html *h, struct roff_node *n)
859 {
860 	int	 wantblank;
861 
862 	print_word(h, "<!-- This is an automatically generated file."
863 	    "  Do not edit.");
864 	h->indent = 1;
865 	wantblank = 0;
866 	while (n != NULL && n->type == ROFFT_COMMENT) {
867 		if (strstr(n->string, "-->") == NULL &&
868 		    (wantblank || *n->string != '\0')) {
869 			print_endline(h);
870 			print_indent(h);
871 			print_word(h, n->string);
872 			wantblank = *n->string != '\0';
873 		}
874 		n = n->next;
875 	}
876 	if (wantblank)
877 		print_endline(h);
878 	print_word(h, " -->");
879 	print_endline(h);
880 	h->indent = 0;
881 }
882 
/*
 * Print a word of text that is not associated with any node:
 * same as print_tagged_text() with a NULL node argument.
 */
void
print_text(struct html *h, const char *word)
{
	print_tagged_text(h, word, NULL);
}
888 
889 void
890 print_tagged_text(struct html *h, const char *word, struct roff_node *n)
891 {
892 	struct tag	*t;
893 	char		*href;
894 
895 	/*
896 	 * Always wrap text in a paragraph unless already contained in
897 	 * some flow container; never put it directly into a section.
898 	 */
899 
900 	if (h->tag->tag == TAG_SECTION)
901 		print_otag(h, TAG_P, "c", "Pp");
902 
903 	/* Output whitespace before this text? */
904 
905 	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
906 		if ( ! (HTML_KEEP & h->flags)) {
907 			if (HTML_PREKEEP & h->flags)
908 				h->flags |= HTML_KEEP;
909 			print_endword(h);
910 		} else
911 			print_word(h, "&#x00A0;");
912 	}
913 
914 	/*
915 	 * Optionally switch fonts, optionally write a permalink, then
916 	 * print the text, optionally surrounded by HTML whitespace.
917 	 */
918 
919 	assert(h->metaf == NULL);
920 	print_metaf(h);
921 	print_indent(h);
922 
923 	if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
924 		t = print_otag(h, TAG_A, "chR", "permalink", href);
925 		free(href);
926 	} else
927 		t = NULL;
928 
929 	if ( ! print_encode(h, word, NULL, 0)) {
930 		if ( ! (h->flags & HTML_NONOSPACE))
931 			h->flags &= ~HTML_NOSPACE;
932 		h->flags &= ~HTML_NONEWLINE;
933 	} else
934 		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
935 
936 	if (h->metaf != NULL) {
937 		print_tagq(h, h->metaf);
938 		h->metaf = NULL;
939 	} else if (t != NULL)
940 		print_tagq(h, t);
941 
942 	h->flags &= ~HTML_IGNDELIM;
943 }
944 
945 void
946 print_tagq(struct html *h, const struct tag *until)
947 {
948 	struct tag	*this, *next;
949 
950 	for (this = h->tag; this != NULL; this = next) {
951 		next = this == until ? NULL : this->next;
952 		print_ctag(h, this);
953 	}
954 }
955 
956 /*
957  * Close out all open elements up to but excluding suntil.
958  * Note that a paragraph just inside stays open together with it
959  * because paragraphs include subsequent phrasing content.
960  */
961 void
962 print_stagq(struct html *h, const struct tag *suntil)
963 {
964 	struct tag	*this, *next;
965 
966 	for (this = h->tag; this != NULL; this = next) {
967 		next = this->next;
968 		if (this == suntil || (next == suntil &&
969 		    (this->tag == TAG_P || this->tag == TAG_PRE)))
970 			break;
971 		print_ctag(h, this);
972 	}
973 }
974 
975 
976 /***********************************************************************
977  * Low level output functions.
978  * They implement line breaking using a short static buffer.
979  ***********************************************************************/
980 
981 /*
982  * Buffer one HTML output byte.
983  * If the buffer is full, flush and deactivate it and start a new line.
984  * If the buffer is inactive, print directly.
985  */
986 static void
987 print_byte(struct html *h, char c)
988 {
989 	if ((h->flags & HTML_BUFFER) == 0) {
990 		putchar(c);
991 		h->col++;
992 		return;
993 	}
994 
995 	if (h->col + h->bufcol < sizeof(h->buf)) {
996 		h->buf[h->bufcol++] = c;
997 		return;
998 	}
999 
1000 	putchar('\n');
1001 	h->col = 0;
1002 	print_indent(h);
1003 	putchar(' ');
1004 	putchar(' ');
1005 	fwrite(h->buf, h->bufcol, 1, stdout);
1006 	putchar(c);
1007 	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1008 	h->bufcol = 0;
1009 	h->flags &= ~HTML_BUFFER;
1010 }
1011 
1012 /*
1013  * If something was printed on the current output line, end it.
1014  * Not to be called right after print_indent().
1015  */
1016 void
1017 print_endline(struct html *h)
1018 {
1019 	if (h->col == 0)
1020 		return;
1021 
1022 	if (h->bufcol) {
1023 		putchar(' ');
1024 		fwrite(h->buf, h->bufcol, 1, stdout);
1025 		h->bufcol = 0;
1026 	}
1027 	putchar('\n');
1028 	h->col = 0;
1029 	h->flags |= HTML_NOSPACE;
1030 	h->flags &= ~HTML_BUFFER;
1031 }
1032 
1033 /*
1034  * Flush the HTML output buffer.
1035  * If it is inactive, activate it.
1036  */
1037 static void
1038 print_endword(struct html *h)
1039 {
1040 	if (h->noindent) {
1041 		print_byte(h, ' ');
1042 		return;
1043 	}
1044 
1045 	if ((h->flags & HTML_BUFFER) == 0) {
1046 		h->col++;
1047 		h->flags |= HTML_BUFFER;
1048 	} else if (h->bufcol) {
1049 		putchar(' ');
1050 		fwrite(h->buf, h->bufcol, 1, stdout);
1051 		h->col += h->bufcol + 1;
1052 	}
1053 	h->bufcol = 0;
1054 }
1055 
1056 /*
1057  * If at the beginning of a new output line,
1058  * perform indentation and mark the line as containing output.
1059  * Make sure to really produce some output right afterwards,
1060  * but do not use print_otag() for producing it.
1061  */
1062 static void
1063 print_indent(struct html *h)
1064 {
1065 	size_t	 i;
1066 
1067 	if (h->col || h->noindent)
1068 		return;
1069 
1070 	h->col = h->indent * 2;
1071 	for (i = 0; i < h->col; i++)
1072 		putchar(' ');
1073 }
1074 
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
1085