xref: /openbsd-src/usr.bin/mandoc/html.c (revision f1dd7b858388b4a23f4f67a4957ec5ff656ebbe8)
1 /* $OpenBSD: html.c,v 1.143 2021/05/16 18:08:37 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Common functions for mandoc(1) HTML formatters.
19  * For use by individual formatters and by the main program.
20  */
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdarg.h>
27 #include <stddef.h>
28 #include <stdio.h>
29 #include <stdint.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "mandoc_aux.h"
35 #include "mandoc_ohash.h"
36 #include "mandoc.h"
37 #include "roff.h"
38 #include "out.h"
39 #include "html.h"
40 #include "manconf.h"
41 #include "main.h"
42 
/*
 * Layout flags describing how one HTML element type is printed.
 * They are combined by bitwise OR in the "flags" member below.
 */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */

/* Frequently used combinations of the line break flags. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

struct	htmldata {
	const char	 *name;   /* Element name printed inside the tag. */
	int		  flags;  /* Bitwise OR of the HTML_* flags above. */
};
59 
/*
 * Name and layout flags of every HTML element this file can print.
 * The table is indexed with values of enum htmltag (see the
 * htmltags[tag] lookups in print_otag() and print_ctag()), so the
 * entries have to stay in the same order as the enumerators.
 */
static	const struct htmldata htmltags[TAG_MAX] = {
	{"html",	HTML_NLALL},
	{"head",	HTML_NLALL | HTML_INDENT},
	{"meta",	HTML_NOSTACK | HTML_NLALL},
	{"link",	HTML_NOSTACK | HTML_NLALL},
	{"style",	HTML_NLALL | HTML_INDENT},
	{"title",	HTML_NLAROUND},
	{"body",	HTML_NLALL},
	{"div",		HTML_NLAROUND},
	{"section",	HTML_NLALL},
	{"table",	HTML_NLALL | HTML_INDENT},
	{"tr",		HTML_NLALL | HTML_INDENT},
	{"td",		HTML_NLAROUND},
	{"li",		HTML_NLAROUND | HTML_INDENT},
	{"ul",		HTML_NLALL | HTML_INDENT},
	{"ol",		HTML_NLALL | HTML_INDENT},
	{"dl",		HTML_NLALL | HTML_INDENT},
	{"dt",		HTML_NLAROUND},
	{"dd",		HTML_NLAROUND | HTML_INDENT},
	{"h1",		HTML_TOPHRASE | HTML_NLAROUND},
	{"h2",		HTML_TOPHRASE | HTML_NLAROUND},
	{"p",		HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
	{"pre",		HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
	/* Elements that both live in and establish phrasing context. */
	{"a",		HTML_INPHRASE | HTML_TOPHRASE},
	{"b",		HTML_INPHRASE | HTML_TOPHRASE},
	{"cite",	HTML_INPHRASE | HTML_TOPHRASE},
	{"code",	HTML_INPHRASE | HTML_TOPHRASE},
	{"i",		HTML_INPHRASE | HTML_TOPHRASE},
	{"small",	HTML_INPHRASE | HTML_TOPHRASE},
	{"span",	HTML_INPHRASE | HTML_TOPHRASE},
	{"var",		HTML_INPHRASE | HTML_TOPHRASE},
	{"br",		HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
	{"mark",	HTML_INPHRASE },
	{"math",	HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
	/* The remaining elements carry no layout flags at all. */
	{"mrow",	0},
	{"mi",		0},
	{"mn",		0},
	{"mo",		0},
	{"msup",	0},
	{"msub",	0},
	{"msubsup",	0},
	{"mfrac",	0},
	{"msqrt",	0},
	{"mfenced",	0},
	{"mtable",	0},
	{"mtr",		0},
	{"mtd",		0},
	{"munderover",	0},
	{"munder",	0},
	{"mover",	0},
};
111 
/*
 * Avoid duplicate HTML id= attributes.
 * html_make_id() records every id string it hands out for
 * de-duplication in the id_unique hash table, one id_entry per
 * distinct string, looked up by that string.
 */

struct	id_entry {
	int	 ord;	/* Ordinal number of the latest occurrence. */
	char	 id[];	/* The id= attribute without any ordinal suffix. */
};
/* Initialized in html_alloc() and html_reset(), emptied on reset/free. */
static	struct ohash	 id_unique;
119 
/* State and element stack handling. */
static	void	 html_reset_internal(struct html *);
static	void	 print_ctag(struct html *, struct tag *);
static	void	 print_metaf(struct html *);

/* Encoding of text and of attribute values. */
static	int	 print_encode(struct html *, const char *, const char *, int);
static	int	 print_escape(struct html *, char);
static	void	 print_href(struct html *, const char *, const char *, int);

/* Low level output functions. */
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);
131 
132 
133 void *
134 html_alloc(const struct manoutput *outopts)
135 {
136 	struct html	*h;
137 
138 	h = mandoc_calloc(1, sizeof(struct html));
139 
140 	h->tag = NULL;
141 	h->metac = h->metal = ESCAPE_FONTROMAN;
142 	h->style = outopts->style;
143 	if ((h->base_man1 = outopts->man) == NULL)
144 		h->base_man2 = NULL;
145 	else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
146 		*h->base_man2++ = '\0';
147 	h->base_includes = outopts->includes;
148 	if (outopts->fragment)
149 		h->oflags |= HTML_FRAGMENT;
150 	if (outopts->toc)
151 		h->oflags |= HTML_TOC;
152 
153 	mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
154 
155 	return h;
156 }
157 
158 static void
159 html_reset_internal(struct html *h)
160 {
161 	struct tag	*tag;
162 	struct id_entry	*entry;
163 	unsigned int	 slot;
164 
165 	while ((tag = h->tag) != NULL) {
166 		h->tag = tag->next;
167 		free(tag);
168 	}
169 	entry = ohash_first(&id_unique, &slot);
170 	while (entry != NULL) {
171 		free(entry);
172 		entry = ohash_next(&id_unique, &slot);
173 	}
174 	ohash_delete(&id_unique);
175 }
176 
177 void
178 html_reset(void *p)
179 {
180 	html_reset_internal(p);
181 	mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
182 }
183 
/*
 * Release everything owned by the formatter state
 * and finally the state object itself.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
190 
191 void
192 print_gen_head(struct html *h)
193 {
194 	struct tag	*t;
195 
196 	print_otag(h, TAG_META, "?", "charset", "utf-8");
197 	if (h->style != NULL) {
198 		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
199 		    h->style, "type", "text/css", "media", "all");
200 		return;
201 	}
202 
203 	/*
204 	 * Print a minimal embedded style sheet.
205 	 */
206 
207 	t = print_otag(h, TAG_STYLE, "");
208 	print_text(h, "table.head, table.foot { width: 100%; }");
209 	print_endline(h);
210 	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
211 	print_endline(h);
212 	print_text(h, "td.head-vol { text-align: center; }");
213 	print_endline(h);
214 	print_text(h, ".Nd, .Bf, .Op { display: inline; }");
215 	print_endline(h);
216 	print_text(h, ".Pa, .Ad { font-style: italic; }");
217 	print_endline(h);
218 	print_text(h, ".Ms { font-weight: bold; }");
219 	print_endline(h);
220 	print_text(h, ".Bl-diag ");
221 	print_byte(h, '>');
222 	print_text(h, " dt { font-weight: bold; }");
223 	print_endline(h);
224 	print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
225 	    "{ font-weight: bold; font-family: inherit; }");
226 	print_tagq(h, t);
227 }
228 
229 int
230 html_setfont(struct html *h, enum mandoc_esc font)
231 {
232 	switch (font) {
233 	case ESCAPE_FONTPREV:
234 		font = h->metal;
235 		break;
236 	case ESCAPE_FONTITALIC:
237 	case ESCAPE_FONTBOLD:
238 	case ESCAPE_FONTBI:
239 	case ESCAPE_FONTCW:
240 	case ESCAPE_FONTROMAN:
241 		break;
242 	case ESCAPE_FONT:
243 		font = ESCAPE_FONTROMAN;
244 		break;
245 	default:
246 		return 0;
247 	}
248 	h->metal = h->metac;
249 	h->metac = font;
250 	return 1;
251 }
252 
253 static void
254 print_metaf(struct html *h)
255 {
256 	if (h->metaf) {
257 		print_tagq(h, h->metaf);
258 		h->metaf = NULL;
259 	}
260 	switch (h->metac) {
261 	case ESCAPE_FONTITALIC:
262 		h->metaf = print_otag(h, TAG_I, "");
263 		break;
264 	case ESCAPE_FONTBOLD:
265 		h->metaf = print_otag(h, TAG_B, "");
266 		break;
267 	case ESCAPE_FONTBI:
268 		h->metaf = print_otag(h, TAG_B, "");
269 		print_otag(h, TAG_I, "");
270 		break;
271 	case ESCAPE_FONTCW:
272 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
273 		break;
274 	default:
275 		break;
276 	}
277 }
278 
279 void
280 html_close_paragraph(struct html *h)
281 {
282 	struct tag	*this, *next;
283 	int		 flags;
284 
285 	this = h->tag;
286 	for (;;) {
287 		next = this->next;
288 		flags = htmltags[this->tag].flags;
289 		if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
290 			print_ctag(h, this);
291 		if ((flags & HTML_INPHRASE) == 0)
292 			break;
293 		this = next;
294 	}
295 }
296 
297 /*
298  * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
299  * TOKEN_NONE does not switch.  The old mode is returned.
300  */
301 enum roff_tok
302 html_fillmode(struct html *h, enum roff_tok want)
303 {
304 	struct tag	*t;
305 	enum roff_tok	 had;
306 
307 	for (t = h->tag; t != NULL; t = t->next)
308 		if (t->tag == TAG_PRE)
309 			break;
310 
311 	had = t == NULL ? ROFF_fi : ROFF_nf;
312 
313 	if (want != had) {
314 		switch (want) {
315 		case ROFF_fi:
316 			print_tagq(h, t);
317 			break;
318 		case ROFF_nf:
319 			html_close_paragraph(h);
320 			print_otag(h, TAG_PRE, "");
321 			break;
322 		case TOKEN_NONE:
323 			break;
324 		default:
325 			abort();
326 		}
327 	}
328 	return had;
329 }
330 
331 /*
332  * Allocate a string to be used for the "id=" attribute of an HTML
333  * element and/or as a segment identifier for a URI in an <a> element.
334  * The function may fail and return NULL if the node lacks text data
335  * to create the attribute from.
336  * The caller is responsible for free(3)ing the returned string.
337  *
338  * If the "unique" argument is non-zero, the "id_unique" ohash table
339  * is used for de-duplication.  If the "unique" argument is 1,
340  * it is the first time the function is called for this tag and
341  * location, so if an ordinal suffix is needed, it is incremented.
342  * If the "unique" argument is 2, it is the second time the function
343  * is called for this tag and location, so the ordinal suffix
344  * remains unchanged.
345  */
346 char *
347 html_make_id(const struct roff_node *n, int unique)
348 {
349 	const struct roff_node	*nch;
350 	struct id_entry		*entry;
351 	char			*buf, *cp;
352 	size_t			 len;
353 	unsigned int		 slot;
354 
355 	if (n->tag != NULL)
356 		buf = mandoc_strdup(n->tag);
357 	else {
358 		switch (n->tok) {
359 		case MDOC_Sh:
360 		case MDOC_Ss:
361 		case MDOC_Sx:
362 		case MAN_SH:
363 		case MAN_SS:
364 			for (nch = n->child; nch != NULL; nch = nch->next)
365 				if (nch->type != ROFFT_TEXT)
366 					return NULL;
367 			buf = NULL;
368 			deroff(&buf, n);
369 			if (buf == NULL)
370 				return NULL;
371 			break;
372 		default:
373 			if (n->child == NULL || n->child->type != ROFFT_TEXT)
374 				return NULL;
375 			buf = mandoc_strdup(n->child->string);
376 			break;
377 		}
378 	}
379 
380 	/*
381 	 * In ID attributes, only use ASCII characters that are
382 	 * permitted in URL-fragment strings according to the
383 	 * explicit list at:
384 	 * https://url.spec.whatwg.org/#url-fragment-string
385 	 * In addition, reserve '~' for ordinal suffixes.
386 	 */
387 
388 	for (cp = buf; *cp != '\0'; cp++)
389 		if (isalnum((unsigned char)*cp) == 0 &&
390 		    strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
391 			*cp = '_';
392 
393 	if (unique == 0)
394 		return buf;
395 
396 	/* Avoid duplicate HTML id= attributes. */
397 
398 	slot = ohash_qlookup(&id_unique, buf);
399 	if ((entry = ohash_find(&id_unique, slot)) == NULL) {
400 		len = strlen(buf) + 1;
401 		entry = mandoc_malloc(sizeof(*entry) + len);
402 		entry->ord = 1;
403 		memcpy(entry->id, buf, len);
404 		ohash_insert(&id_unique, slot, entry);
405 	} else if (unique == 1)
406 		entry->ord++;
407 
408 	if (entry->ord > 1) {
409 		cp = buf;
410 		mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
411 		free(cp);
412 	}
413 	return buf;
414 }
415 
416 static int
417 print_escape(struct html *h, char c)
418 {
419 
420 	switch (c) {
421 	case '<':
422 		print_word(h, "&lt;");
423 		break;
424 	case '>':
425 		print_word(h, "&gt;");
426 		break;
427 	case '&':
428 		print_word(h, "&amp;");
429 		break;
430 	case '"':
431 		print_word(h, "&quot;");
432 		break;
433 	case ASCII_NBRSP:
434 		print_word(h, "&nbsp;");
435 		break;
436 	case ASCII_HYPH:
437 		print_byte(h, '-');
438 		break;
439 	case ASCII_BREAK:
440 		break;
441 	default:
442 		return 0;
443 	}
444 	return 1;
445 }
446 
447 static int
448 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
449 {
450 	char		 numbuf[16];
451 	const char	*seq;
452 	size_t		 sz;
453 	int		 c, len, breakline, nospace;
454 	enum mandoc_esc	 esc;
455 	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
456 		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
457 
458 	if (pend == NULL)
459 		pend = strchr(p, '\0');
460 
461 	breakline = 0;
462 	nospace = 0;
463 
464 	while (p < pend) {
465 		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
466 			h->flags &= ~HTML_SKIPCHAR;
467 			p++;
468 			continue;
469 		}
470 
471 		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
472 			print_byte(h, *p);
473 
474 		if (breakline &&
475 		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
476 			print_otag(h, TAG_BR, "");
477 			breakline = 0;
478 			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
479 				p++;
480 			continue;
481 		}
482 
483 		if (p >= pend)
484 			break;
485 
486 		if (*p == ' ') {
487 			print_endword(h);
488 			p++;
489 			continue;
490 		}
491 
492 		if (print_escape(h, *p++))
493 			continue;
494 
495 		esc = mandoc_escape(&p, &seq, &len);
496 		switch (esc) {
497 		case ESCAPE_FONT:
498 		case ESCAPE_FONTPREV:
499 		case ESCAPE_FONTBOLD:
500 		case ESCAPE_FONTITALIC:
501 		case ESCAPE_FONTBI:
502 		case ESCAPE_FONTCW:
503 		case ESCAPE_FONTROMAN:
504 			if (0 == norecurse) {
505 				h->flags |= HTML_NOSPACE;
506 				if (html_setfont(h, esc))
507 					print_metaf(h);
508 				h->flags &= ~HTML_NOSPACE;
509 			}
510 			continue;
511 		case ESCAPE_SKIPCHAR:
512 			h->flags |= HTML_SKIPCHAR;
513 			continue;
514 		case ESCAPE_ERROR:
515 			continue;
516 		default:
517 			break;
518 		}
519 
520 		if (h->flags & HTML_SKIPCHAR) {
521 			h->flags &= ~HTML_SKIPCHAR;
522 			continue;
523 		}
524 
525 		switch (esc) {
526 		case ESCAPE_UNICODE:
527 			/* Skip past "u" header. */
528 			c = mchars_num2uc(seq + 1, len - 1);
529 			break;
530 		case ESCAPE_NUMBERED:
531 			c = mchars_num2char(seq, len);
532 			if (c < 0)
533 				continue;
534 			break;
535 		case ESCAPE_SPECIAL:
536 			c = mchars_spec2cp(seq, len);
537 			if (c <= 0)
538 				continue;
539 			break;
540 		case ESCAPE_UNDEF:
541 			c = *seq;
542 			break;
543 		case ESCAPE_DEVICE:
544 			print_word(h, "html");
545 			continue;
546 		case ESCAPE_BREAK:
547 			breakline = 1;
548 			continue;
549 		case ESCAPE_NOSPACE:
550 			if ('\0' == *p)
551 				nospace = 1;
552 			continue;
553 		case ESCAPE_OVERSTRIKE:
554 			if (len == 0)
555 				continue;
556 			c = seq[len - 1];
557 			break;
558 		default:
559 			continue;
560 		}
561 		if ((c < 0x20 && c != 0x09) ||
562 		    (c > 0x7E && c < 0xA0))
563 			c = 0xFFFD;
564 		if (c > 0x7E) {
565 			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
566 			print_word(h, numbuf);
567 		} else if (print_escape(h, c) == 0)
568 			print_byte(h, c);
569 	}
570 
571 	return nospace;
572 }
573 
574 static void
575 print_href(struct html *h, const char *name, const char *sec, int man)
576 {
577 	struct stat	 sb;
578 	const char	*p, *pp;
579 	char		*filename;
580 
581 	if (man) {
582 		pp = h->base_man1;
583 		if (h->base_man2 != NULL) {
584 			mandoc_asprintf(&filename, "%s.%s", name, sec);
585 			if (stat(filename, &sb) == -1)
586 				pp = h->base_man2;
587 			free(filename);
588 		}
589 	} else
590 		pp = h->base_includes;
591 
592 	while ((p = strchr(pp, '%')) != NULL) {
593 		print_encode(h, pp, p, 1);
594 		if (man && p[1] == 'S') {
595 			if (sec == NULL)
596 				print_byte(h, '1');
597 			else
598 				print_encode(h, sec, NULL, 1);
599 		} else if ((man && p[1] == 'N') ||
600 		    (man == 0 && p[1] == 'I'))
601 			print_encode(h, name, NULL, 1);
602 		else
603 			print_encode(h, p, p + 2, 1);
604 		pp = p + 2;
605 	}
606 	if (*pp != '\0')
607 		print_encode(h, pp, NULL, 1);
608 }
609 
610 struct tag *
611 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
612 {
613 	va_list		 ap;
614 	struct tag	*t;
615 	const char	*attr;
616 	char		*arg1, *arg2;
617 	int		 style_written, tflags;
618 
619 	tflags = htmltags[tag].flags;
620 
621 	/* Flow content is not allowed in phrasing context. */
622 
623 	if ((tflags & HTML_INPHRASE) == 0) {
624 		for (t = h->tag; t != NULL; t = t->next) {
625 			if (t->closed)
626 				continue;
627 			assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
628 			break;
629 		}
630 
631 	/*
632 	 * Always wrap phrasing elements in a paragraph
633 	 * unless already contained in some flow container;
634 	 * never put them directly into a section.
635 	 */
636 
637 	} else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
638 		print_otag(h, TAG_P, "c", "Pp");
639 
640 	/* Push this tag onto the stack of open scopes. */
641 
642 	if ((tflags & HTML_NOSTACK) == 0) {
643 		t = mandoc_malloc(sizeof(struct tag));
644 		t->tag = tag;
645 		t->next = h->tag;
646 		t->refcnt = 0;
647 		t->closed = 0;
648 		h->tag = t;
649 	} else
650 		t = NULL;
651 
652 	if (tflags & HTML_NLBEFORE)
653 		print_endline(h);
654 	if (h->col == 0)
655 		print_indent(h);
656 	else if ((h->flags & HTML_NOSPACE) == 0) {
657 		if (h->flags & HTML_KEEP)
658 			print_word(h, "&#x00A0;");
659 		else {
660 			if (h->flags & HTML_PREKEEP)
661 				h->flags |= HTML_KEEP;
662 			print_endword(h);
663 		}
664 	}
665 
666 	if ( ! (h->flags & HTML_NONOSPACE))
667 		h->flags &= ~HTML_NOSPACE;
668 	else
669 		h->flags |= HTML_NOSPACE;
670 
671 	/* Print out the tag name and attributes. */
672 
673 	print_byte(h, '<');
674 	print_word(h, htmltags[tag].name);
675 
676 	va_start(ap, fmt);
677 
678 	while (*fmt != '\0' && *fmt != 's') {
679 
680 		/* Parse attributes and arguments. */
681 
682 		arg1 = va_arg(ap, char *);
683 		arg2 = NULL;
684 		switch (*fmt++) {
685 		case 'c':
686 			attr = "class";
687 			break;
688 		case 'h':
689 			attr = "href";
690 			break;
691 		case 'i':
692 			attr = "id";
693 			break;
694 		case '?':
695 			attr = arg1;
696 			arg1 = va_arg(ap, char *);
697 			break;
698 		default:
699 			abort();
700 		}
701 		if (*fmt == 'M')
702 			arg2 = va_arg(ap, char *);
703 		if (arg1 == NULL)
704 			continue;
705 
706 		/* Print the attributes. */
707 
708 		print_byte(h, ' ');
709 		print_word(h, attr);
710 		print_byte(h, '=');
711 		print_byte(h, '"');
712 		switch (*fmt) {
713 		case 'I':
714 			print_href(h, arg1, NULL, 0);
715 			fmt++;
716 			break;
717 		case 'M':
718 			print_href(h, arg1, arg2, 1);
719 			fmt++;
720 			break;
721 		case 'R':
722 			print_byte(h, '#');
723 			print_encode(h, arg1, NULL, 1);
724 			fmt++;
725 			break;
726 		default:
727 			print_encode(h, arg1, NULL, 1);
728 			break;
729 		}
730 		print_byte(h, '"');
731 	}
732 
733 	style_written = 0;
734 	while (*fmt++ == 's') {
735 		arg1 = va_arg(ap, char *);
736 		arg2 = va_arg(ap, char *);
737 		if (arg2 == NULL)
738 			continue;
739 		print_byte(h, ' ');
740 		if (style_written == 0) {
741 			print_word(h, "style=\"");
742 			style_written = 1;
743 		}
744 		print_word(h, arg1);
745 		print_byte(h, ':');
746 		print_byte(h, ' ');
747 		print_word(h, arg2);
748 		print_byte(h, ';');
749 	}
750 	if (style_written)
751 		print_byte(h, '"');
752 
753 	va_end(ap);
754 
755 	/* Accommodate for "well-formed" singleton escaping. */
756 
757 	if (htmltags[tag].flags & HTML_NOSTACK)
758 		print_byte(h, '/');
759 
760 	print_byte(h, '>');
761 
762 	if (tflags & HTML_NLBEGIN)
763 		print_endline(h);
764 	else
765 		h->flags |= HTML_NOSPACE;
766 
767 	if (tflags & HTML_INDENT)
768 		h->indent++;
769 	if (tflags & HTML_NOINDENT)
770 		h->noindent++;
771 
772 	return t;
773 }
774 
775 /*
776  * Print an element with an optional "id=" attribute.
777  * If the element has phrasing content and an "id=" attribute,
778  * also add a permalink: outside if it can be in phrasing context,
779  * inside otherwise.
780  */
781 struct tag *
782 print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
783     struct roff_node *n)
784 {
785 	struct roff_node *nch;
786 	struct tag	*ret, *t;
787 	char		*id, *href;
788 
789 	ret = NULL;
790 	id = href = NULL;
791 	if (n->flags & NODE_ID)
792 		id = html_make_id(n, 1);
793 	if (n->flags & NODE_HREF)
794 		href = id == NULL ? html_make_id(n, 2) : id;
795 	if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
796 		ret = print_otag(h, TAG_A, "chR", "permalink", href);
797 	t = print_otag(h, elemtype, "ci", cattr, id);
798 	if (ret == NULL) {
799 		ret = t;
800 		if (href != NULL && (nch = n->child) != NULL) {
801 			/* man(7) is safe, it tags phrasing content only. */
802 			if (n->tok > MDOC_MAX ||
803 			    htmltags[elemtype].flags & HTML_TOPHRASE)
804 				nch = NULL;
805 			else  /* For mdoc(7), beware of nested blocks. */
806 				while (nch != NULL && nch->type == ROFFT_TEXT)
807 					nch = nch->next;
808 			if (nch == NULL)
809 				print_otag(h, TAG_A, "chR", "permalink", href);
810 		}
811 	}
812 	free(id);
813 	if (id == NULL)
814 		free(href);
815 	return ret;
816 }
817 
818 static void
819 print_ctag(struct html *h, struct tag *tag)
820 {
821 	int	 tflags;
822 
823 	if (tag->closed == 0) {
824 		tag->closed = 1;
825 		if (tag == h->metaf)
826 			h->metaf = NULL;
827 		if (tag == h->tblt)
828 			h->tblt = NULL;
829 
830 		tflags = htmltags[tag->tag].flags;
831 		if (tflags & HTML_INDENT)
832 			h->indent--;
833 		if (tflags & HTML_NOINDENT)
834 			h->noindent--;
835 		if (tflags & HTML_NLEND)
836 			print_endline(h);
837 		print_indent(h);
838 		print_byte(h, '<');
839 		print_byte(h, '/');
840 		print_word(h, htmltags[tag->tag].name);
841 		print_byte(h, '>');
842 		if (tflags & HTML_NLAFTER)
843 			print_endline(h);
844 	}
845 	if (tag->refcnt == 0) {
846 		h->tag = tag->next;
847 		free(tag);
848 	}
849 }
850 
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
857 
858 void
859 print_gen_comment(struct html *h, struct roff_node *n)
860 {
861 	int	 wantblank;
862 
863 	print_word(h, "<!-- This is an automatically generated file."
864 	    "  Do not edit.");
865 	h->indent = 1;
866 	wantblank = 0;
867 	while (n != NULL && n->type == ROFFT_COMMENT) {
868 		if (strstr(n->string, "-->") == NULL &&
869 		    (wantblank || *n->string != '\0')) {
870 			print_endline(h);
871 			print_indent(h);
872 			print_word(h, n->string);
873 			wantblank = *n->string != '\0';
874 		}
875 		n = n->next;
876 	}
877 	if (wantblank)
878 		print_endline(h);
879 	print_word(h, " -->");
880 	print_endline(h);
881 	h->indent = 0;
882 }
883 
/*
 * Print text that is not associated with any syntax tree node,
 * such that no permalink is written for it.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*no_node = NULL;

	print_tagged_text(h, word, no_node);
}
889 
890 void
891 print_tagged_text(struct html *h, const char *word, struct roff_node *n)
892 {
893 	struct tag	*t;
894 	char		*href;
895 
896 	/*
897 	 * Always wrap text in a paragraph unless already contained in
898 	 * some flow container; never put it directly into a section.
899 	 */
900 
901 	if (h->tag->tag == TAG_SECTION)
902 		print_otag(h, TAG_P, "c", "Pp");
903 
904 	/* Output whitespace before this text? */
905 
906 	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
907 		if ( ! (HTML_KEEP & h->flags)) {
908 			if (HTML_PREKEEP & h->flags)
909 				h->flags |= HTML_KEEP;
910 			print_endword(h);
911 		} else
912 			print_word(h, "&#x00A0;");
913 	}
914 
915 	/*
916 	 * Optionally switch fonts, optionally write a permalink, then
917 	 * print the text, optionally surrounded by HTML whitespace.
918 	 */
919 
920 	assert(h->metaf == NULL);
921 	print_metaf(h);
922 	print_indent(h);
923 
924 	if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
925 		t = print_otag(h, TAG_A, "chR", "permalink", href);
926 		free(href);
927 	} else
928 		t = NULL;
929 
930 	if ( ! print_encode(h, word, NULL, 0)) {
931 		if ( ! (h->flags & HTML_NONOSPACE))
932 			h->flags &= ~HTML_NOSPACE;
933 		h->flags &= ~HTML_NONEWLINE;
934 	} else
935 		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
936 
937 	if (h->metaf != NULL) {
938 		print_tagq(h, h->metaf);
939 		h->metaf = NULL;
940 	} else if (t != NULL)
941 		print_tagq(h, t);
942 
943 	h->flags &= ~HTML_IGNDELIM;
944 }
945 
946 void
947 print_tagq(struct html *h, const struct tag *until)
948 {
949 	struct tag	*this, *next;
950 
951 	for (this = h->tag; this != NULL; this = next) {
952 		next = this == until ? NULL : this->next;
953 		print_ctag(h, this);
954 	}
955 }
956 
957 /*
958  * Close out all open elements up to but excluding suntil.
959  * Note that a paragraph just inside stays open together with it
960  * because paragraphs include subsequent phrasing content.
961  */
962 void
963 print_stagq(struct html *h, const struct tag *suntil)
964 {
965 	struct tag	*this, *next;
966 
967 	for (this = h->tag; this != NULL; this = next) {
968 		next = this->next;
969 		if (this == suntil || (next == suntil &&
970 		    (this->tag == TAG_P || this->tag == TAG_PRE)))
971 			break;
972 		print_ctag(h, this);
973 	}
974 }
975 
976 
977 /***********************************************************************
978  * Low level output functions.
979  * They implement line breaking using a short static buffer.
980  ***********************************************************************/
981 
982 /*
983  * Buffer one HTML output byte.
984  * If the buffer is full, flush and deactivate it and start a new line.
985  * If the buffer is inactive, print directly.
986  */
987 static void
988 print_byte(struct html *h, char c)
989 {
990 	if ((h->flags & HTML_BUFFER) == 0) {
991 		putchar(c);
992 		h->col++;
993 		return;
994 	}
995 
996 	if (h->col + h->bufcol < sizeof(h->buf)) {
997 		h->buf[h->bufcol++] = c;
998 		return;
999 	}
1000 
1001 	putchar('\n');
1002 	h->col = 0;
1003 	print_indent(h);
1004 	putchar(' ');
1005 	putchar(' ');
1006 	fwrite(h->buf, h->bufcol, 1, stdout);
1007 	putchar(c);
1008 	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1009 	h->bufcol = 0;
1010 	h->flags &= ~HTML_BUFFER;
1011 }
1012 
1013 /*
1014  * If something was printed on the current output line, end it.
1015  * Not to be called right after print_indent().
1016  */
1017 void
1018 print_endline(struct html *h)
1019 {
1020 	if (h->col == 0)
1021 		return;
1022 
1023 	if (h->bufcol) {
1024 		putchar(' ');
1025 		fwrite(h->buf, h->bufcol, 1, stdout);
1026 		h->bufcol = 0;
1027 	}
1028 	putchar('\n');
1029 	h->col = 0;
1030 	h->flags |= HTML_NOSPACE;
1031 	h->flags &= ~HTML_BUFFER;
1032 }
1033 
1034 /*
1035  * Flush the HTML output buffer.
1036  * If it is inactive, activate it.
1037  */
1038 static void
1039 print_endword(struct html *h)
1040 {
1041 	if (h->noindent) {
1042 		print_byte(h, ' ');
1043 		return;
1044 	}
1045 
1046 	if ((h->flags & HTML_BUFFER) == 0) {
1047 		h->col++;
1048 		h->flags |= HTML_BUFFER;
1049 	} else if (h->bufcol) {
1050 		putchar(' ');
1051 		fwrite(h->buf, h->bufcol, 1, stdout);
1052 		h->col += h->bufcol + 1;
1053 	}
1054 	h->bufcol = 0;
1055 }
1056 
1057 /*
1058  * If at the beginning of a new output line,
1059  * perform indentation and mark the line as containing output.
1060  * Make sure to really produce some output right afterwards,
1061  * but do not use print_otag() for producing it.
1062  */
1063 static void
1064 print_indent(struct html *h)
1065 {
1066 	size_t	 i;
1067 
1068 	if (h->col || h->noindent)
1069 		return;
1070 
1071 	h->col = h->indent * 2;
1072 	for (i = 0; i < h->col; i++)
1073 		putchar(' ');
1074 }
1075 
/*
 * Pass the bytes of a string to print_byte() one by one, such that
 * they are printed or buffered depending on the current state of
 * the HTML output buffer.  No encoding is done.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
1086