xref: /openbsd-src/usr.bin/mandoc/html.c (revision 3cc45a9d38cefca347754ff6ad1ee07b3f5fadb9)
1*3cc45a9dSschwarze /* $OpenBSD: html.c,v 1.150 2022/08/09 11:21:50 schwarze Exp $ */
24175bdabSschwarze /*
30ac7e6ecSschwarze  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
47d063611Sschwarze  * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
56774f271Sschwarze  * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
64175bdabSschwarze  *
74175bdabSschwarze  * Permission to use, copy, modify, and distribute this software for any
84175bdabSschwarze  * purpose with or without fee is hereby granted, provided that the above
94175bdabSschwarze  * copyright notice and this permission notice appear in all copies.
104175bdabSschwarze  *
112ccd0917Sschwarze  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
124175bdabSschwarze  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
132ccd0917Sschwarze  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
144175bdabSschwarze  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
154175bdabSschwarze  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
164175bdabSschwarze  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
174175bdabSschwarze  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
180ac7e6ecSschwarze  *
190ac7e6ecSschwarze  * Common functions for mandoc(1) HTML formatters.
200ac7e6ecSschwarze  * For use by individual formatters and by the main program.
214175bdabSschwarze  */
224175bdabSschwarze #include <sys/types.h>
233f160fd1Sschwarze #include <sys/stat.h>
244175bdabSschwarze 
254175bdabSschwarze #include <assert.h>
26a66b65d0Sschwarze #include <ctype.h>
274175bdabSschwarze #include <stdarg.h>
28762c1016Sschwarze #include <stddef.h>
29aa2d850aSschwarze #include <stdio.h>
304175bdabSschwarze #include <stdint.h>
314175bdabSschwarze #include <stdlib.h>
324175bdabSschwarze #include <string.h>
334175bdabSschwarze #include <unistd.h>
344175bdabSschwarze 
354f4f7972Sschwarze #include "mandoc_aux.h"
36762c1016Sschwarze #include "mandoc_ohash.h"
376ef173c4Sschwarze #include "mandoc.h"
386ef173c4Sschwarze #include "roff.h"
394175bdabSschwarze #include "out.h"
404175bdabSschwarze #include "html.h"
412ccd0917Sschwarze #include "manconf.h"
424175bdabSschwarze #include "main.h"
434175bdabSschwarze 
/*
 * Bits for the "flags" member of struct htmldata.
 * They describe the content model of an element and how
 * white space is laid out around its tags in the output.
 */
#define	HTML_INPHRASE	 0x001	/* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 0x002	/* Establishes phrasing context. */
#define	HTML_NOSTACK	 0x004	/* Does not have an end tag. */
#define	HTML_NLBEFORE	 0x008	/* Output line break before opening. */
#define	HTML_NLBEGIN	 0x010	/* Output line break after opening. */
#define	HTML_NLEND	 0x020	/* Output line break before closing. */
#define	HTML_NLAFTER	 0x040	/* Output line break after closing. */
#define	HTML_INDENT	 0x080	/* Indent content by two spaces. */
#define	HTML_NOINDENT	 0x100	/* Exception: never indent content. */

/* Frequently used combinations of the line break bits. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

/* Static description of one HTML element type. */
struct	htmldata {
	const char	 *name;		/* Element name as written in tags. */
	int		  flags;	/* Bitwise OR of the HTML_* bits above. */
};
604175bdabSschwarze 
614175bdabSschwarze static	const struct htmldata htmltags[TAG_MAX] = {
62b1eea027Sschwarze 	{"html",	HTML_NLALL},
63b1eea027Sschwarze 	{"head",	HTML_NLALL | HTML_INDENT},
64635fddedSschwarze 	{"meta",	HTML_NOSTACK | HTML_NLALL},
65635fddedSschwarze 	{"link",	HTML_NOSTACK | HTML_NLALL},
66635fddedSschwarze 	{"style",	HTML_NLALL | HTML_INDENT},
67b1eea027Sschwarze 	{"title",	HTML_NLAROUND},
68635fddedSschwarze 	{"body",	HTML_NLALL},
696774f271Sschwarze 	{"main",	HTML_NLALL},
70b1eea027Sschwarze 	{"div",		HTML_NLAROUND},
711a2b7b3cSschwarze 	{"section",	HTML_NLALL},
7288e033f9Sschwarze 	{"nav",		HTML_NLALL},
73b1eea027Sschwarze 	{"table",	HTML_NLALL | HTML_INDENT},
74b1eea027Sschwarze 	{"tr",		HTML_NLALL | HTML_INDENT},
75b1eea027Sschwarze 	{"td",		HTML_NLAROUND},
76b1eea027Sschwarze 	{"li",		HTML_NLAROUND | HTML_INDENT},
77b1eea027Sschwarze 	{"ul",		HTML_NLALL | HTML_INDENT},
78b1eea027Sschwarze 	{"ol",		HTML_NLALL | HTML_INDENT},
79b1eea027Sschwarze 	{"dl",		HTML_NLALL | HTML_INDENT},
80b1eea027Sschwarze 	{"dt",		HTML_NLAROUND},
81b1eea027Sschwarze 	{"dd",		HTML_NLAROUND | HTML_INDENT},
82635fddedSschwarze 	{"h2",		HTML_TOPHRASE | HTML_NLAROUND},
8300b92a3fSschwarze 	{"h3",		HTML_TOPHRASE | HTML_NLAROUND},
84635fddedSschwarze 	{"p",		HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
8552e71e33Sschwarze 	{"pre",		HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
86635fddedSschwarze 	{"a",		HTML_INPHRASE | HTML_TOPHRASE},
87635fddedSschwarze 	{"b",		HTML_INPHRASE | HTML_TOPHRASE},
88635fddedSschwarze 	{"cite",	HTML_INPHRASE | HTML_TOPHRASE},
89635fddedSschwarze 	{"code",	HTML_INPHRASE | HTML_TOPHRASE},
90635fddedSschwarze 	{"i",		HTML_INPHRASE | HTML_TOPHRASE},
91635fddedSschwarze 	{"small",	HTML_INPHRASE | HTML_TOPHRASE},
92635fddedSschwarze 	{"span",	HTML_INPHRASE | HTML_TOPHRASE},
93635fddedSschwarze 	{"var",		HTML_INPHRASE | HTML_TOPHRASE},
94635fddedSschwarze 	{"br",		HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
951ae734e3Sschwarze 	{"hr",		HTML_INPHRASE | HTML_NOSTACK},
9666c5de26Sschwarze 	{"mark",	HTML_INPHRASE },
97635fddedSschwarze 	{"math",	HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
98b1eea027Sschwarze 	{"mrow",	0},
99b1eea027Sschwarze 	{"mi",		0},
10072fef9e3Sschwarze 	{"mn",		0},
101b1eea027Sschwarze 	{"mo",		0},
102b1eea027Sschwarze 	{"msup",	0},
103b1eea027Sschwarze 	{"msub",	0},
104b1eea027Sschwarze 	{"msubsup",	0},
105b1eea027Sschwarze 	{"mfrac",	0},
106b1eea027Sschwarze 	{"msqrt",	0},
107b1eea027Sschwarze 	{"mfenced",	0},
108b1eea027Sschwarze 	{"mtable",	0},
109b1eea027Sschwarze 	{"mtr",		0},
110b1eea027Sschwarze 	{"mtd",		0},
111b1eea027Sschwarze 	{"munderover",	0},
112b1eea027Sschwarze 	{"munder",	0},
113b1eea027Sschwarze 	{"mover",	0},
114fa70b73eSschwarze };
115fa70b73eSschwarze 
116762c1016Sschwarze /* Avoid duplicate HTML id= attributes. */
1176e69d8cfSschwarze 
1186e69d8cfSschwarze struct	id_entry {
1196e69d8cfSschwarze 	int	 ord;	/* Ordinal number of the latest occurrence. */
1206e69d8cfSschwarze 	char	 id[];	/* The id= attribute without any ordinal suffix. */
1216e69d8cfSschwarze };
122762c1016Sschwarze static	struct ohash	 id_unique;
123762c1016Sschwarze 
/* Life cycle helper shared by html_reset() and html_free(). */
static	void	 html_reset_internal(struct html *h);

/* Low-level output primitives. */
static	void	 print_byte(struct html *h, char c);
static	void	 print_endword(struct html *h);
static	void	 print_indent(struct html *h);
static	void	 print_word(struct html *h, const char *cp);

/* Tag, escape sequence, and link formatting. */
static	void	 print_ctag(struct html *h, struct tag *tag);
static	int	 print_escape(struct html *h, char c);
static	int	 print_encode(struct html *h, const char *p,
			const char *pend, int norecurse);
static	void	 print_href(struct html *h, const char *name,
			const char *sec, int man);
static	void	 print_metaf(struct html *h);
135fa70b73eSschwarze 
13649aff9f8Sschwarze 
13772f211f7Sschwarze void *
html_alloc(const struct manoutput * outopts)13816536faaSschwarze html_alloc(const struct manoutput *outopts)
1394175bdabSschwarze {
1404175bdabSschwarze 	struct html	*h;
1414175bdabSschwarze 
14219a69263Sschwarze 	h = mandoc_calloc(1, sizeof(struct html));
1434175bdabSschwarze 
1444614a369Sschwarze 	h->tag = NULL;
14507460cefSschwarze 	h->metac = h->metal = ESCAPE_FONTROMAN;
1462ccd0917Sschwarze 	h->style = outopts->style;
1473f160fd1Sschwarze 	if ((h->base_man1 = outopts->man) == NULL)
1483f160fd1Sschwarze 		h->base_man2 = NULL;
1493f160fd1Sschwarze 	else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
1503f160fd1Sschwarze 		*h->base_man2++ = '\0';
1512ccd0917Sschwarze 	h->base_includes = outopts->includes;
1522ccd0917Sschwarze 	if (outopts->fragment)
153ca0ce676Sschwarze 		h->oflags |= HTML_FRAGMENT;
1543327fa00Sschwarze 	if (outopts->toc)
1553327fa00Sschwarze 		h->oflags |= HTML_TOC;
1564175bdabSschwarze 
1576e69d8cfSschwarze 	mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
158762c1016Sschwarze 
159526e306bSschwarze 	return h;
1604175bdabSschwarze }
1614175bdabSschwarze 
1621fd1dc02Sschwarze static void
html_reset_internal(struct html * h)1631fd1dc02Sschwarze html_reset_internal(struct html *h)
1644175bdabSschwarze {
1654175bdabSschwarze 	struct tag	*tag;
1666e69d8cfSschwarze 	struct id_entry	*entry;
167762c1016Sschwarze 	unsigned int	 slot;
1684175bdabSschwarze 
1694614a369Sschwarze 	while ((tag = h->tag) != NULL) {
1704614a369Sschwarze 		h->tag = tag->next;
1714175bdabSschwarze 		free(tag);
1724175bdabSschwarze 	}
1736e69d8cfSschwarze 	entry = ohash_first(&id_unique, &slot);
1746e69d8cfSschwarze 	while (entry != NULL) {
1756e69d8cfSschwarze 		free(entry);
1766e69d8cfSschwarze 		entry = ohash_next(&id_unique, &slot);
177762c1016Sschwarze 	}
178762c1016Sschwarze 	ohash_delete(&id_unique);
1794175bdabSschwarze }
1804175bdabSschwarze 
1814175bdabSschwarze void
html_reset(void * p)1821fd1dc02Sschwarze html_reset(void *p)
1831fd1dc02Sschwarze {
1841fd1dc02Sschwarze 	html_reset_internal(p);
1856e69d8cfSschwarze 	mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
1861fd1dc02Sschwarze }
1871fd1dc02Sschwarze 
/*
 * Discard the per-document state of the formatter "p"
 * and then release the formatter object itself.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
1941fd1dc02Sschwarze 
1951fd1dc02Sschwarze void
print_gen_head(struct html * h)1964175bdabSschwarze print_gen_head(struct html *h)
1974175bdabSschwarze {
198d649d931Sschwarze 	struct tag	*t;
1994175bdabSschwarze 
200229cc7fdSschwarze 	print_otag(h, TAG_META, "?", "charset", "utf-8");
2019d12dee7Santon 	print_otag(h, TAG_META, "??", "name", "viewport",
2029d12dee7Santon 	    "content", "width=device-width, initial-scale=1.0");
2034579ceb6Sschwarze 	if (h->style != NULL) {
2044579ceb6Sschwarze 		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
2054579ceb6Sschwarze 		    h->style, "type", "text/css", "media", "all");
2064579ceb6Sschwarze 		return;
2074579ceb6Sschwarze 	}
2084175bdabSschwarze 
209d649d931Sschwarze 	/*
2104579ceb6Sschwarze 	 * Print a minimal embedded style sheet.
211d649d931Sschwarze 	 */
212b1eea027Sschwarze 
213229cc7fdSschwarze 	t = print_otag(h, TAG_STYLE, "");
214b1eea027Sschwarze 	print_text(h, "table.head, table.foot { width: 100%; }");
215d5746e42Sschwarze 	print_endline(h);
216b1eea027Sschwarze 	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
217d5746e42Sschwarze 	print_endline(h);
218b1eea027Sschwarze 	print_text(h, "td.head-vol { text-align: center; }");
219d5746e42Sschwarze 	print_endline(h);
22090849764Sschwarze 	print_text(h, ".Nd, .Bf, .Op { display: inline; }");
2212f6b432eSschwarze 	print_endline(h);
22290849764Sschwarze 	print_text(h, ".Pa, .Ad { font-style: italic; }");
2232e36b507Sschwarze 	print_endline(h);
22490849764Sschwarze 	print_text(h, ".Ms { font-weight: bold; }");
225d4430aa5Sschwarze 	print_endline(h);
22690849764Sschwarze 	print_text(h, ".Bl-diag ");
227492a74caSschwarze 	print_byte(h, '>');
228492a74caSschwarze 	print_text(h, " dt { font-weight: bold; }");
229492a74caSschwarze 	print_endline(h);
23090849764Sschwarze 	print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
23190849764Sschwarze 	    "{ font-weight: bold; font-family: inherit; }");
232d649d931Sschwarze 	print_tagq(h, t);
2334175bdabSschwarze }
2344175bdabSschwarze 
235cefe8974Sschwarze int
html_setfont(struct html * h,enum mandoc_esc font)236cefe8974Sschwarze html_setfont(struct html *h, enum mandoc_esc font)
237fa70b73eSschwarze {
238cefe8974Sschwarze 	switch (font) {
23949aff9f8Sschwarze 	case ESCAPE_FONTPREV:
240fa70b73eSschwarze 		font = h->metal;
241fa70b73eSschwarze 		break;
24249aff9f8Sschwarze 	case ESCAPE_FONTITALIC:
24349aff9f8Sschwarze 	case ESCAPE_FONTBOLD:
24449aff9f8Sschwarze 	case ESCAPE_FONTBI:
245cefe8974Sschwarze 	case ESCAPE_FONTROMAN:
2467d063611Sschwarze 	case ESCAPE_FONTCR:
2477d063611Sschwarze 	case ESCAPE_FONTCB:
2487d063611Sschwarze 	case ESCAPE_FONTCI:
2491b5bfe78Sschwarze 		break;
25049aff9f8Sschwarze 	case ESCAPE_FONT:
251cefe8974Sschwarze 		font = ESCAPE_FONTROMAN;
252fa70b73eSschwarze 		break;
253fa70b73eSschwarze 	default:
254cefe8974Sschwarze 		return 0;
255cefe8974Sschwarze 	}
256cefe8974Sschwarze 	h->metal = h->metac;
257cefe8974Sschwarze 	h->metac = font;
258cefe8974Sschwarze 	return 1;
259fa70b73eSschwarze }
260fa70b73eSschwarze 
261cefe8974Sschwarze static void
print_metaf(struct html * h)262cefe8974Sschwarze print_metaf(struct html *h)
263cefe8974Sschwarze {
2643a7b861cSschwarze 	if (h->metaf) {
2653a7b861cSschwarze 		print_tagq(h, h->metaf);
2663a7b861cSschwarze 		h->metaf = NULL;
2673a7b861cSschwarze 	}
268cefe8974Sschwarze 	switch (h->metac) {
269cefe8974Sschwarze 	case ESCAPE_FONTITALIC:
270229cc7fdSschwarze 		h->metaf = print_otag(h, TAG_I, "");
27168941ea9Sschwarze 		break;
272cefe8974Sschwarze 	case ESCAPE_FONTBOLD:
273229cc7fdSschwarze 		h->metaf = print_otag(h, TAG_B, "");
27468941ea9Sschwarze 		break;
275cefe8974Sschwarze 	case ESCAPE_FONTBI:
276229cc7fdSschwarze 		h->metaf = print_otag(h, TAG_B, "");
277229cc7fdSschwarze 		print_otag(h, TAG_I, "");
27868941ea9Sschwarze 		break;
2797d063611Sschwarze 	case ESCAPE_FONTCR:
2801b5bfe78Sschwarze 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
2811b5bfe78Sschwarze 		break;
2827d063611Sschwarze 	case ESCAPE_FONTCB:
2837d063611Sschwarze 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
2847d063611Sschwarze 		print_otag(h, TAG_B, "");
2857d063611Sschwarze 		break;
2867d063611Sschwarze 	case ESCAPE_FONTCI:
2877d063611Sschwarze 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
2887d063611Sschwarze 		print_otag(h, TAG_I, "");
2897d063611Sschwarze 		break;
29068941ea9Sschwarze 	default:
29168941ea9Sschwarze 		break;
29268941ea9Sschwarze 	}
293fa70b73eSschwarze }
294fa70b73eSschwarze 
2957f442bfeSschwarze void
html_close_paragraph(struct html * h)2967f442bfeSschwarze html_close_paragraph(struct html *h)
2977f442bfeSschwarze {
2981fdb7b23Sschwarze 	struct tag	*this, *next;
299f1a57f6eSschwarze 	int		 flags;
3007f442bfeSschwarze 
3011fdb7b23Sschwarze 	this = h->tag;
3021fdb7b23Sschwarze 	for (;;) {
3031fdb7b23Sschwarze 		next = this->next;
304f1a57f6eSschwarze 		flags = htmltags[this->tag].flags;
305f1a57f6eSschwarze 		if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
3061fdb7b23Sschwarze 			print_ctag(h, this);
307f1a57f6eSschwarze 		if ((flags & HTML_INPHRASE) == 0)
3087f442bfeSschwarze 			break;
3091fdb7b23Sschwarze 		this = next;
3107f442bfeSschwarze 	}
3117f442bfeSschwarze }
3127f442bfeSschwarze 
313275804acSschwarze /*
314275804acSschwarze  * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
315275804acSschwarze  * TOKEN_NONE does not switch.  The old mode is returned.
316275804acSschwarze  */
317275804acSschwarze enum roff_tok
html_fillmode(struct html * h,enum roff_tok want)318275804acSschwarze html_fillmode(struct html *h, enum roff_tok want)
319275804acSschwarze {
320275804acSschwarze 	struct tag	*t;
321275804acSschwarze 	enum roff_tok	 had;
322275804acSschwarze 
323275804acSschwarze 	for (t = h->tag; t != NULL; t = t->next)
324275804acSschwarze 		if (t->tag == TAG_PRE)
325275804acSschwarze 			break;
326275804acSschwarze 
327275804acSschwarze 	had = t == NULL ? ROFF_fi : ROFF_nf;
328275804acSschwarze 
329275804acSschwarze 	if (want != had) {
330275804acSschwarze 		switch (want) {
331275804acSschwarze 		case ROFF_fi:
332275804acSschwarze 			print_tagq(h, t);
333275804acSschwarze 			break;
334275804acSschwarze 		case ROFF_nf:
3357f442bfeSschwarze 			html_close_paragraph(h);
336275804acSschwarze 			print_otag(h, TAG_PRE, "");
337275804acSschwarze 			break;
338275804acSschwarze 		case TOKEN_NONE:
339275804acSschwarze 			break;
340275804acSschwarze 		default:
341275804acSschwarze 			abort();
342275804acSschwarze 		}
343275804acSschwarze 	}
344275804acSschwarze 	return had;
345275804acSschwarze }
346275804acSschwarze 
3470ac7e6ecSschwarze /*
3480ac7e6ecSschwarze  * Allocate a string to be used for the "id=" attribute of an HTML
3490ac7e6ecSschwarze  * element and/or as a segment identifier for a URI in an <a> element.
3500ac7e6ecSschwarze  * The function may fail and return NULL if the node lacks text data
3510ac7e6ecSschwarze  * to create the attribute from.
3526e69d8cfSschwarze  * The caller is responsible for free(3)ing the returned string.
3536e69d8cfSschwarze  *
3540ac7e6ecSschwarze  * If the "unique" argument is non-zero, the "id_unique" ohash table
3556e69d8cfSschwarze  * is used for de-duplication.  If the "unique" argument is 1,
3566e69d8cfSschwarze  * it is the first time the function is called for this tag and
3576e69d8cfSschwarze  * location, so if an ordinal suffix is needed, it is incremented.
3586e69d8cfSschwarze  * If the "unique" argument is 2, it is the second time the function
3596e69d8cfSschwarze  * is called for this tag and location, so the ordinal suffix
3606e69d8cfSschwarze  * remains unchanged.
3610ac7e6ecSschwarze  */
3626ef173c4Sschwarze char *
html_make_id(const struct roff_node * n,int unique)363762c1016Sschwarze html_make_id(const struct roff_node *n, int unique)
3646ef173c4Sschwarze {
3656ef173c4Sschwarze 	const struct roff_node	*nch;
3666e69d8cfSschwarze 	struct id_entry		*entry;
3676e69d8cfSschwarze 	char			*buf, *cp;
3686e69d8cfSschwarze 	size_t			 len;
369762c1016Sschwarze 	unsigned int		 slot;
3706ef173c4Sschwarze 
371c220f9cfSschwarze 	if (n->tag != NULL)
372c220f9cfSschwarze 		buf = mandoc_strdup(n->tag);
3730ac7e6ecSschwarze 	else {
3740ac7e6ecSschwarze 		switch (n->tok) {
3750ac7e6ecSschwarze 		case MDOC_Sh:
3760ac7e6ecSschwarze 		case MDOC_Ss:
3770ac7e6ecSschwarze 		case MDOC_Sx:
3780ac7e6ecSschwarze 		case MAN_SH:
3790ac7e6ecSschwarze 		case MAN_SS:
3806ef173c4Sschwarze 			for (nch = n->child; nch != NULL; nch = nch->next)
3816ef173c4Sschwarze 				if (nch->type != ROFFT_TEXT)
3826ef173c4Sschwarze 					return NULL;
3836ef173c4Sschwarze 			buf = NULL;
3846ef173c4Sschwarze 			deroff(&buf, n);
3858806236eSschwarze 			if (buf == NULL)
3868806236eSschwarze 				return NULL;
3870ac7e6ecSschwarze 			break;
3880ac7e6ecSschwarze 		default:
389e053e0fdSschwarze 			if (n->child == NULL || n->child->type != ROFFT_TEXT)
3900ac7e6ecSschwarze 				return NULL;
3910ac7e6ecSschwarze 			buf = mandoc_strdup(n->child->string);
3920ac7e6ecSschwarze 			break;
3930ac7e6ecSschwarze 		}
3940ac7e6ecSschwarze 	}
3956ef173c4Sschwarze 
3960b579660Sschwarze 	/*
3970b579660Sschwarze 	 * In ID attributes, only use ASCII characters that are
3980b579660Sschwarze 	 * permitted in URL-fragment strings according to the
3990b579660Sschwarze 	 * explicit list at:
4000b579660Sschwarze 	 * https://url.spec.whatwg.org/#url-fragment-string
40117117656Sschwarze 	 * In addition, reserve '~' for ordinal suffixes.
4020b579660Sschwarze 	 */
4036ef173c4Sschwarze 
404*3cc45a9dSschwarze 	for (cp = buf; *cp != '\0'; cp++) {
405*3cc45a9dSschwarze 		if (*cp == ASCII_HYPH)
406*3cc45a9dSschwarze 			*cp = '-';
407*3cc45a9dSschwarze 		else if (isalnum((unsigned char)*cp) == 0 &&
40817117656Sschwarze 		    strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
4096ef173c4Sschwarze 			*cp = '_';
410*3cc45a9dSschwarze 	}
4116ef173c4Sschwarze 
412762c1016Sschwarze 	if (unique == 0)
413762c1016Sschwarze 		return buf;
414762c1016Sschwarze 
415762c1016Sschwarze 	/* Avoid duplicate HTML id= attributes. */
416762c1016Sschwarze 
417762c1016Sschwarze 	slot = ohash_qlookup(&id_unique, buf);
4186e69d8cfSschwarze 	if ((entry = ohash_find(&id_unique, slot)) == NULL) {
4196e69d8cfSschwarze 		len = strlen(buf) + 1;
4206e69d8cfSschwarze 		entry = mandoc_malloc(sizeof(*entry) + len);
4216e69d8cfSschwarze 		entry->ord = 1;
4226e69d8cfSschwarze 		memcpy(entry->id, buf, len);
4236e69d8cfSschwarze 		ohash_insert(&id_unique, slot, entry);
4246e69d8cfSschwarze 	} else if (unique == 1)
4256e69d8cfSschwarze 		entry->ord++;
4266e69d8cfSschwarze 
4276e69d8cfSschwarze 	if (entry->ord > 1) {
4286e69d8cfSschwarze 		cp = buf;
42917117656Sschwarze 		mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
4306e69d8cfSschwarze 		free(cp);
431762c1016Sschwarze 	}
4326ef173c4Sschwarze 	return buf;
4336ef173c4Sschwarze }
4346ef173c4Sschwarze 
435fa70b73eSschwarze static int
print_escape(struct html * h,char c)436d5746e42Sschwarze print_escape(struct html *h, char c)
43771f7ad47Sschwarze {
43871f7ad47Sschwarze 
43971f7ad47Sschwarze 	switch (c) {
44071f7ad47Sschwarze 	case '<':
441d5746e42Sschwarze 		print_word(h, "&lt;");
44271f7ad47Sschwarze 		break;
44371f7ad47Sschwarze 	case '>':
444d5746e42Sschwarze 		print_word(h, "&gt;");
44571f7ad47Sschwarze 		break;
44671f7ad47Sschwarze 	case '&':
447d5746e42Sschwarze 		print_word(h, "&amp;");
44871f7ad47Sschwarze 		break;
44971f7ad47Sschwarze 	case '"':
450d5746e42Sschwarze 		print_word(h, "&quot;");
45171f7ad47Sschwarze 		break;
45271f7ad47Sschwarze 	case ASCII_NBRSP:
453d5746e42Sschwarze 		print_word(h, "&nbsp;");
45471f7ad47Sschwarze 		break;
45571f7ad47Sschwarze 	case ASCII_HYPH:
456d5746e42Sschwarze 		print_byte(h, '-');
4579630d74cSschwarze 		break;
45871f7ad47Sschwarze 	case ASCII_BREAK:
45971f7ad47Sschwarze 		break;
46071f7ad47Sschwarze 	default:
461526e306bSschwarze 		return 0;
46271f7ad47Sschwarze 	}
463526e306bSschwarze 	return 1;
46471f7ad47Sschwarze }
46571f7ad47Sschwarze 
46671f7ad47Sschwarze static int
print_encode(struct html * h,const char * p,const char * pend,int norecurse)467fef1eecdSschwarze print_encode(struct html *h, const char *p, const char *pend, int norecurse)
4684175bdabSschwarze {
469d5746e42Sschwarze 	char		 numbuf[16];
470fa70b73eSschwarze 	const char	*seq;
4716167ec38Sschwarze 	size_t		 sz;
4726167ec38Sschwarze 	int		 c, len, breakline, nospace;
473a5e11edeSschwarze 	enum mandoc_esc	 esc;
4746167ec38Sschwarze 	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
4751281a50cSschwarze 		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
476fa70b73eSschwarze 
477fef1eecdSschwarze 	if (pend == NULL)
478fef1eecdSschwarze 		pend = strchr(p, '\0');
479fef1eecdSschwarze 
4806167ec38Sschwarze 	breakline = 0;
481fa70b73eSschwarze 	nospace = 0;
4824175bdabSschwarze 
483fef1eecdSschwarze 	while (p < pend) {
4847eac745dSschwarze 		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
4857eac745dSschwarze 			h->flags &= ~HTML_SKIPCHAR;
4867eac745dSschwarze 			p++;
4877eac745dSschwarze 			continue;
4887eac745dSschwarze 		}
4897eac745dSschwarze 
490d5746e42Sschwarze 		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
491d5746e42Sschwarze 			print_byte(h, *p);
492aa2d850aSschwarze 
4936167ec38Sschwarze 		if (breakline &&
4946167ec38Sschwarze 		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
495849903d9Sschwarze 			print_otag(h, TAG_BR, "");
4966167ec38Sschwarze 			breakline = 0;
4976167ec38Sschwarze 			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
4986167ec38Sschwarze 				p++;
4996167ec38Sschwarze 			continue;
5006167ec38Sschwarze 		}
5016167ec38Sschwarze 
502fef1eecdSschwarze 		if (p >= pend)
503a5e11edeSschwarze 			break;
504a5e11edeSschwarze 
5056167ec38Sschwarze 		if (*p == ' ') {
5066167ec38Sschwarze 			print_endword(h);
5076167ec38Sschwarze 			p++;
5086167ec38Sschwarze 			continue;
5096167ec38Sschwarze 		}
5106167ec38Sschwarze 
511d5746e42Sschwarze 		if (print_escape(h, *p++))
512fa70b73eSschwarze 			continue;
513fa70b73eSschwarze 
514a5e11edeSschwarze 		esc = mandoc_escape(&p, &seq, &len);
515a5e11edeSschwarze 		switch (esc) {
51649aff9f8Sschwarze 		case ESCAPE_FONT:
51749aff9f8Sschwarze 		case ESCAPE_FONTPREV:
51849aff9f8Sschwarze 		case ESCAPE_FONTBOLD:
51949aff9f8Sschwarze 		case ESCAPE_FONTITALIC:
52049aff9f8Sschwarze 		case ESCAPE_FONTBI:
52149aff9f8Sschwarze 		case ESCAPE_FONTROMAN:
5227d063611Sschwarze 		case ESCAPE_FONTCR:
5237d063611Sschwarze 		case ESCAPE_FONTCB:
5247d063611Sschwarze 		case ESCAPE_FONTCI:
52565433e6fSschwarze 			if (0 == norecurse) {
52665433e6fSschwarze 				h->flags |= HTML_NOSPACE;
527cefe8974Sschwarze 				if (html_setfont(h, esc))
528cefe8974Sschwarze 					print_metaf(h);
52965433e6fSschwarze 				h->flags &= ~HTML_NOSPACE;
53065433e6fSschwarze 			}
5317eac745dSschwarze 			continue;
53249aff9f8Sschwarze 		case ESCAPE_SKIPCHAR:
5337eac745dSschwarze 			h->flags |= HTML_SKIPCHAR;
5347eac745dSschwarze 			continue;
5356f6722cbSschwarze 		case ESCAPE_ERROR:
5366f6722cbSschwarze 			continue;
5377eac745dSschwarze 		default:
5387eac745dSschwarze 			break;
5397eac745dSschwarze 		}
5407eac745dSschwarze 
5417eac745dSschwarze 		if (h->flags & HTML_SKIPCHAR) {
5427eac745dSschwarze 			h->flags &= ~HTML_SKIPCHAR;
5437eac745dSschwarze 			continue;
5447eac745dSschwarze 		}
5457eac745dSschwarze 
5467eac745dSschwarze 		switch (esc) {
54749aff9f8Sschwarze 		case ESCAPE_UNICODE:
54871f7ad47Sschwarze 			/* Skip past "u" header. */
549a5e11edeSschwarze 			c = mchars_num2uc(seq + 1, len - 1);
550a5e11edeSschwarze 			break;
55149aff9f8Sschwarze 		case ESCAPE_NUMBERED:
552a5e11edeSschwarze 			c = mchars_num2char(seq, len);
553e93f0bfbSschwarze 			if (c < 0)
554e93f0bfbSschwarze 				continue;
555a5e11edeSschwarze 			break;
55649aff9f8Sschwarze 		case ESCAPE_SPECIAL:
55716536faaSschwarze 			c = mchars_spec2cp(seq, len);
558e93f0bfbSschwarze 			if (c <= 0)
559e93f0bfbSschwarze 				continue;
560a5e11edeSschwarze 			break;
5616f6722cbSschwarze 		case ESCAPE_UNDEF:
5626f6722cbSschwarze 			c = *seq;
5636f6722cbSschwarze 			break;
5648138dde8Sschwarze 		case ESCAPE_DEVICE:
5658138dde8Sschwarze 			print_word(h, "html");
5668138dde8Sschwarze 			continue;
5676167ec38Sschwarze 		case ESCAPE_BREAK:
5686167ec38Sschwarze 			breakline = 1;
5696167ec38Sschwarze 			continue;
57049aff9f8Sschwarze 		case ESCAPE_NOSPACE:
571a5e11edeSschwarze 			if ('\0' == *p)
572fa70b73eSschwarze 				nospace = 1;
57316872170Sschwarze 			continue;
57464f4916cSschwarze 		case ESCAPE_OVERSTRIKE:
57564f4916cSschwarze 			if (len == 0)
57664f4916cSschwarze 				continue;
57764f4916cSschwarze 			c = seq[len - 1];
57864f4916cSschwarze 			break;
579a5e11edeSschwarze 		default:
58016872170Sschwarze 			continue;
581a5e11edeSschwarze 		}
582e93f0bfbSschwarze 		if ((c < 0x20 && c != 0x09) ||
583e93f0bfbSschwarze 		    (c > 0x7E && c < 0xA0))
58416872170Sschwarze 			c = 0xFFFD;
585d5746e42Sschwarze 		if (c > 0x7E) {
586497d2755Sbentley 			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
587d5746e42Sschwarze 			print_word(h, numbuf);
588d5746e42Sschwarze 		} else if (print_escape(h, c) == 0)
589d5746e42Sschwarze 			print_byte(h, c);
590fa70b73eSschwarze 	}
591fa70b73eSschwarze 
592526e306bSschwarze 	return nospace;
5934175bdabSschwarze }
5944175bdabSschwarze 
5958521b0bcSschwarze static void
print_href(struct html * h,const char * name,const char * sec,int man)596fef1eecdSschwarze print_href(struct html *h, const char *name, const char *sec, int man)
5978521b0bcSschwarze {
5983f160fd1Sschwarze 	struct stat	 sb;
599fef1eecdSschwarze 	const char	*p, *pp;
6003f160fd1Sschwarze 	char		*filename;
601fef1eecdSschwarze 
6023f160fd1Sschwarze 	if (man) {
6033f160fd1Sschwarze 		pp = h->base_man1;
6043f160fd1Sschwarze 		if (h->base_man2 != NULL) {
6053f160fd1Sschwarze 			mandoc_asprintf(&filename, "%s.%s", name, sec);
6063f160fd1Sschwarze 			if (stat(filename, &sb) == -1)
6073f160fd1Sschwarze 				pp = h->base_man2;
6083f160fd1Sschwarze 			free(filename);
6093f160fd1Sschwarze 		}
6103f160fd1Sschwarze 	} else
6113f160fd1Sschwarze 		pp = h->base_includes;
6123f160fd1Sschwarze 
613fef1eecdSschwarze 	while ((p = strchr(pp, '%')) != NULL) {
614fef1eecdSschwarze 		print_encode(h, pp, p, 1);
615fef1eecdSschwarze 		if (man && p[1] == 'S') {
616fef1eecdSschwarze 			if (sec == NULL)
617d5746e42Sschwarze 				print_byte(h, '1');
618fef1eecdSschwarze 			else
619fef1eecdSschwarze 				print_encode(h, sec, NULL, 1);
620fef1eecdSschwarze 		} else if ((man && p[1] == 'N') ||
621fef1eecdSschwarze 		    (man == 0 && p[1] == 'I'))
622fef1eecdSschwarze 			print_encode(h, name, NULL, 1);
623fef1eecdSschwarze 		else
624fef1eecdSschwarze 			print_encode(h, p, p + 2, 1);
625fef1eecdSschwarze 		pp = p + 2;
626fef1eecdSschwarze 	}
627fef1eecdSschwarze 	if (*pp != '\0')
628fef1eecdSschwarze 		print_encode(h, pp, NULL, 1);
6298521b0bcSschwarze }
6308521b0bcSschwarze 
6314175bdabSschwarze struct tag *
print_otag(struct html * h,enum htmltag tag,const char * fmt,...)632229cc7fdSschwarze print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
6334175bdabSschwarze {
634229cc7fdSschwarze 	va_list		 ap;
6354175bdabSschwarze 	struct tag	*t;
636fef1eecdSschwarze 	const char	*attr;
6371e98cf61Sschwarze 	char		*arg1, *arg2;
638373dd5baSschwarze 	int		 style_written, tflags;
639b1eea027Sschwarze 
640b1eea027Sschwarze 	tflags = htmltags[tag].flags;
6414175bdabSschwarze 
642635fddedSschwarze 	/* Flow content is not allowed in phrasing context. */
643635fddedSschwarze 
644635fddedSschwarze 	if ((tflags & HTML_INPHRASE) == 0) {
645635fddedSschwarze 		for (t = h->tag; t != NULL; t = t->next) {
646635fddedSschwarze 			if (t->closed)
647635fddedSschwarze 				continue;
648635fddedSschwarze 			assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
649635fddedSschwarze 			break;
650635fddedSschwarze 		}
6515847b48dSschwarze 
6525847b48dSschwarze 	/*
6535847b48dSschwarze 	 * Always wrap phrasing elements in a paragraph
6545847b48dSschwarze 	 * unless already contained in some flow container;
6555847b48dSschwarze 	 * never put them directly into a section.
6565847b48dSschwarze 	 */
6575847b48dSschwarze 
6585847b48dSschwarze 	} else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
6595847b48dSschwarze 		print_otag(h, TAG_P, "c", "Pp");
660635fddedSschwarze 
6614614a369Sschwarze 	/* Push this tag onto the stack of open scopes. */
6628521b0bcSschwarze 
663b1eea027Sschwarze 	if ((tflags & HTML_NOSTACK) == 0) {
66419a69263Sschwarze 		t = mandoc_malloc(sizeof(struct tag));
6654175bdabSschwarze 		t->tag = tag;
6664614a369Sschwarze 		t->next = h->tag;
667520a575cSschwarze 		t->refcnt = 0;
668520a575cSschwarze 		t->closed = 0;
6694614a369Sschwarze 		h->tag = t;
6704175bdabSschwarze 	} else
6714175bdabSschwarze 		t = NULL;
6724175bdabSschwarze 
673b1eea027Sschwarze 	if (tflags & HTML_NLBEFORE)
674d5746e42Sschwarze 		print_endline(h);
675d5746e42Sschwarze 	if (h->col == 0)
676d5746e42Sschwarze 		print_indent(h);
677b1eea027Sschwarze 	else if ((h->flags & HTML_NOSPACE) == 0) {
678b1eea027Sschwarze 		if (h->flags & HTML_KEEP)
679497d2755Sbentley 			print_word(h, "&#x00A0;");
680b1eea027Sschwarze 		else {
681b1eea027Sschwarze 			if (h->flags & HTML_PREKEEP)
682769ee804Sschwarze 				h->flags |= HTML_KEEP;
683d5746e42Sschwarze 			print_endword(h);
684b1eea027Sschwarze 		}
685769ee804Sschwarze 	}
6864175bdabSschwarze 
687ddce0b0cSschwarze 	if ( ! (h->flags & HTML_NONOSPACE))
688ddce0b0cSschwarze 		h->flags &= ~HTML_NOSPACE;
6898cd724fbSschwarze 	else
6908cd724fbSschwarze 		h->flags |= HTML_NOSPACE;
691ddce0b0cSschwarze 
6928521b0bcSschwarze 	/* Print out the tag name and attributes. */
6938521b0bcSschwarze 
694d5746e42Sschwarze 	print_byte(h, '<');
695d5746e42Sschwarze 	print_word(h, htmltags[tag].name);
696229cc7fdSschwarze 
697229cc7fdSschwarze 	va_start(ap, fmt);
698229cc7fdSschwarze 
699373dd5baSschwarze 	while (*fmt != '\0' && *fmt != 's') {
7001e98cf61Sschwarze 
7015c9781a4Sschwarze 		/* Parse attributes and arguments. */
7021e98cf61Sschwarze 
7031e98cf61Sschwarze 		arg1 = va_arg(ap, char *);
7045c9781a4Sschwarze 		arg2 = NULL;
705229cc7fdSschwarze 		switch (*fmt++) {
706229cc7fdSschwarze 		case 'c':
707fef1eecdSschwarze 			attr = "class";
708229cc7fdSschwarze 			break;
709229cc7fdSschwarze 		case 'h':
710fef1eecdSschwarze 			attr = "href";
711229cc7fdSschwarze 			break;
712229cc7fdSschwarze 		case 'i':
713fef1eecdSschwarze 			attr = "id";
714229cc7fdSschwarze 			break;
71588e033f9Sschwarze 		case 'r':
71688e033f9Sschwarze 			attr = "role";
71788e033f9Sschwarze 			break;
718229cc7fdSschwarze 		case '?':
7191e98cf61Sschwarze 			attr = arg1;
7201e98cf61Sschwarze 			arg1 = va_arg(ap, char *);
721229cc7fdSschwarze 			break;
722229cc7fdSschwarze 		default:
723229cc7fdSschwarze 			abort();
724229cc7fdSschwarze 		}
7251e98cf61Sschwarze 		if (*fmt == 'M')
7261e98cf61Sschwarze 			arg2 = va_arg(ap, char *);
7271e98cf61Sschwarze 		if (arg1 == NULL)
7281e98cf61Sschwarze 			continue;
7291e98cf61Sschwarze 
7305c9781a4Sschwarze 		/* Print the attributes. */
7311e98cf61Sschwarze 
732d5746e42Sschwarze 		print_byte(h, ' ');
733d5746e42Sschwarze 		print_word(h, attr);
734d5746e42Sschwarze 		print_byte(h, '=');
735d5746e42Sschwarze 		print_byte(h, '"');
736fef1eecdSschwarze 		switch (*fmt) {
737fef1eecdSschwarze 		case 'I':
7381e98cf61Sschwarze 			print_href(h, arg1, NULL, 0);
739fef1eecdSschwarze 			fmt++;
740fef1eecdSschwarze 			break;
741e3cc44b0Sschwarze 		case 'M':
742e3cc44b0Sschwarze 			print_href(h, arg1, arg2, 1);
743e3cc44b0Sschwarze 			fmt++;
744e3cc44b0Sschwarze 			break;
745fef1eecdSschwarze 		case 'R':
746d5746e42Sschwarze 			print_byte(h, '#');
747e3cc44b0Sschwarze 			print_encode(h, arg1, NULL, 1);
748fef1eecdSschwarze 			fmt++;
749e3cc44b0Sschwarze 			break;
750fef1eecdSschwarze 		default:
7511e98cf61Sschwarze 			print_encode(h, arg1, NULL, 1);
752373dd5baSschwarze 			break;
753373dd5baSschwarze 		}
754373dd5baSschwarze 		print_byte(h, '"');
755373dd5baSschwarze 	}
756373dd5baSschwarze 
757373dd5baSschwarze 	style_written = 0;
758373dd5baSschwarze 	while (*fmt++ == 's') {
759373dd5baSschwarze 		arg1 = va_arg(ap, char *);
760373dd5baSschwarze 		arg2 = va_arg(ap, char *);
761373dd5baSschwarze 		if (arg2 == NULL)
762373dd5baSschwarze 			continue;
763373dd5baSschwarze 		print_byte(h, ' ');
764373dd5baSschwarze 		if (style_written == 0) {
765373dd5baSschwarze 			print_word(h, "style=\"");
766373dd5baSschwarze 			style_written = 1;
767373dd5baSschwarze 		}
7685c9781a4Sschwarze 		print_word(h, arg1);
7691e98cf61Sschwarze 		print_byte(h, ':');
7701e98cf61Sschwarze 		print_byte(h, ' ');
7715c9781a4Sschwarze 		print_word(h, arg2);
772d5746e42Sschwarze 		print_byte(h, ';');
773229cc7fdSschwarze 	}
774373dd5baSschwarze 	if (style_written)
775d5746e42Sschwarze 		print_byte(h, '"');
776373dd5baSschwarze 
777229cc7fdSschwarze 	va_end(ap);
7788521b0bcSschwarze 
779d649d931Sschwarze 	/* Accommodate for "well-formed" singleton escaping. */
7808521b0bcSschwarze 
781635fddedSschwarze 	if (htmltags[tag].flags & HTML_NOSTACK)
782d5746e42Sschwarze 		print_byte(h, '/');
7838521b0bcSschwarze 
784d5746e42Sschwarze 	print_byte(h, '>');
7854175bdabSschwarze 
786b1eea027Sschwarze 	if (tflags & HTML_NLBEGIN)
787d5746e42Sschwarze 		print_endline(h);
788b1eea027Sschwarze 	else
7894175bdabSschwarze 		h->flags |= HTML_NOSPACE;
790467b61c6Sschwarze 
791b1eea027Sschwarze 	if (tflags & HTML_INDENT)
792b1eea027Sschwarze 		h->indent++;
793b1eea027Sschwarze 	if (tflags & HTML_NOINDENT)
794b1eea027Sschwarze 		h->noindent++;
795467b61c6Sschwarze 
796526e306bSschwarze 	return t;
7974175bdabSschwarze }
7984175bdabSschwarze 
7990ac7e6ecSschwarze /*
8000ac7e6ecSschwarze  * Print an element with an optional "id=" attribute.
801e053e0fdSschwarze  * If the element has phrasing content and an "id=" attribute,
802e053e0fdSschwarze  * also add a permalink: outside if it can be in phrasing context,
803e053e0fdSschwarze  * inside otherwise.
8040ac7e6ecSschwarze  */
8050ac7e6ecSschwarze struct tag *
print_otag_id(struct html * h,enum htmltag elemtype,const char * cattr,struct roff_node * n)8060ac7e6ecSschwarze print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
8070ac7e6ecSschwarze     struct roff_node *n)
8080ac7e6ecSschwarze {
809e053e0fdSschwarze 	struct roff_node *nch;
8100ac7e6ecSschwarze 	struct tag	*ret, *t;
8119226948cSschwarze 	char		*id, *href;
8120ac7e6ecSschwarze 
8130ac7e6ecSschwarze 	ret = NULL;
8149226948cSschwarze 	id = href = NULL;
8150ac7e6ecSschwarze 	if (n->flags & NODE_ID)
8160ac7e6ecSschwarze 		id = html_make_id(n, 1);
8179226948cSschwarze 	if (n->flags & NODE_HREF)
8186e69d8cfSschwarze 		href = id == NULL ? html_make_id(n, 2) : id;
8199226948cSschwarze 	if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
8209226948cSschwarze 		ret = print_otag(h, TAG_A, "chR", "permalink", href);
8210ac7e6ecSschwarze 	t = print_otag(h, elemtype, "ci", cattr, id);
8220ac7e6ecSschwarze 	if (ret == NULL) {
8230ac7e6ecSschwarze 		ret = t;
8249226948cSschwarze 		if (href != NULL && (nch = n->child) != NULL) {
825e053e0fdSschwarze 			/* man(7) is safe, it tags phrasing content only. */
826e053e0fdSschwarze 			if (n->tok > MDOC_MAX ||
827e053e0fdSschwarze 			    htmltags[elemtype].flags & HTML_TOPHRASE)
828e053e0fdSschwarze 				nch = NULL;
829e053e0fdSschwarze 			else  /* For mdoc(7), beware of nested blocks. */
830e053e0fdSschwarze 				while (nch != NULL && nch->type == ROFFT_TEXT)
831e053e0fdSschwarze 					nch = nch->next;
832e053e0fdSschwarze 			if (nch == NULL)
8339226948cSschwarze 				print_otag(h, TAG_A, "chR", "permalink", href);
8340ac7e6ecSschwarze 		}
835e053e0fdSschwarze 	}
8366e69d8cfSschwarze 	free(id);
8379226948cSschwarze 	if (id == NULL)
8389226948cSschwarze 		free(href);
8390ac7e6ecSschwarze 	return ret;
8400ac7e6ecSschwarze }
8410ac7e6ecSschwarze 
8424175bdabSschwarze static void
print_ctag(struct html * h,struct tag * tag)8434dda31b3Sschwarze print_ctag(struct html *h, struct tag *tag)
8444175bdabSschwarze {
845b1eea027Sschwarze 	int	 tflags;
8464175bdabSschwarze 
847520a575cSschwarze 	if (tag->closed == 0) {
848520a575cSschwarze 		tag->closed = 1;
8494dda31b3Sschwarze 		if (tag == h->metaf)
8504dda31b3Sschwarze 			h->metaf = NULL;
8514dda31b3Sschwarze 		if (tag == h->tblt)
8524dda31b3Sschwarze 			h->tblt = NULL;
8534dda31b3Sschwarze 
854b1eea027Sschwarze 		tflags = htmltags[tag->tag].flags;
855b1eea027Sschwarze 		if (tflags & HTML_INDENT)
856b1eea027Sschwarze 			h->indent--;
857b1eea027Sschwarze 		if (tflags & HTML_NOINDENT)
858b1eea027Sschwarze 			h->noindent--;
859b1eea027Sschwarze 		if (tflags & HTML_NLEND)
860d5746e42Sschwarze 			print_endline(h);
861d5746e42Sschwarze 		print_indent(h);
862d5746e42Sschwarze 		print_byte(h, '<');
863d5746e42Sschwarze 		print_byte(h, '/');
864d5746e42Sschwarze 		print_word(h, htmltags[tag->tag].name);
865d5746e42Sschwarze 		print_byte(h, '>');
866b1eea027Sschwarze 		if (tflags & HTML_NLAFTER)
867d5746e42Sschwarze 			print_endline(h);
868520a575cSschwarze 	}
869520a575cSschwarze 	if (tag->refcnt == 0) {
8704614a369Sschwarze 		h->tag = tag->next;
8714dda31b3Sschwarze 		free(tag);
8724175bdabSschwarze 	}
873520a575cSschwarze }
8744175bdabSschwarze 
/*
 * Write the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
8814175bdabSschwarze 
8824175bdabSschwarze void
print_gen_comment(struct html * h,struct roff_node * n)883cc202ecaSschwarze print_gen_comment(struct html *h, struct roff_node *n)
884cc202ecaSschwarze {
885cc202ecaSschwarze 	int	 wantblank;
886cc202ecaSschwarze 
887cc202ecaSschwarze 	print_word(h, "<!-- This is an automatically generated file."
888cc202ecaSschwarze 	    "  Do not edit.");
889cc202ecaSschwarze 	h->indent = 1;
890cc202ecaSschwarze 	wantblank = 0;
891cc202ecaSschwarze 	while (n != NULL && n->type == ROFFT_COMMENT) {
892cc202ecaSschwarze 		if (strstr(n->string, "-->") == NULL &&
893cc202ecaSschwarze 		    (wantblank || *n->string != '\0')) {
894cc202ecaSschwarze 			print_endline(h);
895cc202ecaSschwarze 			print_indent(h);
896cc202ecaSschwarze 			print_word(h, n->string);
897cc202ecaSschwarze 			wantblank = *n->string != '\0';
898cc202ecaSschwarze 		}
899cc202ecaSschwarze 		n = n->next;
900cc202ecaSschwarze 	}
901cc202ecaSschwarze 	if (wantblank)
902cc202ecaSschwarze 		print_endline(h);
903cc202ecaSschwarze 	print_word(h, " -->");
904cc202ecaSschwarze 	print_endline(h);
905cc202ecaSschwarze 	h->indent = 0;
906cc202ecaSschwarze }
907cc202ecaSschwarze 
/*
 * Print a word of text without attaching it to any syntax tree node;
 * shorthand for print_tagged_text() with a NULL node argument.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*nonode = NULL;

	print_tagged_text(h, word, nonode);
}
9136e2a0df9Sschwarze 
9146e2a0df9Sschwarze void
print_tagged_text(struct html * h,const char * word,struct roff_node * n)9156e2a0df9Sschwarze print_tagged_text(struct html *h, const char *word, struct roff_node *n)
9166e2a0df9Sschwarze {
9176e2a0df9Sschwarze 	struct tag	*t;
9186e2a0df9Sschwarze 	char		*href;
9196e2a0df9Sschwarze 
9205847b48dSschwarze 	/*
9215847b48dSschwarze 	 * Always wrap text in a paragraph unless already contained in
9225847b48dSschwarze 	 * some flow container; never put it directly into a section.
9235847b48dSschwarze 	 */
9245847b48dSschwarze 
9255847b48dSschwarze 	if (h->tag->tag == TAG_SECTION)
9265847b48dSschwarze 		print_otag(h, TAG_P, "c", "Pp");
9275847b48dSschwarze 
9285847b48dSschwarze 	/* Output whitespace before this text? */
9295847b48dSschwarze 
930d5746e42Sschwarze 	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
931769ee804Sschwarze 		if ( ! (HTML_KEEP & h->flags)) {
932769ee804Sschwarze 			if (HTML_PREKEEP & h->flags)
933769ee804Sschwarze 				h->flags |= HTML_KEEP;
934d5746e42Sschwarze 			print_endword(h);
935769ee804Sschwarze 		} else
936497d2755Sbentley 			print_word(h, "&#x00A0;");
937769ee804Sschwarze 	}
9384175bdabSschwarze 
9395847b48dSschwarze 	/*
9406e2a0df9Sschwarze 	 * Optionally switch fonts, optionally write a permalink, then
9416e2a0df9Sschwarze 	 * print the text, optionally surrounded by HTML whitespace.
9425847b48dSschwarze 	 */
9435847b48dSschwarze 
944cefe8974Sschwarze 	assert(h->metaf == NULL);
945cefe8974Sschwarze 	print_metaf(h);
946d5746e42Sschwarze 	print_indent(h);
9476e2a0df9Sschwarze 
9486e69d8cfSschwarze 	if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
9496e2a0df9Sschwarze 		t = print_otag(h, TAG_A, "chR", "permalink", href);
9506e2a0df9Sschwarze 		free(href);
9516e2a0df9Sschwarze 	} else
9526e2a0df9Sschwarze 		t = NULL;
9536e2a0df9Sschwarze 
954fef1eecdSschwarze 	if ( ! print_encode(h, word, NULL, 0)) {
955ddce0b0cSschwarze 		if ( ! (h->flags & HTML_NONOSPACE))
9564175bdabSschwarze 			h->flags &= ~HTML_NOSPACE;
95764c3401cSschwarze 		h->flags &= ~HTML_NONEWLINE;
958f7dbc9ddSschwarze 	} else
95964c3401cSschwarze 		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
9604175bdabSschwarze 
961cefe8974Sschwarze 	if (h->metaf != NULL) {
9623a7b861cSschwarze 		print_tagq(h, h->metaf);
9633a7b861cSschwarze 		h->metaf = NULL;
9646e2a0df9Sschwarze 	} else if (t != NULL)
9656e2a0df9Sschwarze 		print_tagq(h, t);
9663a7b861cSschwarze 
96737e2f24fSschwarze 	h->flags &= ~HTML_IGNDELIM;
9684175bdabSschwarze }
9694175bdabSschwarze 
9704175bdabSschwarze void
print_tagq(struct html * h,const struct tag * until)9714175bdabSschwarze print_tagq(struct html *h, const struct tag *until)
9724175bdabSschwarze {
973520a575cSschwarze 	struct tag	*this, *next;
9744175bdabSschwarze 
975520a575cSschwarze 	for (this = h->tag; this != NULL; this = next) {
976520a575cSschwarze 		next = this == until ? NULL : this->next;
977520a575cSschwarze 		print_ctag(h, this);
9784175bdabSschwarze 	}
9794175bdabSschwarze }
9804175bdabSschwarze 
9812dd33770Sschwarze /*
9822dd33770Sschwarze  * Close out all open elements up to but excluding suntil.
9832dd33770Sschwarze  * Note that a paragraph just inside stays open together with it
9842dd33770Sschwarze  * because paragraphs include subsequent phrasing content.
9852dd33770Sschwarze  */
9864175bdabSschwarze void
print_stagq(struct html * h,const struct tag * suntil)9874175bdabSschwarze print_stagq(struct html *h, const struct tag *suntil)
9884175bdabSschwarze {
989520a575cSschwarze 	struct tag	*this, *next;
9904175bdabSschwarze 
991520a575cSschwarze 	for (this = h->tag; this != NULL; this = next) {
992520a575cSschwarze 		next = this->next;
993520a575cSschwarze 		if (this == suntil || (next == suntil &&
994520a575cSschwarze 		    (this->tag == TAG_P || this->tag == TAG_PRE)))
995520a575cSschwarze 			break;
996520a575cSschwarze 		print_ctag(h, this);
9974175bdabSschwarze 	}
9984175bdabSschwarze }
9994175bdabSschwarze 
1000d5746e42Sschwarze 
1001d5746e42Sschwarze /***********************************************************************
1002d5746e42Sschwarze  * Low level output functions.
1003d5746e42Sschwarze  * They implement line breaking using a short static buffer.
1004d5746e42Sschwarze  ***********************************************************************/
1005d5746e42Sschwarze 
1006b1eea027Sschwarze /*
1007d5746e42Sschwarze  * Buffer one HTML output byte.
1008d5746e42Sschwarze  * If the buffer is full, flush and deactivate it and start a new line.
1009d5746e42Sschwarze  * If the buffer is inactive, print directly.
1010b1eea027Sschwarze  */
1011b1eea027Sschwarze static void
print_byte(struct html * h,char c)1012d5746e42Sschwarze print_byte(struct html *h, char c)
1013b1eea027Sschwarze {
1014d5746e42Sschwarze 	if ((h->flags & HTML_BUFFER) == 0) {
1015d5746e42Sschwarze 		putchar(c);
1016d5746e42Sschwarze 		h->col++;
1017b1eea027Sschwarze 		return;
1018d5746e42Sschwarze 	}
1019d5746e42Sschwarze 
1020d5746e42Sschwarze 	if (h->col + h->bufcol < sizeof(h->buf)) {
1021d5746e42Sschwarze 		h->buf[h->bufcol++] = c;
1022d5746e42Sschwarze 		return;
1023d5746e42Sschwarze 	}
1024b1eea027Sschwarze 
1025b1eea027Sschwarze 	putchar('\n');
1026d5746e42Sschwarze 	h->col = 0;
1027d5746e42Sschwarze 	print_indent(h);
1028d5746e42Sschwarze 	putchar(' ');
1029d5746e42Sschwarze 	putchar(' ');
1030d5746e42Sschwarze 	fwrite(h->buf, h->bufcol, 1, stdout);
1031d5746e42Sschwarze 	putchar(c);
1032d5746e42Sschwarze 	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1033d5746e42Sschwarze 	h->bufcol = 0;
1034d5746e42Sschwarze 	h->flags &= ~HTML_BUFFER;
1035d5746e42Sschwarze }
1036d5746e42Sschwarze 
1037d5746e42Sschwarze /*
1038d5746e42Sschwarze  * If something was printed on the current output line, end it.
1039d5746e42Sschwarze  * Not to be called right after print_indent().
1040d5746e42Sschwarze  */
10410692510cSschwarze void
print_endline(struct html * h)1042d5746e42Sschwarze print_endline(struct html *h)
1043d5746e42Sschwarze {
1044d5746e42Sschwarze 	if (h->col == 0)
1045d5746e42Sschwarze 		return;
1046d5746e42Sschwarze 
1047d5746e42Sschwarze 	if (h->bufcol) {
1048d5746e42Sschwarze 		putchar(' ');
1049d5746e42Sschwarze 		fwrite(h->buf, h->bufcol, 1, stdout);
1050d5746e42Sschwarze 		h->bufcol = 0;
1051d5746e42Sschwarze 	}
1052d5746e42Sschwarze 	putchar('\n');
1053d5746e42Sschwarze 	h->col = 0;
1054d5746e42Sschwarze 	h->flags |= HTML_NOSPACE;
1055d5746e42Sschwarze 	h->flags &= ~HTML_BUFFER;
1056d5746e42Sschwarze }
1057d5746e42Sschwarze 
1058d5746e42Sschwarze /*
1059d5746e42Sschwarze  * Flush the HTML output buffer.
1060d5746e42Sschwarze  * If it is inactive, activate it.
1061d5746e42Sschwarze  */
1062d5746e42Sschwarze static void
print_endword(struct html * h)1063d5746e42Sschwarze print_endword(struct html *h)
1064d5746e42Sschwarze {
1065d5746e42Sschwarze 	if (h->noindent) {
1066d5746e42Sschwarze 		print_byte(h, ' ');
1067d5746e42Sschwarze 		return;
1068d5746e42Sschwarze 	}
1069d5746e42Sschwarze 
1070d5746e42Sschwarze 	if ((h->flags & HTML_BUFFER) == 0) {
1071d5746e42Sschwarze 		h->col++;
1072d5746e42Sschwarze 		h->flags |= HTML_BUFFER;
1073d5746e42Sschwarze 	} else if (h->bufcol) {
1074d5746e42Sschwarze 		putchar(' ');
1075d5746e42Sschwarze 		fwrite(h->buf, h->bufcol, 1, stdout);
1076d5746e42Sschwarze 		h->col += h->bufcol + 1;
1077d5746e42Sschwarze 	}
1078d5746e42Sschwarze 	h->bufcol = 0;
1079b1eea027Sschwarze }
1080b1eea027Sschwarze 
1081b1eea027Sschwarze /*
1082b1eea027Sschwarze  * If at the beginning of a new output line,
1083b1eea027Sschwarze  * perform indentation and mark the line as containing output.
1084b1eea027Sschwarze  * Make sure to really produce some output right afterwards,
1085b1eea027Sschwarze  * but do not use print_otag() for producing it.
1086b1eea027Sschwarze  */
1087b1eea027Sschwarze static void
print_indent(struct html * h)1088d5746e42Sschwarze print_indent(struct html *h)
1089b1eea027Sschwarze {
1090d5746e42Sschwarze 	size_t	 i;
1091b1eea027Sschwarze 
109287fbaedfSschwarze 	if (h->col || h->noindent)
1093b1eea027Sschwarze 		return;
1094b1eea027Sschwarze 
1095d5746e42Sschwarze 	h->col = h->indent * 2;
1096d5746e42Sschwarze 	for (i = 0; i < h->col; i++)
1097b1eea027Sschwarze 		putchar(' ');
1098d5746e42Sschwarze }
1099d5746e42Sschwarze 
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it gets printed or buffered according to the current
 * state of the HTML output buffer.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
1110