xref: /openbsd-src/usr.bin/mandoc/html.c (revision d1df930ffab53da22f3324c32bed7ac5709915e6)
1 /*	$OpenBSD: html.c,v 1.111 2018/10/02 14:56:36 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stddef.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "out.h"
36 #include "html.h"
37 #include "manconf.h"
38 #include "main.h"
39 
/*
 * Static description of one HTML element: the name written into
 * the output and a set of flags that print_otag() and print_ctag()
 * consult when formatting the opening and closing tags.
 */
struct	htmldata {
	const char	 *name;		/* element name, e.g. "div" */
	int		  flags;	/* bitwise OR of the HTML_* bits below */
/* Do not push the element onto the stack of open tags. */
#define	HTML_NOSTACK	 0x01
/* Emit '/' before the closing '>' of the opening tag. */
#define	HTML_AUTOCLOSE	 0x02
/* End the current output line before the opening tag. */
#define	HTML_NLBEFORE	 0x04
/* End the output line right after the opening tag. */
#define	HTML_NLBEGIN	 0x08
/* End the current output line before the closing tag. */
#define	HTML_NLEND	 0x10
/* End the output line right after the closing tag. */
#define	HTML_NLAFTER	 0x20
/* Convenience combinations of the four line break bits. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
/* Increment the indentation level while the element is open. */
#define	HTML_INDENT	 0x40
/* Increment the no-indent counter while the element is open. */
#define	HTML_NOINDENT	 0x80
};
55 
56 static	const struct htmldata htmltags[TAG_MAX] = {
57 	{"html",	HTML_NLALL},
58 	{"head",	HTML_NLALL | HTML_INDENT},
59 	{"body",	HTML_NLALL},
60 	{"meta",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
61 	{"title",	HTML_NLAROUND},
62 	{"div",		HTML_NLAROUND},
63 	{"div",		0},
64 	{"h1",		HTML_NLAROUND},
65 	{"h2",		HTML_NLAROUND},
66 	{"span",	0},
67 	{"link",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
68 	{"br",		HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
69 	{"a",		0},
70 	{"table",	HTML_NLALL | HTML_INDENT},
71 	{"tr",		HTML_NLALL | HTML_INDENT},
72 	{"td",		HTML_NLAROUND},
73 	{"li",		HTML_NLAROUND | HTML_INDENT},
74 	{"ul",		HTML_NLALL | HTML_INDENT},
75 	{"ol",		HTML_NLALL | HTML_INDENT},
76 	{"dl",		HTML_NLALL | HTML_INDENT},
77 	{"dt",		HTML_NLAROUND},
78 	{"dd",		HTML_NLAROUND | HTML_INDENT},
79 	{"pre",		HTML_NLALL | HTML_NOINDENT},
80 	{"var",		0},
81 	{"cite",	0},
82 	{"b",		0},
83 	{"i",		0},
84 	{"code",	0},
85 	{"small",	0},
86 	{"style",	HTML_NLALL | HTML_INDENT},
87 	{"math",	HTML_NLALL | HTML_INDENT},
88 	{"mrow",	0},
89 	{"mi",		0},
90 	{"mn",		0},
91 	{"mo",		0},
92 	{"msup",	0},
93 	{"msub",	0},
94 	{"msubsup",	0},
95 	{"mfrac",	0},
96 	{"msqrt",	0},
97 	{"mfenced",	0},
98 	{"mtable",	0},
99 	{"mtr",		0},
100 	{"mtd",		0},
101 	{"munderover",	0},
102 	{"munder",	0},
103 	{"mover",	0},
104 };
105 
106 /* Avoid duplicate HTML id= attributes. */
107 static	struct ohash	 id_unique;
108 
109 static	void	 print_byte(struct html *, char);
110 static	void	 print_endword(struct html *);
111 static	void	 print_indent(struct html *);
112 static	void	 print_word(struct html *, const char *);
113 
114 static	void	 print_ctag(struct html *, struct tag *);
115 static	int	 print_escape(struct html *, char);
116 static	int	 print_encode(struct html *, const char *, const char *, int);
117 static	void	 print_href(struct html *, const char *, const char *, int);
118 static	void	 print_metaf(struct html *, enum mandoc_esc);
119 
120 
121 void *
122 html_alloc(const struct manoutput *outopts)
123 {
124 	struct html	*h;
125 
126 	h = mandoc_calloc(1, sizeof(struct html));
127 
128 	h->tag = NULL;
129 	h->style = outopts->style;
130 	if ((h->base_man1 = outopts->man) == NULL)
131 		h->base_man2 = NULL;
132 	else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
133 		*h->base_man2++ = '\0';
134 	h->base_includes = outopts->includes;
135 	if (outopts->fragment)
136 		h->oflags |= HTML_FRAGMENT;
137 	if (outopts->toc)
138 		h->oflags |= HTML_TOC;
139 
140 	mandoc_ohash_init(&id_unique, 4, 0);
141 
142 	return h;
143 }
144 
145 void
146 html_free(void *p)
147 {
148 	struct tag	*tag;
149 	struct html	*h;
150 	char		*cp;
151 	unsigned int	 slot;
152 
153 	h = (struct html *)p;
154 	while ((tag = h->tag) != NULL) {
155 		h->tag = tag->next;
156 		free(tag);
157 	}
158 	free(h);
159 
160 	cp = ohash_first(&id_unique, &slot);
161 	while (cp != NULL) {
162 		free(cp);
163 		cp = ohash_next(&id_unique, &slot);
164 	}
165 	ohash_delete(&id_unique);
166 }
167 
168 void
169 print_gen_head(struct html *h)
170 {
171 	struct tag	*t;
172 
173 	print_otag(h, TAG_META, "?", "charset", "utf-8");
174 	if (h->style != NULL) {
175 		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
176 		    h->style, "type", "text/css", "media", "all");
177 		return;
178 	}
179 
180 	/*
181 	 * Print a minimal embedded style sheet.
182 	 */
183 
184 	t = print_otag(h, TAG_STYLE, "");
185 	print_text(h, "table.head, table.foot { width: 100%; }");
186 	print_endline(h);
187 	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
188 	print_endline(h);
189 	print_text(h, "td.head-vol { text-align: center; }");
190 	print_endline(h);
191 	print_text(h, "div.Pp { margin: 1ex 0ex; }");
192 	print_endline(h);
193 	print_text(h, "div.Nd, div.Bf, div.Op { display: inline; }");
194 	print_endline(h);
195 	print_text(h, "span.Pa, span.Ad { font-style: italic; }");
196 	print_endline(h);
197 	print_text(h, "span.Ms { font-weight: bold; }");
198 	print_endline(h);
199 	print_text(h, "dl.Bl-diag ");
200 	print_byte(h, '>');
201 	print_text(h, " dt { font-weight: bold; }");
202 	print_endline(h);
203 	print_text(h, "code.Nm, code.Fl, code.Cm, code.Ic, "
204 	    "code.In, code.Fd, code.Fn,");
205 	print_endline(h);
206 	print_text(h, "code.Cd { font-weight: bold; "
207 	    "font-family: inherit; }");
208 	print_tagq(h, t);
209 }
210 
211 static void
212 print_metaf(struct html *h, enum mandoc_esc deco)
213 {
214 	enum htmlfont	 font;
215 
216 	switch (deco) {
217 	case ESCAPE_FONTPREV:
218 		font = h->metal;
219 		break;
220 	case ESCAPE_FONTITALIC:
221 		font = HTMLFONT_ITALIC;
222 		break;
223 	case ESCAPE_FONTBOLD:
224 		font = HTMLFONT_BOLD;
225 		break;
226 	case ESCAPE_FONTBI:
227 		font = HTMLFONT_BI;
228 		break;
229 	case ESCAPE_FONT:
230 	case ESCAPE_FONTROMAN:
231 		font = HTMLFONT_NONE;
232 		break;
233 	default:
234 		abort();
235 	}
236 
237 	if (h->metaf) {
238 		print_tagq(h, h->metaf);
239 		h->metaf = NULL;
240 	}
241 
242 	h->metal = h->metac;
243 	h->metac = font;
244 
245 	switch (font) {
246 	case HTMLFONT_ITALIC:
247 		h->metaf = print_otag(h, TAG_I, "");
248 		break;
249 	case HTMLFONT_BOLD:
250 		h->metaf = print_otag(h, TAG_B, "");
251 		break;
252 	case HTMLFONT_BI:
253 		h->metaf = print_otag(h, TAG_B, "");
254 		print_otag(h, TAG_I, "");
255 		break;
256 	default:
257 		break;
258 	}
259 }
260 
261 char *
262 html_make_id(const struct roff_node *n, int unique)
263 {
264 	const struct roff_node	*nch;
265 	char			*buf, *bufs, *cp;
266 	unsigned int		 slot;
267 	int			 suffix;
268 
269 	for (nch = n->child; nch != NULL; nch = nch->next)
270 		if (nch->type != ROFFT_TEXT)
271 			return NULL;
272 
273 	buf = NULL;
274 	deroff(&buf, n);
275 	if (buf == NULL)
276 		return NULL;
277 
278 	/*
279 	 * In ID attributes, only use ASCII characters that are
280 	 * permitted in URL-fragment strings according to the
281 	 * explicit list at:
282 	 * https://url.spec.whatwg.org/#url-fragment-string
283 	 */
284 
285 	for (cp = buf; *cp != '\0'; cp++)
286 		if (isalnum((unsigned char)*cp) == 0 &&
287 		    strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
288 			*cp = '_';
289 
290 	if (unique == 0)
291 		return buf;
292 
293 	/* Avoid duplicate HTML id= attributes. */
294 
295 	bufs = NULL;
296 	suffix = 1;
297 	slot = ohash_qlookup(&id_unique, buf);
298 	cp = ohash_find(&id_unique, slot);
299 	if (cp != NULL) {
300 		while (cp != NULL) {
301 			free(bufs);
302 			if (++suffix > 127) {
303 				free(buf);
304 				return NULL;
305 			}
306 			mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
307 			slot = ohash_qlookup(&id_unique, bufs);
308 			cp = ohash_find(&id_unique, slot);
309 		}
310 		free(buf);
311 		buf = bufs;
312 	}
313 	ohash_insert(&id_unique, slot, buf);
314 	return buf;
315 }
316 
317 static int
318 print_escape(struct html *h, char c)
319 {
320 
321 	switch (c) {
322 	case '<':
323 		print_word(h, "&lt;");
324 		break;
325 	case '>':
326 		print_word(h, "&gt;");
327 		break;
328 	case '&':
329 		print_word(h, "&amp;");
330 		break;
331 	case '"':
332 		print_word(h, "&quot;");
333 		break;
334 	case ASCII_NBRSP:
335 		print_word(h, "&nbsp;");
336 		break;
337 	case ASCII_HYPH:
338 		print_byte(h, '-');
339 		break;
340 	case ASCII_BREAK:
341 		break;
342 	default:
343 		return 0;
344 	}
345 	return 1;
346 }
347 
348 static int
349 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
350 {
351 	char		 numbuf[16];
352 	struct tag	*t;
353 	const char	*seq;
354 	size_t		 sz;
355 	int		 c, len, breakline, nospace;
356 	enum mandoc_esc	 esc;
357 	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
358 		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
359 
360 	if (pend == NULL)
361 		pend = strchr(p, '\0');
362 
363 	breakline = 0;
364 	nospace = 0;
365 
366 	while (p < pend) {
367 		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
368 			h->flags &= ~HTML_SKIPCHAR;
369 			p++;
370 			continue;
371 		}
372 
373 		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
374 			print_byte(h, *p);
375 
376 		if (breakline &&
377 		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
378 			t = print_otag(h, TAG_DIV, "");
379 			print_text(h, "\\~");
380 			print_tagq(h, t);
381 			breakline = 0;
382 			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
383 				p++;
384 			continue;
385 		}
386 
387 		if (p >= pend)
388 			break;
389 
390 		if (*p == ' ') {
391 			print_endword(h);
392 			p++;
393 			continue;
394 		}
395 
396 		if (print_escape(h, *p++))
397 			continue;
398 
399 		esc = mandoc_escape(&p, &seq, &len);
400 		if (ESCAPE_ERROR == esc)
401 			break;
402 
403 		switch (esc) {
404 		case ESCAPE_FONT:
405 		case ESCAPE_FONTPREV:
406 		case ESCAPE_FONTBOLD:
407 		case ESCAPE_FONTITALIC:
408 		case ESCAPE_FONTBI:
409 		case ESCAPE_FONTROMAN:
410 			if (0 == norecurse)
411 				print_metaf(h, esc);
412 			continue;
413 		case ESCAPE_SKIPCHAR:
414 			h->flags |= HTML_SKIPCHAR;
415 			continue;
416 		default:
417 			break;
418 		}
419 
420 		if (h->flags & HTML_SKIPCHAR) {
421 			h->flags &= ~HTML_SKIPCHAR;
422 			continue;
423 		}
424 
425 		switch (esc) {
426 		case ESCAPE_UNICODE:
427 			/* Skip past "u" header. */
428 			c = mchars_num2uc(seq + 1, len - 1);
429 			break;
430 		case ESCAPE_NUMBERED:
431 			c = mchars_num2char(seq, len);
432 			if (c < 0)
433 				continue;
434 			break;
435 		case ESCAPE_SPECIAL:
436 			c = mchars_spec2cp(seq, len);
437 			if (c <= 0)
438 				continue;
439 			break;
440 		case ESCAPE_DEVICE:
441 			print_word(h, "html");
442 			continue;
443 		case ESCAPE_BREAK:
444 			breakline = 1;
445 			continue;
446 		case ESCAPE_NOSPACE:
447 			if ('\0' == *p)
448 				nospace = 1;
449 			continue;
450 		case ESCAPE_OVERSTRIKE:
451 			if (len == 0)
452 				continue;
453 			c = seq[len - 1];
454 			break;
455 		default:
456 			continue;
457 		}
458 		if ((c < 0x20 && c != 0x09) ||
459 		    (c > 0x7E && c < 0xA0))
460 			c = 0xFFFD;
461 		if (c > 0x7E) {
462 			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
463 			print_word(h, numbuf);
464 		} else if (print_escape(h, c) == 0)
465 			print_byte(h, c);
466 	}
467 
468 	return nospace;
469 }
470 
471 static void
472 print_href(struct html *h, const char *name, const char *sec, int man)
473 {
474 	struct stat	 sb;
475 	const char	*p, *pp;
476 	char		*filename;
477 
478 	if (man) {
479 		pp = h->base_man1;
480 		if (h->base_man2 != NULL) {
481 			mandoc_asprintf(&filename, "%s.%s", name, sec);
482 			if (stat(filename, &sb) == -1)
483 				pp = h->base_man2;
484 			free(filename);
485 		}
486 	} else
487 		pp = h->base_includes;
488 
489 	while ((p = strchr(pp, '%')) != NULL) {
490 		print_encode(h, pp, p, 1);
491 		if (man && p[1] == 'S') {
492 			if (sec == NULL)
493 				print_byte(h, '1');
494 			else
495 				print_encode(h, sec, NULL, 1);
496 		} else if ((man && p[1] == 'N') ||
497 		    (man == 0 && p[1] == 'I'))
498 			print_encode(h, name, NULL, 1);
499 		else
500 			print_encode(h, p, p + 2, 1);
501 		pp = p + 2;
502 	}
503 	if (*pp != '\0')
504 		print_encode(h, pp, NULL, 1);
505 }
506 
507 struct tag *
508 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
509 {
510 	va_list		 ap;
511 	struct tag	*t;
512 	const char	*attr;
513 	char		*arg1, *arg2;
514 	int		 tflags;
515 
516 	tflags = htmltags[tag].flags;
517 
518 	/* Push this tag onto the stack of open scopes. */
519 
520 	if ((tflags & HTML_NOSTACK) == 0) {
521 		t = mandoc_malloc(sizeof(struct tag));
522 		t->tag = tag;
523 		t->next = h->tag;
524 		h->tag = t;
525 	} else
526 		t = NULL;
527 
528 	if (tflags & HTML_NLBEFORE)
529 		print_endline(h);
530 	if (h->col == 0)
531 		print_indent(h);
532 	else if ((h->flags & HTML_NOSPACE) == 0) {
533 		if (h->flags & HTML_KEEP)
534 			print_word(h, "&#x00A0;");
535 		else {
536 			if (h->flags & HTML_PREKEEP)
537 				h->flags |= HTML_KEEP;
538 			print_endword(h);
539 		}
540 	}
541 
542 	if ( ! (h->flags & HTML_NONOSPACE))
543 		h->flags &= ~HTML_NOSPACE;
544 	else
545 		h->flags |= HTML_NOSPACE;
546 
547 	/* Print out the tag name and attributes. */
548 
549 	print_byte(h, '<');
550 	print_word(h, htmltags[tag].name);
551 
552 	va_start(ap, fmt);
553 
554 	while (*fmt != '\0') {
555 
556 		/* Parse attributes and arguments. */
557 
558 		arg1 = va_arg(ap, char *);
559 		arg2 = NULL;
560 		switch (*fmt++) {
561 		case 'c':
562 			attr = "class";
563 			break;
564 		case 'h':
565 			attr = "href";
566 			break;
567 		case 'i':
568 			attr = "id";
569 			break;
570 		case 's':
571 			attr = "style";
572 			arg2 = va_arg(ap, char *);
573 			break;
574 		case '?':
575 			attr = arg1;
576 			arg1 = va_arg(ap, char *);
577 			break;
578 		default:
579 			abort();
580 		}
581 		if (*fmt == 'M')
582 			arg2 = va_arg(ap, char *);
583 		if (arg1 == NULL)
584 			continue;
585 
586 		/* Print the attributes. */
587 
588 		print_byte(h, ' ');
589 		print_word(h, attr);
590 		print_byte(h, '=');
591 		print_byte(h, '"');
592 		switch (*fmt) {
593 		case 'I':
594 			print_href(h, arg1, NULL, 0);
595 			fmt++;
596 			break;
597 		case 'M':
598 			print_href(h, arg1, arg2, 1);
599 			fmt++;
600 			break;
601 		case 'R':
602 			print_byte(h, '#');
603 			print_encode(h, arg1, NULL, 1);
604 			fmt++;
605 			break;
606 		case 'T':
607 			print_encode(h, arg1, NULL, 1);
608 			print_word(h, "\" title=\"");
609 			print_encode(h, arg1, NULL, 1);
610 			fmt++;
611 			break;
612 		default:
613 			if (arg2 == NULL)
614 				print_encode(h, arg1, NULL, 1);
615 			else {
616 				print_word(h, arg1);
617 				print_byte(h, ':');
618 				print_byte(h, ' ');
619 				print_word(h, arg2);
620 				print_byte(h, ';');
621 			}
622 			break;
623 		}
624 		print_byte(h, '"');
625 	}
626 	va_end(ap);
627 
628 	/* Accommodate for "well-formed" singleton escaping. */
629 
630 	if (HTML_AUTOCLOSE & htmltags[tag].flags)
631 		print_byte(h, '/');
632 
633 	print_byte(h, '>');
634 
635 	if (tflags & HTML_NLBEGIN)
636 		print_endline(h);
637 	else
638 		h->flags |= HTML_NOSPACE;
639 
640 	if (tflags & HTML_INDENT)
641 		h->indent++;
642 	if (tflags & HTML_NOINDENT)
643 		h->noindent++;
644 
645 	return t;
646 }
647 
648 static void
649 print_ctag(struct html *h, struct tag *tag)
650 {
651 	int	 tflags;
652 
653 	/*
654 	 * Remember to close out and nullify the current
655 	 * meta-font and table, if applicable.
656 	 */
657 	if (tag == h->metaf)
658 		h->metaf = NULL;
659 	if (tag == h->tblt)
660 		h->tblt = NULL;
661 
662 	tflags = htmltags[tag->tag].flags;
663 
664 	if (tflags & HTML_INDENT)
665 		h->indent--;
666 	if (tflags & HTML_NOINDENT)
667 		h->noindent--;
668 	if (tflags & HTML_NLEND)
669 		print_endline(h);
670 	print_indent(h);
671 	print_byte(h, '<');
672 	print_byte(h, '/');
673 	print_word(h, htmltags[tag->tag].name);
674 	print_byte(h, '>');
675 	if (tflags & HTML_NLAFTER)
676 		print_endline(h);
677 
678 	h->tag = tag->next;
679 	free(tag);
680 }
681 
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
688 
689 void
690 print_gen_comment(struct html *h, struct roff_node *n)
691 {
692 	int	 wantblank;
693 
694 	print_word(h, "<!-- This is an automatically generated file."
695 	    "  Do not edit.");
696 	h->indent = 1;
697 	wantblank = 0;
698 	while (n != NULL && n->type == ROFFT_COMMENT) {
699 		if (strstr(n->string, "-->") == NULL &&
700 		    (wantblank || *n->string != '\0')) {
701 			print_endline(h);
702 			print_indent(h);
703 			print_word(h, n->string);
704 			wantblank = *n->string != '\0';
705 		}
706 		n = n->next;
707 	}
708 	if (wantblank)
709 		print_endline(h);
710 	print_word(h, " -->");
711 	print_endline(h);
712 	h->indent = 0;
713 }
714 
715 void
716 print_text(struct html *h, const char *word)
717 {
718 	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
719 		if ( ! (HTML_KEEP & h->flags)) {
720 			if (HTML_PREKEEP & h->flags)
721 				h->flags |= HTML_KEEP;
722 			print_endword(h);
723 		} else
724 			print_word(h, "&#x00A0;");
725 	}
726 
727 	assert(NULL == h->metaf);
728 	switch (h->metac) {
729 	case HTMLFONT_ITALIC:
730 		h->metaf = print_otag(h, TAG_I, "");
731 		break;
732 	case HTMLFONT_BOLD:
733 		h->metaf = print_otag(h, TAG_B, "");
734 		break;
735 	case HTMLFONT_BI:
736 		h->metaf = print_otag(h, TAG_B, "");
737 		print_otag(h, TAG_I, "");
738 		break;
739 	default:
740 		print_indent(h);
741 		break;
742 	}
743 
744 	assert(word);
745 	if ( ! print_encode(h, word, NULL, 0)) {
746 		if ( ! (h->flags & HTML_NONOSPACE))
747 			h->flags &= ~HTML_NOSPACE;
748 		h->flags &= ~HTML_NONEWLINE;
749 	} else
750 		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
751 
752 	if (h->metaf) {
753 		print_tagq(h, h->metaf);
754 		h->metaf = NULL;
755 	}
756 
757 	h->flags &= ~HTML_IGNDELIM;
758 }
759 
760 void
761 print_tagq(struct html *h, const struct tag *until)
762 {
763 	struct tag	*tag;
764 
765 	while ((tag = h->tag) != NULL) {
766 		print_ctag(h, tag);
767 		if (until && tag == until)
768 			return;
769 	}
770 }
771 
772 void
773 print_stagq(struct html *h, const struct tag *suntil)
774 {
775 	struct tag	*tag;
776 
777 	while ((tag = h->tag) != NULL) {
778 		if (suntil && tag == suntil)
779 			return;
780 		print_ctag(h, tag);
781 	}
782 }
783 
784 void
785 print_paragraph(struct html *h)
786 {
787 	struct tag	*t;
788 
789 	t = print_otag(h, TAG_DIV, "c", "Pp");
790 	print_tagq(h, t);
791 }
792 
793 
794 /***********************************************************************
795  * Low level output functions.
796  * They implement line breaking using a short static buffer.
797  ***********************************************************************/
798 
799 /*
800  * Buffer one HTML output byte.
801  * If the buffer is full, flush and deactivate it and start a new line.
802  * If the buffer is inactive, print directly.
803  */
804 static void
805 print_byte(struct html *h, char c)
806 {
807 	if ((h->flags & HTML_BUFFER) == 0) {
808 		putchar(c);
809 		h->col++;
810 		return;
811 	}
812 
813 	if (h->col + h->bufcol < sizeof(h->buf)) {
814 		h->buf[h->bufcol++] = c;
815 		return;
816 	}
817 
818 	putchar('\n');
819 	h->col = 0;
820 	print_indent(h);
821 	putchar(' ');
822 	putchar(' ');
823 	fwrite(h->buf, h->bufcol, 1, stdout);
824 	putchar(c);
825 	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
826 	h->bufcol = 0;
827 	h->flags &= ~HTML_BUFFER;
828 }
829 
830 /*
831  * If something was printed on the current output line, end it.
832  * Not to be called right after print_indent().
833  */
834 void
835 print_endline(struct html *h)
836 {
837 	if (h->col == 0)
838 		return;
839 
840 	if (h->bufcol) {
841 		putchar(' ');
842 		fwrite(h->buf, h->bufcol, 1, stdout);
843 		h->bufcol = 0;
844 	}
845 	putchar('\n');
846 	h->col = 0;
847 	h->flags |= HTML_NOSPACE;
848 	h->flags &= ~HTML_BUFFER;
849 }
850 
851 /*
852  * Flush the HTML output buffer.
853  * If it is inactive, activate it.
854  */
855 static void
856 print_endword(struct html *h)
857 {
858 	if (h->noindent) {
859 		print_byte(h, ' ');
860 		return;
861 	}
862 
863 	if ((h->flags & HTML_BUFFER) == 0) {
864 		h->col++;
865 		h->flags |= HTML_BUFFER;
866 	} else if (h->bufcol) {
867 		putchar(' ');
868 		fwrite(h->buf, h->bufcol, 1, stdout);
869 		h->col += h->bufcol + 1;
870 	}
871 	h->bufcol = 0;
872 }
873 
874 /*
875  * If at the beginning of a new output line,
876  * perform indentation and mark the line as containing output.
877  * Make sure to really produce some output right afterwards,
878  * but do not use print_otag() for producing it.
879  */
880 static void
881 print_indent(struct html *h)
882 {
883 	size_t	 i;
884 
885 	if (h->col)
886 		return;
887 
888 	if (h->noindent == 0) {
889 		h->col = h->indent * 2;
890 		for (i = 0; i < h->col; i++)
891 			putchar(' ');
892 	}
893 	h->flags &= ~HTML_NOSPACE;
894 }
895 
/*
 * Pass a NUL-terminated string to print_byte() one byte at a
 * time, such that it is printed or buffered depending on the
 * current state of the HTML output buffer.  No encoding is done.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
906