xref: /openbsd-src/usr.bin/mandoc/man_html.c (revision d2c5a4743fb945f45b034a3a830a96f7e1bc695d)
1 /* $OpenBSD: man_html.c,v 1.132 2020/10/16 17:22:39 schwarze Exp $ */
2 /*
3  * Copyright (c) 2013-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * HTML formatter for man(7) used by mandoc(1).
19  */
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #include "mandoc_aux.h"
29 #include "mandoc.h"
30 #include "roff.h"
31 #include "man.h"
32 #include "out.h"
33 #include "html.h"
34 #include "main.h"
35 
/*
 * Parameter list shared by all man(7) HTML handlers in this file:
 * the document meta data (man), the syntax tree node that is being
 * formatted (n), and the HTML output state (h).
 */
#define	MAN_ARGS	  const struct roff_meta *man, \
			  struct roff_node *n, \
			  struct html *h
39 
/*
 * Handlers for one man(7) macro.
 * pre is called by print_man_node() before the children of a node
 * are formatted; a return value of zero suppresses the automatic
 * descent into the children.
 * post is not set by any table entry and not called anywhere in the
 * code visible in this file.
 */
struct	man_html_act {
	int		(*pre)(MAN_ARGS);
	int		(*post)(MAN_ARGS);
};
44 
/*
 * Forward declarations of the file-local helpers: the tree walkers,
 * the list continuation test, the per-macro pre-handlers referenced
 * from man_html_acts[], and the page header and footer writers.
 */
static	void		  print_man_head(const struct roff_meta *,
				struct html *);
static	void		  print_man_nodelist(MAN_ARGS);
static	void		  print_man_node(MAN_ARGS);
static	char		  list_continues(const struct roff_node *,
				const struct roff_node *);
static	int		  man_B_pre(MAN_ARGS);
static	int		  man_IP_pre(MAN_ARGS);
static	int		  man_I_pre(MAN_ARGS);
static	int		  man_OP_pre(MAN_ARGS);
static	int		  man_PP_pre(MAN_ARGS);
static	int		  man_RS_pre(MAN_ARGS);
static	int		  man_SH_pre(MAN_ARGS);
static	int		  man_SM_pre(MAN_ARGS);
static	int		  man_SY_pre(MAN_ARGS);
static	int		  man_UR_pre(MAN_ARGS);
static	int		  man_abort_pre(MAN_ARGS);
static	int		  man_alt_pre(MAN_ARGS);
static	int		  man_ign_pre(MAN_ARGS);
static	int		  man_in_pre(MAN_ARGS);
static	void		  man_root_post(const struct roff_meta *,
				struct html *);
static	void		  man_root_pre(const struct roff_meta *,
				struct html *);
69 
/*
 * Dispatch table of per-macro handlers, indexed by (tok - MAN_TH);
 * see the lookup in print_man_node().  The entries are positional,
 * so their order has to match the order of the macro tokens from
 * MAN_TH up to MAN_MAX.
 * A NULL pre-handler means that the children of the node are
 * formatted without any additional markup for the macro itself.
 * LP and P are wired to man_abort_pre(), which calls abort();
 * presumably nodes with these tokens never reach the formatter
 * because they are replaced earlier - confirm in the parser.
 */
static	const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = {
	{ NULL, NULL }, /* TH */
	{ man_SH_pre, NULL }, /* SH */
	{ man_SH_pre, NULL }, /* SS */
	{ man_IP_pre, NULL }, /* TP */
	{ man_IP_pre, NULL }, /* TQ */
	{ man_abort_pre, NULL }, /* LP */
	{ man_PP_pre, NULL }, /* PP */
	{ man_abort_pre, NULL }, /* P */
	{ man_IP_pre, NULL }, /* IP */
	{ man_PP_pre, NULL }, /* HP */
	{ man_SM_pre, NULL }, /* SM */
	{ man_SM_pre, NULL }, /* SB */
	{ man_alt_pre, NULL }, /* BI */
	{ man_alt_pre, NULL }, /* IB */
	{ man_alt_pre, NULL }, /* BR */
	{ man_alt_pre, NULL }, /* RB */
	{ NULL, NULL }, /* R */
	{ man_B_pre, NULL }, /* B */
	{ man_I_pre, NULL }, /* I */
	{ man_alt_pre, NULL }, /* IR */
	{ man_alt_pre, NULL }, /* RI */
	{ NULL, NULL }, /* RE */
	{ man_RS_pre, NULL }, /* RS */
	{ man_ign_pre, NULL }, /* DT */
	{ man_ign_pre, NULL }, /* UC */
	{ man_ign_pre, NULL }, /* PD */
	{ man_ign_pre, NULL }, /* AT */
	{ man_in_pre, NULL }, /* in */
	{ man_SY_pre, NULL }, /* SY */
	{ NULL, NULL }, /* YS */
	{ man_OP_pre, NULL }, /* OP */
	{ NULL, NULL }, /* EX */
	{ NULL, NULL }, /* EE */
	{ man_UR_pre, NULL }, /* UR */
	{ NULL, NULL }, /* UE */
	{ man_UR_pre, NULL }, /* MT */
	{ NULL, NULL }, /* ME */
};
109 
110 
111 void
112 html_man(void *arg, const struct roff_meta *man)
113 {
114 	struct html		*h;
115 	struct roff_node	*n;
116 	struct tag		*t;
117 
118 	h = (struct html *)arg;
119 	n = man->first->child;
120 
121 	if ((h->oflags & HTML_FRAGMENT) == 0) {
122 		print_gen_decls(h);
123 		print_otag(h, TAG_HTML, "");
124 		if (n != NULL && n->type == ROFFT_COMMENT)
125 			print_gen_comment(h, n);
126 		t = print_otag(h, TAG_HEAD, "");
127 		print_man_head(man, h);
128 		print_tagq(h, t);
129 		print_otag(h, TAG_BODY, "");
130 	}
131 
132 	man_root_pre(man, h);
133 	t = print_otag(h, TAG_DIV, "c", "manual-text");
134 	print_man_nodelist(man, n, h);
135 	print_tagq(h, t);
136 	man_root_post(man, h);
137 	print_tagq(h, NULL);
138 }
139 
140 static void
141 print_man_head(const struct roff_meta *man, struct html *h)
142 {
143 	char	*cp;
144 
145 	print_gen_head(h);
146 	mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec);
147 	print_otag(h, TAG_TITLE, "");
148 	print_text(h, cp);
149 	free(cp);
150 }
151 
152 static void
153 print_man_nodelist(MAN_ARGS)
154 {
155 	while (n != NULL) {
156 		print_man_node(man, n, h);
157 		n = n->next;
158 	}
159 }
160 
/*
 * Format a single syntax tree node and, unless the pre-handler of
 * the node returns zero, all of its children.
 * The tag found in h->tag before output is generated is remembered
 * in t with its refcnt incremented, and is handed to print_stagq()
 * at the end; presumably that closes whatever was opened on top
 * of it while formatting this node - confirm in html.c.
 */
static void
print_man_node(MAN_ARGS)
{
	struct tag	*t;
	int		 child;

	/* Comments and nodes flagged NODE_NOPRT produce no output. */
	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
		return;

	/*
	 * Bring the output fill mode in line with the NODE_NOFILL
	 * flag of the node.  When html_fillmode() returns ROFF_nf
	 * (presumably the mode in effect before the call - confirm),
	 * a node starting a new input line gets an output line break,
	 * except for the .fi request and for a node right after .YS.
	 */
	if ((n->flags & NODE_NOFILL) == 0)
		html_fillmode(h, ROFF_fi);
	else if (html_fillmode(h, ROFF_nf) == ROFF_nf &&
	    n->tok != ROFF_fi && n->flags & NODE_LINE &&
	    (n->prev == NULL || n->prev->tok != MAN_YS))
		print_endline(h);

	/* By default, descend into the children after the switch. */
	child = 1;
	switch (n->type) {
	case ROFFT_TEXT:
		/* An empty text node only ends the output line. */
		if (*n->string == '\0') {
			print_endline(h);
			return;
		}
		/*
		 * Text beginning with a blank at the start of an input
		 * line forces a <br>, unless HTML_NONEWLINE is set;
		 * otherwise, a closing delimiter is attached to the
		 * preceding output without intervening space.
		 */
		if (*n->string == ' ' && n->flags & NODE_LINE &&
		    (h->flags & HTML_NONEWLINE) == 0)
			print_otag(h, TAG_BR, "");
		else if (n->flags & NODE_DELIMC)
			h->flags |= HTML_NOSPACE;
		t = h->tag;
		t->refcnt++;
		print_text(h, n->string);
		break;
	case ROFFT_EQN:
		t = h->tag;
		t->refcnt++;
		print_eqn(h, n->eqn);
		break;
	case ROFFT_TBL:
		/*
		 * This will take care of initialising all of the table
		 * state data for the first table, then tearing it down
		 * for the last one.
		 */
		print_tbl(h, n->span);
		return;
	default:
		/*
		 * Close out scope of font prior to opening a macro
		 * scope.
		 */
		if (h->metac != ESCAPE_FONTROMAN) {
			h->metal = h->metac;
			h->metac = ESCAPE_FONTROMAN;
		}

		/*
		 * Close out the current table, if it's open, and unset
		 * the "meta" table state.  This will be reopened on the
		 * next table element.
		 */
		if (h->tblt != NULL)
			print_tblclose(h);
		t = h->tag;
		t->refcnt++;
		/*
		 * Tokens below ROFF_MAX are passed to roff_html_pre();
		 * their children are not visited from here.
		 */
		if (n->tok < ROFF_MAX) {
			roff_html_pre(h, n);
			t->refcnt--;
			print_stagq(h, t);
			return;
		}
		/* Look up the man(7) macro handler, if there is one. */
		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
		if (man_html_acts[n->tok - MAN_TH].pre != NULL)
			child = (*man_html_acts[n->tok - MAN_TH].pre)(man,
			    n, h);
		break;
	}

	if (child && n->child != NULL)
		print_man_nodelist(man, n->child, h);

	/* This will automatically close out any font scope. */
	t->refcnt--;
	if (n->type == ROFFT_BLOCK &&
	    (n->tok == MAN_IP || n->tok == MAN_TP || n->tok == MAN_TQ)) {
		/*
		 * For list item blocks, start from h->tag and follow
		 * the next pointers to the first <dl> or <ul> tag,
		 * which replaces the tag remembered above.
		 */
		t = h->tag;
		while (t->tag != TAG_DL && t->tag != TAG_UL)
			t = t->next;
		/*
		 * Close the list if no further item of the same type
		 * follows; otherwise, close the item only.
		 */
		if (list_continues(n, roff_node_next(n)) == '\0') {
			print_tagq(h, t);
			t = NULL;
		}
	}
	/* t is NULL only if the list was just closed with print_tagq(). */
	if (t != NULL)
		print_stagq(h, t);
}
260 
261 static void
262 man_root_pre(const struct roff_meta *man, struct html *h)
263 {
264 	struct tag	*t, *tt;
265 	char		*title;
266 
267 	assert(man->title);
268 	assert(man->msec);
269 	mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
270 
271 	t = print_otag(h, TAG_TABLE, "c", "head");
272 	tt = print_otag(h, TAG_TR, "");
273 
274 	print_otag(h, TAG_TD, "c", "head-ltitle");
275 	print_text(h, title);
276 	print_stagq(h, tt);
277 
278 	print_otag(h, TAG_TD, "c", "head-vol");
279 	if (man->vol != NULL)
280 		print_text(h, man->vol);
281 	print_stagq(h, tt);
282 
283 	print_otag(h, TAG_TD, "c", "head-rtitle");
284 	print_text(h, title);
285 	print_tagq(h, t);
286 	free(title);
287 }
288 
289 static void
290 man_root_post(const struct roff_meta *man, struct html *h)
291 {
292 	struct tag	*t, *tt;
293 
294 	t = print_otag(h, TAG_TABLE, "c", "foot");
295 	tt = print_otag(h, TAG_TR, "");
296 
297 	print_otag(h, TAG_TD, "c", "foot-date");
298 	print_text(h, man->date);
299 	print_stagq(h, tt);
300 
301 	print_otag(h, TAG_TD, "c", "foot-os");
302 	if (man->os != NULL)
303 		print_text(h, man->os);
304 	print_tagq(h, t);
305 }
306 
307 static int
308 man_SH_pre(MAN_ARGS)
309 {
310 	const char	*class;
311 	enum htmltag	 tag;
312 
313 	if (n->tok == MAN_SH) {
314 		tag = TAG_H1;
315 		class = "Sh";
316 	} else {
317 		tag = TAG_H2;
318 		class = "Ss";
319 	}
320 	switch (n->type) {
321 	case ROFFT_BLOCK:
322 		html_close_paragraph(h);
323 		print_otag(h, TAG_SECTION, "c", class);
324 		break;
325 	case ROFFT_HEAD:
326 		print_otag_id(h, tag, class, n);
327 		break;
328 	case ROFFT_BODY:
329 		break;
330 	default:
331 		abort();
332 	}
333 	return 1;
334 }
335 
336 static int
337 man_alt_pre(MAN_ARGS)
338 {
339 	const struct roff_node	*nn;
340 	struct tag	*t;
341 	int		 i;
342 	enum htmltag	 fp;
343 
344 	for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i++) {
345 		switch (n->tok) {
346 		case MAN_BI:
347 			fp = i % 2 ? TAG_I : TAG_B;
348 			break;
349 		case MAN_IB:
350 			fp = i % 2 ? TAG_B : TAG_I;
351 			break;
352 		case MAN_RI:
353 			fp = i % 2 ? TAG_I : TAG_MAX;
354 			break;
355 		case MAN_IR:
356 			fp = i % 2 ? TAG_MAX : TAG_I;
357 			break;
358 		case MAN_BR:
359 			fp = i % 2 ? TAG_MAX : TAG_B;
360 			break;
361 		case MAN_RB:
362 			fp = i % 2 ? TAG_B : TAG_MAX;
363 			break;
364 		default:
365 			abort();
366 		}
367 
368 		if (i)
369 			h->flags |= HTML_NOSPACE;
370 
371 		if (fp != TAG_MAX)
372 			t = print_otag(h, fp, "");
373 
374 		print_text(h, nn->string);
375 
376 		if (fp != TAG_MAX)
377 			print_tagq(h, t);
378 	}
379 	return 0;
380 }
381 
382 static int
383 man_SM_pre(MAN_ARGS)
384 {
385 	print_otag(h, TAG_SMALL, "");
386 	if (n->tok == MAN_SB)
387 		print_otag(h, TAG_B, "");
388 	return 1;
389 }
390 
391 static int
392 man_PP_pre(MAN_ARGS)
393 {
394 	switch (n->type) {
395 	case ROFFT_BLOCK:
396 		html_close_paragraph(h);
397 		break;
398 	case ROFFT_HEAD:
399 		return 0;
400 	case ROFFT_BODY:
401 		if (n->child != NULL &&
402 		    (n->child->flags & NODE_NOFILL) == 0)
403 			print_otag(h, TAG_P, "c",
404 			    n->tok == MAN_PP ? "Pp" : "Pp HP");
405 		break;
406 	default:
407 		abort();
408 	}
409 	return 1;
410 }
411 
412 static char
413 list_continues(const struct roff_node *n1, const struct roff_node *n2)
414 {
415 	const char *s1, *s2;
416 	char c1, c2;
417 
418 	if (n1 == NULL || n1->type != ROFFT_BLOCK ||
419 	    n2 == NULL || n2->type != ROFFT_BLOCK)
420 		return '\0';
421 	if ((n1->tok == MAN_TP || n1->tok == MAN_TQ) &&
422 	    (n2->tok == MAN_TP || n2->tok == MAN_TQ))
423 		return ' ';
424 	if (n1->tok != MAN_IP || n2->tok != MAN_IP)
425 		return '\0';
426 	n1 = n1->head->child;
427 	n2 = n2->head->child;
428 	s1 = n1 == NULL ? "" : n1->string;
429 	s2 = n2 == NULL ? "" : n2->string;
430 	c1 = strcmp(s1, "*") == 0 ? '*' :
431 	     strcmp(s1, "\\-") == 0 ? '-' :
432 	     strcmp(s1, "\\(bu") == 0 ? 'b' : ' ';
433 	c2 = strcmp(s2, "*") == 0 ? '*' :
434 	     strcmp(s2, "\\-") == 0 ? '-' :
435 	     strcmp(s2, "\\(bu") == 0 ? 'b' : ' ';
436 	return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1;
437 }
438 
439 static int
440 man_IP_pre(MAN_ARGS)
441 {
442 	struct roff_node	*nn;
443 	const char		*list_class;
444 	enum htmltag		 list_elem, body_elem;
445 	char			 list_type;
446 
447 	nn = n->type == ROFFT_BLOCK ? n : n->parent;
448 	list_type = list_continues(roff_node_prev(nn), nn);
449 	if (list_type == '\0') {
450 		/* Start a new list. */
451 		list_type = list_continues(nn, roff_node_next(nn));
452 		if (list_type == '\0')
453 			list_type = ' ';
454 		switch (list_type) {
455 		case ' ':
456 			list_class = "Bl-tag";
457 			list_elem = TAG_DL;
458 			break;
459 		case '*':
460 			list_class = "Bl-bullet";
461 			list_elem = TAG_UL;
462 			break;
463 		case '-':
464 			list_class = "Bl-dash";
465 			list_elem = TAG_UL;
466 			break;
467 		default:
468 			abort();
469 		}
470 	} else {
471 		/* Continue a list that was started earlier. */
472 		list_class = NULL;
473 		list_elem = TAG_MAX;
474 	}
475 	body_elem = list_type == ' ' ? TAG_DD : TAG_LI;
476 
477 	switch (n->type) {
478 	case ROFFT_BLOCK:
479 		html_close_paragraph(h);
480 		if (list_elem != TAG_MAX)
481 			print_otag(h, list_elem, "c", list_class);
482 		return 1;
483 	case ROFFT_HEAD:
484 		if (body_elem == TAG_LI)
485 			return 0;
486 		print_otag_id(h, TAG_DT, NULL, n);
487 		break;
488 	case ROFFT_BODY:
489 		print_otag(h, body_elem, "");
490 		return 1;
491 	default:
492 		abort();
493 	}
494 	switch(n->tok) {
495 	case MAN_IP:  /* Only print the first header element. */
496 		if (n->child != NULL)
497 			print_man_node(man, n->child, h);
498 		break;
499 	case MAN_TP:  /* Only print next-line header elements. */
500 	case MAN_TQ:
501 		nn = n->child;
502 		while (nn != NULL && (NODE_LINE & nn->flags) == 0)
503 			nn = nn->next;
504 		while (nn != NULL) {
505 			print_man_node(man, nn, h);
506 			nn = nn->next;
507 		}
508 		break;
509 	default:
510 		abort();
511 	}
512 	return 0;
513 }
514 
515 static int
516 man_OP_pre(MAN_ARGS)
517 {
518 	struct tag	*tt;
519 
520 	print_text(h, "[");
521 	h->flags |= HTML_NOSPACE;
522 	tt = print_otag(h, TAG_SPAN, "c", "Op");
523 
524 	if ((n = n->child) != NULL) {
525 		print_otag(h, TAG_B, "");
526 		print_text(h, n->string);
527 	}
528 
529 	print_stagq(h, tt);
530 
531 	if (n != NULL && n->next != NULL) {
532 		print_otag(h, TAG_I, "");
533 		print_text(h, n->next->string);
534 	}
535 
536 	print_stagq(h, tt);
537 	h->flags |= HTML_NOSPACE;
538 	print_text(h, "]");
539 	return 0;
540 }
541 
542 static int
543 man_B_pre(MAN_ARGS)
544 {
545 	print_otag(h, TAG_B, "");
546 	return 1;
547 }
548 
549 static int
550 man_I_pre(MAN_ARGS)
551 {
552 	print_otag(h, TAG_I, "");
553 	return 1;
554 }
555 
556 static int
557 man_in_pre(MAN_ARGS)
558 {
559 	print_otag(h, TAG_BR, "");
560 	return 0;
561 }
562 
563 static int
564 man_ign_pre(MAN_ARGS)
565 {
566 	return 0;
567 }
568 
569 static int
570 man_RS_pre(MAN_ARGS)
571 {
572 	switch (n->type) {
573 	case ROFFT_BLOCK:
574 		html_close_paragraph(h);
575 		break;
576 	case ROFFT_HEAD:
577 		return 0;
578 	case ROFFT_BODY:
579 		print_otag(h, TAG_DIV, "c", "Bd-indent");
580 		break;
581 	default:
582 		abort();
583 	}
584 	return 1;
585 }
586 
587 static int
588 man_SY_pre(MAN_ARGS)
589 {
590 	switch (n->type) {
591 	case ROFFT_BLOCK:
592 		html_close_paragraph(h);
593 		print_otag(h, TAG_TABLE, "c", "Nm");
594 		print_otag(h, TAG_TR, "");
595 		break;
596 	case ROFFT_HEAD:
597 		print_otag(h, TAG_TD, "");
598 		print_otag(h, TAG_CODE, "c", "Nm");
599 		break;
600 	case ROFFT_BODY:
601 		print_otag(h, TAG_TD, "");
602 		break;
603 	default:
604 		abort();
605 	}
606 	return 1;
607 }
608 
609 static int
610 man_UR_pre(MAN_ARGS)
611 {
612 	char *cp;
613 
614 	n = n->child;
615 	assert(n->type == ROFFT_HEAD);
616 	if (n->child != NULL) {
617 		assert(n->child->type == ROFFT_TEXT);
618 		if (n->tok == MAN_MT) {
619 			mandoc_asprintf(&cp, "mailto:%s", n->child->string);
620 			print_otag(h, TAG_A, "ch", "Mt", cp);
621 			free(cp);
622 		} else
623 			print_otag(h, TAG_A, "ch", "Lk", n->child->string);
624 	}
625 
626 	assert(n->next->type == ROFFT_BODY);
627 	if (n->next->child != NULL)
628 		n = n->next;
629 
630 	print_man_nodelist(man, n->child, h);
631 	return 0;
632 }
633 
634 static int
635 man_abort_pre(MAN_ARGS)
636 {
637 	abort();
638 }
639