xref: /openbsd-src/usr.bin/mandoc/man_html.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: man_html.c,v 1.127 2019/04/30 15:52:42 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #include "mandoc_aux.h"
27 #include "mandoc.h"
28 #include "roff.h"
29 #include "man.h"
30 #include "out.h"
31 #include "html.h"
32 #include "main.h"
33 
/*
 * Parameter list shared by all node handlers in this file:
 * the document metadata, the node being formatted, and the
 * HTML output state.
 */
#define	MAN_ARGS	  const struct roff_meta *man, \
			  const struct roff_node *n, \
			  struct html *h

/*
 * One entry of the per-macro dispatch table man_html_acts[].
 */
struct	man_html_act {
	/*
	 * Called by print_man_node() before the children of the node
	 * are formatted.  A return value of zero suppresses the
	 * generic formatting of the children.
	 */
	int		(*pre)(MAN_ARGS);
	/*
	 * NOTE(review): print_man_node() never calls this member and
	 * every entry of man_html_acts[] sets it to NULL.
	 */
	int		(*post)(MAN_ARGS);
};
42 
/*
 * Forward declarations of the file-local functions:
 * the tree walkers, the list helper, and the per-macro handlers
 * referenced from man_html_acts[].
 */
static	void		  print_man_head(const struct roff_meta *,
				struct html *);
static	void		  print_man_nodelist(MAN_ARGS);
static	void		  print_man_node(MAN_ARGS);
static	char		  list_continues(const struct roff_node *,
				const struct roff_node *);
static	int		  man_B_pre(MAN_ARGS);
static	int		  man_IP_pre(MAN_ARGS);
static	int		  man_I_pre(MAN_ARGS);
static	int		  man_OP_pre(MAN_ARGS);
static	int		  man_PP_pre(MAN_ARGS);
static	int		  man_RS_pre(MAN_ARGS);
static	int		  man_SH_pre(MAN_ARGS);
static	int		  man_SM_pre(MAN_ARGS);
static	int		  man_SY_pre(MAN_ARGS);
static	int		  man_UR_pre(MAN_ARGS);
static	int		  man_abort_pre(MAN_ARGS);
static	int		  man_alt_pre(MAN_ARGS);
static	int		  man_ign_pre(MAN_ARGS);
static	int		  man_in_pre(MAN_ARGS);
static	void		  man_root_post(const struct roff_meta *,
				struct html *);
static	void		  man_root_pre(const struct roff_meta *,
				struct html *);
67 
/*
 * Dispatch table of the man(7) macro handlers.
 * print_man_node() indexes it with (n->tok - MAN_TH), so the order
 * of the entries has to follow the order of the macro tokens from
 * MAN_TH up to, but not including, MAN_MAX.
 * A NULL pre-handler means that only the children of the node
 * are formatted.
 */
static	const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = {
	{ NULL, NULL }, /* TH */
	{ man_SH_pre, NULL }, /* SH */
	{ man_SH_pre, NULL }, /* SS */
	{ man_IP_pre, NULL }, /* TP */
	{ man_IP_pre, NULL }, /* TQ */
	{ man_abort_pre, NULL }, /* LP */
	{ man_PP_pre, NULL }, /* PP */
	{ man_abort_pre, NULL }, /* P */
	{ man_IP_pre, NULL }, /* IP */
	{ man_PP_pre, NULL }, /* HP */
	{ man_SM_pre, NULL }, /* SM */
	{ man_SM_pre, NULL }, /* SB */
	{ man_alt_pre, NULL }, /* BI */
	{ man_alt_pre, NULL }, /* IB */
	{ man_alt_pre, NULL }, /* BR */
	{ man_alt_pre, NULL }, /* RB */
	{ NULL, NULL }, /* R */
	{ man_B_pre, NULL }, /* B */
	{ man_I_pre, NULL }, /* I */
	{ man_alt_pre, NULL }, /* IR */
	{ man_alt_pre, NULL }, /* RI */
	{ NULL, NULL }, /* RE */
	{ man_RS_pre, NULL }, /* RS */
	{ man_ign_pre, NULL }, /* DT */
	{ man_ign_pre, NULL }, /* UC */
	{ man_ign_pre, NULL }, /* PD */
	{ man_ign_pre, NULL }, /* AT */
	{ man_in_pre, NULL }, /* in */
	{ man_SY_pre, NULL }, /* SY */
	{ NULL, NULL }, /* YS */
	{ man_OP_pre, NULL }, /* OP */
	{ NULL, NULL }, /* EX */
	{ NULL, NULL }, /* EE */
	{ man_UR_pre, NULL }, /* UR */
	{ NULL, NULL }, /* UE */
	{ man_UR_pre, NULL }, /* MT */
	{ NULL, NULL }, /* ME */
};
107 
108 
109 void
110 html_man(void *arg, const struct roff_meta *man)
111 {
112 	struct html		*h;
113 	struct roff_node	*n;
114 	struct tag		*t;
115 
116 	h = (struct html *)arg;
117 	n = man->first->child;
118 
119 	if ((h->oflags & HTML_FRAGMENT) == 0) {
120 		print_gen_decls(h);
121 		print_otag(h, TAG_HTML, "");
122 		if (n != NULL && n->type == ROFFT_COMMENT)
123 			print_gen_comment(h, n);
124 		t = print_otag(h, TAG_HEAD, "");
125 		print_man_head(man, h);
126 		print_tagq(h, t);
127 		print_otag(h, TAG_BODY, "");
128 	}
129 
130 	man_root_pre(man, h);
131 	t = print_otag(h, TAG_DIV, "c", "manual-text");
132 	print_man_nodelist(man, n, h);
133 	print_tagq(h, t);
134 	man_root_post(man, h);
135 	print_tagq(h, NULL);
136 }
137 
138 static void
139 print_man_head(const struct roff_meta *man, struct html *h)
140 {
141 	char	*cp;
142 
143 	print_gen_head(h);
144 	mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec);
145 	print_otag(h, TAG_TITLE, "");
146 	print_text(h, cp);
147 	free(cp);
148 }
149 
150 static void
151 print_man_nodelist(MAN_ARGS)
152 {
153 	while (n != NULL) {
154 		print_man_node(man, n, h);
155 		n = n->next;
156 	}
157 }
158 
/*
 * Format a single node, including its children where applicable.
 * Comment nodes and nodes flagged NODE_NOPRT produce no output.
 * Text and equation nodes are printed directly, table spans are
 * handed to print_tbl(), roff requests to roff_html_pre(), and
 * man(7) macros are dispatched through man_html_acts[].
 * Tags opened while handling the node are closed again before
 * returning, with special treatment of list items (see below).
 */
static void
print_man_node(MAN_ARGS)
{
	struct tag	*t;
	int		 child;

	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
		return;

	/* Switch between fill and no-fill mode as the node requires. */
	html_fillmode(h, n->flags & NODE_NOFILL ? ROFF_nf : ROFF_fi);

	/*
	 * In every case that reaches the end of the switch, t is set
	 * to the tag that was innermost on entry and its reference
	 * count is raised while this node is being processed
	 * (presumably to keep it alive until the cleanup below --
	 * confirm against html.c).
	 */
	child = 1;
	switch (n->type) {
	case ROFFT_TEXT:
		/* An empty text node only ends the output line. */
		if (*n->string == '\0') {
			print_endline(h);
			return;
		}
		/*
		 * Text starting with a blank at the beginning of an
		 * input line starts a new output line, unless
		 * HTML_NONEWLINE is set; otherwise, text flagged as
		 * a closing delimiter is attached without a space.
		 */
		if (*n->string == ' ' && n->flags & NODE_LINE &&
		    (h->flags & HTML_NONEWLINE) == 0)
			print_endline(h);
		else if (n->flags & NODE_DELIMC)
			h->flags |= HTML_NOSPACE;
		t = h->tag;
		t->refcnt++;
		print_text(h, n->string);
		break;
	case ROFFT_EQN:
		t = h->tag;
		t->refcnt++;
		print_eqn(h, n->eqn);
		break;
	case ROFFT_TBL:
		/*
		 * This will take care of initialising all of the table
		 * state data for the first table, then tearing it down
		 * for the last one.
		 */
		print_tbl(h, n->span);
		return;
	default:
		/*
		 * Close out scope of font prior to opening a macro
		 * scope.
		 */
		if (h->metac != ESCAPE_FONTROMAN) {
			h->metal = h->metac;
			h->metac = ESCAPE_FONTROMAN;
		}

		/*
		 * Close out the current table, if it's open, and unset
		 * the "meta" table state.  This will be reopened on the
		 * next table element.
		 */
		if (h->tblt != NULL)
			print_tblclose(h);
		t = h->tag;
		t->refcnt++;
		/*
		 * Tokens below ROFF_MAX are roff requests: they are
		 * formatted by roff_html_pre() and their children
		 * are not visited here.
		 */
		if (n->tok < ROFF_MAX) {
			roff_html_pre(h, n);
			t->refcnt--;
			print_stagq(h, t);
			return;
		}
		/*
		 * man(7) macro: run the pre-handler, if any.
		 * Its return value decides whether the children
		 * get formatted generically below.
		 */
		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
		if (man_html_acts[n->tok - MAN_TH].pre != NULL)
			child = (*man_html_acts[n->tok - MAN_TH].pre)(man,
			    n, h);
		break;
	}

	if (child && n->child != NULL)
		print_man_nodelist(man, n->child, h);

	/* This will automatically close out any font scope. */
	t->refcnt--;
	if (n->type == ROFFT_BLOCK &&
	    (n->tok == MAN_IP || n->tok == MAN_TP || n->tok == MAN_TQ)) {
		/*
		 * For list item blocks, look for the list element
		 * (<dl> or <ul>) starting from the current tag and
		 * following the next pointers.  The loop relies on
		 * such a tag being present.
		 */
		t = h->tag;
		while (t->tag != TAG_DL && t->tag != TAG_UL)
			t = t->next;
		/*
		 * Close the list if no further item of the same type
		 * follows; otherwise, close the item only.
		 */
		if (list_continues(n, n->next) == '\0') {
			print_tagq(h, t);
			t = NULL;
		}
	}
	if (t != NULL)
		print_stagq(h, t);

	/*
	 * In no-fill mode, end the output line after this node if the
	 * next node starts a new input line; nodes of the YS macro
	 * are exempt.
	 */
	if (n->flags & NODE_NOFILL && n->tok != MAN_YS &&
	    (n->next != NULL && n->next->flags & NODE_LINE)) {
		/* In .nf = <pre>, print even empty lines. */
		h->col++;
		print_endline(h);
	}
}
260 
261 static void
262 man_root_pre(const struct roff_meta *man, struct html *h)
263 {
264 	struct tag	*t, *tt;
265 	char		*title;
266 
267 	assert(man->title);
268 	assert(man->msec);
269 	mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
270 
271 	t = print_otag(h, TAG_TABLE, "c", "head");
272 	tt = print_otag(h, TAG_TR, "");
273 
274 	print_otag(h, TAG_TD, "c", "head-ltitle");
275 	print_text(h, title);
276 	print_stagq(h, tt);
277 
278 	print_otag(h, TAG_TD, "c", "head-vol");
279 	if (man->vol != NULL)
280 		print_text(h, man->vol);
281 	print_stagq(h, tt);
282 
283 	print_otag(h, TAG_TD, "c", "head-rtitle");
284 	print_text(h, title);
285 	print_tagq(h, t);
286 	free(title);
287 }
288 
289 static void
290 man_root_post(const struct roff_meta *man, struct html *h)
291 {
292 	struct tag	*t, *tt;
293 
294 	t = print_otag(h, TAG_TABLE, "c", "foot");
295 	tt = print_otag(h, TAG_TR, "");
296 
297 	print_otag(h, TAG_TD, "c", "foot-date");
298 	print_text(h, man->date);
299 	print_stagq(h, tt);
300 
301 	print_otag(h, TAG_TD, "c", "foot-os");
302 	if (man->os != NULL)
303 		print_text(h, man->os);
304 	print_tagq(h, t);
305 }
306 
307 static int
308 man_SH_pre(MAN_ARGS)
309 {
310 	const char	*class;
311 	char		*id;
312 	enum htmltag	 tag;
313 
314 	if (n->tok == MAN_SH) {
315 		tag = TAG_H1;
316 		class = "Sh";
317 	} else {
318 		tag = TAG_H2;
319 		class = "Ss";
320 	}
321 	switch (n->type) {
322 	case ROFFT_BLOCK:
323 		html_close_paragraph(h);
324 		print_otag(h, TAG_SECTION, "c", class);
325 		break;
326 	case ROFFT_HEAD:
327 		id = html_make_id(n, 1);
328 		print_otag(h, tag, "ci", class, id);
329 		if (id != NULL)
330 			print_otag(h, TAG_A, "chR", "permalink", id);
331 		break;
332 	case ROFFT_BODY:
333 		break;
334 	default:
335 		abort();
336 	}
337 	return 1;
338 }
339 
340 static int
341 man_alt_pre(MAN_ARGS)
342 {
343 	const struct roff_node	*nn;
344 	struct tag	*t;
345 	int		 i;
346 	enum htmltag	 fp;
347 
348 	for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i++) {
349 		switch (n->tok) {
350 		case MAN_BI:
351 			fp = i % 2 ? TAG_I : TAG_B;
352 			break;
353 		case MAN_IB:
354 			fp = i % 2 ? TAG_B : TAG_I;
355 			break;
356 		case MAN_RI:
357 			fp = i % 2 ? TAG_I : TAG_MAX;
358 			break;
359 		case MAN_IR:
360 			fp = i % 2 ? TAG_MAX : TAG_I;
361 			break;
362 		case MAN_BR:
363 			fp = i % 2 ? TAG_MAX : TAG_B;
364 			break;
365 		case MAN_RB:
366 			fp = i % 2 ? TAG_B : TAG_MAX;
367 			break;
368 		default:
369 			abort();
370 		}
371 
372 		if (i)
373 			h->flags |= HTML_NOSPACE;
374 
375 		if (fp != TAG_MAX)
376 			t = print_otag(h, fp, "");
377 
378 		print_text(h, nn->string);
379 
380 		if (fp != TAG_MAX)
381 			print_tagq(h, t);
382 	}
383 	return 0;
384 }
385 
386 static int
387 man_SM_pre(MAN_ARGS)
388 {
389 	print_otag(h, TAG_SMALL, "");
390 	if (n->tok == MAN_SB)
391 		print_otag(h, TAG_B, "");
392 	return 1;
393 }
394 
395 static int
396 man_PP_pre(MAN_ARGS)
397 {
398 	switch (n->type) {
399 	case ROFFT_BLOCK:
400 		html_close_paragraph(h);
401 		break;
402 	case ROFFT_HEAD:
403 		return 0;
404 	case ROFFT_BODY:
405 		if (n->child != NULL &&
406 		    (n->child->flags & NODE_NOFILL) == 0)
407 			print_otag(h, TAG_P, "c",
408 			    n->tok == MAN_PP ? "Pp" : "Pp HP");
409 		break;
410 	default:
411 		abort();
412 	}
413 	return 1;
414 }
415 
416 static char
417 list_continues(const struct roff_node *n1, const struct roff_node *n2)
418 {
419 	const char *s1, *s2;
420 	char c1, c2;
421 
422 	if (n1 == NULL || n1->type != ROFFT_BLOCK ||
423 	    n2 == NULL || n2->type != ROFFT_BLOCK)
424 		return '\0';
425 	if ((n1->tok == MAN_TP || n1->tok == MAN_TQ) &&
426 	    (n2->tok == MAN_TP || n2->tok == MAN_TQ))
427 		return ' ';
428 	if (n1->tok != MAN_IP || n2->tok != MAN_IP)
429 		return '\0';
430 	n1 = n1->head->child;
431 	n2 = n2->head->child;
432 	s1 = n1 == NULL ? "" : n1->string;
433 	s2 = n2 == NULL ? "" : n2->string;
434 	c1 = strcmp(s1, "*") == 0 ? '*' :
435 	     strcmp(s1, "\\-") == 0 ? '-' :
436 	     strcmp(s1, "\\(bu") == 0 ? 'b' : ' ';
437 	c2 = strcmp(s2, "*") == 0 ? '*' :
438 	     strcmp(s2, "\\-") == 0 ? '-' :
439 	     strcmp(s2, "\\(bu") == 0 ? 'b' : ' ';
440 	return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1;
441 }
442 
443 static int
444 man_IP_pre(MAN_ARGS)
445 {
446 	const struct roff_node	*nn;
447 	const char		*list_class;
448 	enum htmltag		 list_elem, body_elem;
449 	char			 list_type;
450 
451 	nn = n->type == ROFFT_BLOCK ? n : n->parent;
452 	if ((list_type = list_continues(nn->prev, nn)) == '\0') {
453 		/* Start a new list. */
454 		if ((list_type = list_continues(nn, nn->next)) == '\0')
455 			list_type = ' ';
456 		switch (list_type) {
457 		case ' ':
458 			list_class = "Bl-tag";
459 			list_elem = TAG_DL;
460 			break;
461 		case '*':
462 			list_class = "Bl-bullet";
463 			list_elem = TAG_UL;
464 			break;
465 		case '-':
466 			list_class = "Bl-dash";
467 			list_elem = TAG_UL;
468 			break;
469 		default:
470 			abort();
471 		}
472 	} else {
473 		/* Continue a list that was started earlier. */
474 		list_class = NULL;
475 		list_elem = TAG_MAX;
476 	}
477 	body_elem = list_type == ' ' ? TAG_DD : TAG_LI;
478 
479 	switch (n->type) {
480 	case ROFFT_BLOCK:
481 		html_close_paragraph(h);
482 		if (list_elem != TAG_MAX)
483 			print_otag(h, list_elem, "c", list_class);
484 		return 1;
485 	case ROFFT_HEAD:
486 		if (body_elem == TAG_LI)
487 			return 0;
488 		print_otag(h, TAG_DT, "");
489 		break;
490 	case ROFFT_BODY:
491 		print_otag(h, body_elem, "");
492 		return 1;
493 	default:
494 		abort();
495 	}
496 
497 	switch(n->tok) {
498 	case MAN_IP:  /* Only print the first header element. */
499 		if (n->child != NULL)
500 			print_man_node(man, n->child, h);
501 		break;
502 	case MAN_TP:  /* Only print next-line header elements. */
503 	case MAN_TQ:
504 		nn = n->child;
505 		while (nn != NULL && (NODE_LINE & nn->flags) == 0)
506 			nn = nn->next;
507 		while (nn != NULL) {
508 			print_man_node(man, nn, h);
509 			nn = nn->next;
510 		}
511 		break;
512 	default:
513 		abort();
514 	}
515 	return 0;
516 }
517 
518 static int
519 man_OP_pre(MAN_ARGS)
520 {
521 	struct tag	*tt;
522 
523 	print_text(h, "[");
524 	h->flags |= HTML_NOSPACE;
525 	tt = print_otag(h, TAG_SPAN, "c", "Op");
526 
527 	if ((n = n->child) != NULL) {
528 		print_otag(h, TAG_B, "");
529 		print_text(h, n->string);
530 	}
531 
532 	print_stagq(h, tt);
533 
534 	if (n != NULL && n->next != NULL) {
535 		print_otag(h, TAG_I, "");
536 		print_text(h, n->next->string);
537 	}
538 
539 	print_stagq(h, tt);
540 	h->flags |= HTML_NOSPACE;
541 	print_text(h, "]");
542 	return 0;
543 }
544 
545 static int
546 man_B_pre(MAN_ARGS)
547 {
548 	print_otag(h, TAG_B, "");
549 	return 1;
550 }
551 
552 static int
553 man_I_pre(MAN_ARGS)
554 {
555 	print_otag(h, TAG_I, "");
556 	return 1;
557 }
558 
559 static int
560 man_in_pre(MAN_ARGS)
561 {
562 	print_otag(h, TAG_BR, "");
563 	return 0;
564 }
565 
/*
 * Handler for macros that produce no HTML output at all
 * (.DT, .UC, .PD, and .AT in man_html_acts[]).
 * Returns 0, so the children of the node are not formatted either.
 */
static int
man_ign_pre(MAN_ARGS)
{
	return 0;
}
571 
572 static int
573 man_RS_pre(MAN_ARGS)
574 {
575 	switch (n->type) {
576 	case ROFFT_BLOCK:
577 		html_close_paragraph(h);
578 		break;
579 	case ROFFT_HEAD:
580 		return 0;
581 	case ROFFT_BODY:
582 		print_otag(h, TAG_DIV, "c", "Bd-indent");
583 		break;
584 	default:
585 		abort();
586 	}
587 	return 1;
588 }
589 
590 static int
591 man_SY_pre(MAN_ARGS)
592 {
593 	switch (n->type) {
594 	case ROFFT_BLOCK:
595 		html_close_paragraph(h);
596 		print_otag(h, TAG_TABLE, "c", "Nm");
597 		print_otag(h, TAG_TR, "");
598 		break;
599 	case ROFFT_HEAD:
600 		print_otag(h, TAG_TD, "");
601 		print_otag(h, TAG_CODE, "c", "Nm");
602 		break;
603 	case ROFFT_BODY:
604 		print_otag(h, TAG_TD, "");
605 		break;
606 	default:
607 		abort();
608 	}
609 	return 1;
610 }
611 
612 static int
613 man_UR_pre(MAN_ARGS)
614 {
615 	char *cp;
616 
617 	n = n->child;
618 	assert(n->type == ROFFT_HEAD);
619 	if (n->child != NULL) {
620 		assert(n->child->type == ROFFT_TEXT);
621 		if (n->tok == MAN_MT) {
622 			mandoc_asprintf(&cp, "mailto:%s", n->child->string);
623 			print_otag(h, TAG_A, "ch", "Mt", cp);
624 			free(cp);
625 		} else
626 			print_otag(h, TAG_A, "ch", "Lk", n->child->string);
627 	}
628 
629 	assert(n->next->type == ROFFT_BODY);
630 	if (n->next->child != NULL)
631 		n = n->next;
632 
633 	print_man_nodelist(man, n->child, h);
634 	return 0;
635 }
636 
/*
 * Handler for macros that must never reach the HTML formatter
 * (.LP and .P in man_html_acts[]): terminate the program.
 * NOTE(review): presumably the parser replaces these macros
 * by .PP before formatting starts -- confirm in the man(7) parser.
 * This function does not return.
 */
static int
man_abort_pre(MAN_ARGS)
{
	abort();
}
642