xref: /openbsd-src/usr.bin/mandoc/mdoc.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /* $Id: mdoc.c,v 1.1 2009/04/06 20:30:40 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the
7  * above copyright notice and this permission notice appear in all
8  * copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11  * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12  * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13  * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17  * PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #include "libmdoc.h"
27 
/*
 * Parse-time error codes local to this file.  Each code is turned into
 * a human-readable message by perr().
 */
enum	merr {
	ENOCALL,	/* macro is not callable */
	EBODYPROL,	/* body macro used in the prologue */
	EPROLBODY,	/* prologue macro used in the body */
	ESPACE,		/* whitespace after the control character */
	ETEXTPROL,	/* free-form text in the prologue */
	ENOBLANK,	/* blank line in a non-literal context */
	EMALLOC		/* memory exhausted */
};
37 
38 const	char *const __mdoc_macronames[MDOC_MAX] = {
39 	"\\\"",		"Dd",		"Dt",		"Os",
40 	"Sh",		"Ss",		"Pp",		"D1",
41 	"Dl",		"Bd",		"Ed",		"Bl",
42 	"El",		"It",		"Ad",		"An",
43 	"Ar",		"Cd",		"Cm",		"Dv",
44 	"Er",		"Ev",		"Ex",		"Fa",
45 	"Fd",		"Fl",		"Fn",		"Ft",
46 	"Ic",		"In",		"Li",		"Nd",
47 	"Nm",		"Op",		"Ot",		"Pa",
48 	"Rv",		"St",		"Va",		"Vt",
49 	/* LINTED */
50 	"Xr",		"\%A",		"\%B",		"\%D",
51 	/* LINTED */
52 	"\%I",		"\%J",		"\%N",		"\%O",
53 	/* LINTED */
54 	"\%P",		"\%R",		"\%T",		"\%V",
55 	"Ac",		"Ao",		"Aq",		"At",
56 	"Bc",		"Bf",		"Bo",		"Bq",
57 	"Bsx",		"Bx",		"Db",		"Dc",
58 	"Do",		"Dq",		"Ec",		"Ef",
59 	"Em",		"Eo",		"Fx",		"Ms",
60 	"No",		"Ns",		"Nx",		"Ox",
61 	"Pc",		"Pf",		"Po",		"Pq",
62 	"Qc",		"Ql",		"Qo",		"Qq",
63 	"Re",		"Rs",		"Sc",		"So",
64 	"Sq",		"Sm",		"Sx",		"Sy",
65 	"Tn",		"Ux",		"Xc",		"Xo",
66 	"Fo",		"Fc",		"Oo",		"Oc",
67 	"Bk",		"Ek",		"Bt",		"Hf",
68 	"Fr",		"Ud",		"Lb",		"Ap",
69 	"Lp",		"Lk",		"Mt",		"Brq",
70 	/* LINTED */
71 	"Bro",		"Brc",		"\%C",		"Es",
72 	/* LINTED */
73 	"En",		"Dx",		"\%Q"
74 	};
75 
76 const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
77 	"split",		"nosplit",		"ragged",
78 	"unfilled",		"literal",		"file",
79 	"offset",		"bullet",		"dash",
80 	"hyphen",		"item",			"enum",
81 	"tag",			"diag",			"hang",
82 	"ohang",		"inset",		"column",
83 	"width",		"compact",		"std",
84 	"filled",		"words",		"emphasis",
85 	"symbolic",		"nested"
86 	};
87 
88 const	char * const *mdoc_macronames = __mdoc_macronames;
89 const	char * const *mdoc_argnames = __mdoc_argnames;
90 
/* Per-parse state set-up and tear-down. */
static	void		  mdoc_free1(struct mdoc *mdoc);
static	int		  mdoc_alloc1(struct mdoc *mdoc);

/* Node construction and linkage into the tree. */
static	struct mdoc_node *node_alloc(struct mdoc *mdoc, int line,
				int pos, int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);

/* Line parsers and diagnostics. */
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln,
				const char *buf);
static	int		  perr(struct mdoc *m, int line, int pos,
				enum merr type);

/* Report error `t' at the position of the last-parsed node. */
#define verr(m, t) perr((m), (m)->last->line, (m)->last->pos, (t))
103 
104 /*
105  * Get the first (root) node of the parse tree.
106  */
107 const struct mdoc_node *
108 mdoc_node(const struct mdoc *m)
109 {
110 
111 	return(MDOC_HALT & m->flags ? NULL : m->first);
112 }
113 
114 
115 const struct mdoc_meta *
116 mdoc_meta(const struct mdoc *m)
117 {
118 
119 	return(MDOC_HALT & m->flags ? NULL : &m->meta);
120 }
121 
122 
123 static void
124 mdoc_free1(struct mdoc *mdoc)
125 {
126 
127 	if (mdoc->first)
128 		mdoc_node_freelist(mdoc->first);
129 	if (mdoc->meta.title)
130 		free(mdoc->meta.title);
131 	if (mdoc->meta.os)
132 		free(mdoc->meta.os);
133 	if (mdoc->meta.name)
134 		free(mdoc->meta.name);
135 	if (mdoc->meta.arch)
136 		free(mdoc->meta.arch);
137 	if (mdoc->meta.vol)
138 		free(mdoc->meta.vol);
139 }
140 
141 
142 static int
143 mdoc_alloc1(struct mdoc *mdoc)
144 {
145 
146 	bzero(&mdoc->meta, sizeof(struct mdoc_meta));
147 	mdoc->flags = 0;
148 	mdoc->lastnamed = mdoc->lastsec = 0;
149 	mdoc->last = calloc(1, sizeof(struct mdoc_node));
150 	if (NULL == mdoc->last)
151 		return(0);
152 
153 	mdoc->first = mdoc->last;
154 	mdoc->last->type = MDOC_ROOT;
155 	mdoc->next = MDOC_NEXT_CHILD;
156 	return(1);
157 }
158 
159 
/*
 * Discard the results of the previous parse (node tree and meta-data)
 * and set up a fresh root node so the parser can be used again.
 * Returns the result of the re-initialisation: non-zero on success,
 * zero if the root node could not be allocated.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);
	return(ok);
}
172 
173 
174 /*
175  * Completely free up all resources.
176  */
177 void
178 mdoc_free(struct mdoc *mdoc)
179 {
180 
181 	mdoc_free1(mdoc);
182 	if (mdoc->htab)
183 		mdoc_hash_free(mdoc->htab);
184 	free(mdoc);
185 }
186 
187 
188 struct mdoc *
189 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
190 {
191 	struct mdoc	*p;
192 
193 	if (NULL == (p = calloc(1, sizeof(struct mdoc))))
194 		return(NULL);
195 	if (cb)
196 		(void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
197 
198 	p->data = data;
199 	p->pflags = pflags;
200 
201 	if (NULL == (p->htab = mdoc_hash_alloc())) {
202 		free(p);
203 		return(NULL);
204 	} else if (mdoc_alloc1(p))
205 		return(p);
206 
207 	free(p);
208 	return(NULL);
209 }
210 
211 
212 /*
213  * Climb back up the parse tree, validating open scopes.  Mostly calls
214  * through to macro_end in macro.c.
215  */
216 int
217 mdoc_endparse(struct mdoc *m)
218 {
219 
220 	if (MDOC_HALT & m->flags)
221 		return(0);
222 	else if (mdoc_macroend(m))
223 		return(1);
224 	m->flags |= MDOC_HALT;
225 	return(0);
226 }
227 
228 
229 /*
230  * Main parse routine.  Parses a single line -- really just hands off to
231  * the macro or text parser.
232  */
233 int
234 mdoc_parseln(struct mdoc *m, int ln, char *buf)
235 {
236 
237 	/* If in error-mode, then we parse no more. */
238 
239 	if (MDOC_HALT & m->flags)
240 		return(0);
241 
242 	return('.' == *buf ? parsemacro(m, ln, buf) :
243 			parsetext(m, ln, buf));
244 }
245 
246 
247 void
248 mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
249 {
250 	char		  buf[256];
251 	va_list		  ap;
252 
253 	if (NULL == mdoc->cb.mdoc_msg)
254 		return;
255 
256 	va_start(ap, fmt);
257 	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
258 	va_end(ap);
259 	(*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
260 }
261 
262 
263 int
264 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
265 		const char *fmt, ...)
266 {
267 	char		 buf[256];
268 	va_list		 ap;
269 
270 	if (NULL == mdoc->cb.mdoc_err)
271 		return(0);
272 
273 	va_start(ap, fmt);
274 	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
275 	va_end(ap);
276 	return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
277 }
278 
279 
280 int
281 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
282 		enum mdoc_warn type, const char *fmt, ...)
283 {
284 	char		 buf[256];
285 	va_list		 ap;
286 
287 	if (NULL == mdoc->cb.mdoc_warn)
288 		return(0);
289 
290 	va_start(ap, fmt);
291 	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
292 	va_end(ap);
293 	return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
294 }
295 
296 
297 int
298 mdoc_macro(struct mdoc *m, int tok,
299 		int ln, int pp, int *pos, char *buf)
300 {
301 
302 	/* FIXME - these should happen during validation. */
303 
304 	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
305 			SEC_PROLOGUE != m->lastnamed)
306 		return(perr(m, ln, pp, EPROLBODY));
307 
308 	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
309 			SEC_PROLOGUE == m->lastnamed)
310 		return(perr(m, ln, pp, EBODYPROL));
311 
312 	if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
313 		return(perr(m, ln, pp, ENOCALL));
314 
315 	return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
316 }
317 
318 
319 static int
320 perr(struct mdoc *m, int line, int pos, enum merr type)
321 {
322 	char		*p;
323 
324 	p = NULL;
325 	switch (type) {
326 	case (ENOCALL):
327 		p = "not callable";
328 		break;
329 	case (EPROLBODY):
330 		p = "macro disallowed in document body";
331 		break;
332 	case (EBODYPROL):
333 		p = "macro disallowed in document prologue";
334 		break;
335 	case (EMALLOC):
336 		p = "memory exhausted";
337 		break;
338 	case (ETEXTPROL):
339 		p = "text disallowed in document prologue";
340 		break;
341 	case (ENOBLANK):
342 		p = "blank lines disallowed in non-literal contexts";
343 		break;
344 	case (ESPACE):
345 		p = "whitespace disallowed after delimiter";
346 		break;
347 	}
348 	assert(p);
349 	return(mdoc_perr(m, line, pos, p));
350 }
351 
352 
353 static int
354 node_append(struct mdoc *mdoc, struct mdoc_node *p)
355 {
356 
357 	assert(mdoc->last);
358 	assert(mdoc->first);
359 	assert(MDOC_ROOT != p->type);
360 
361 	switch (mdoc->next) {
362 	case (MDOC_NEXT_SIBLING):
363 		mdoc->last->next = p;
364 		p->prev = mdoc->last;
365 		p->parent = mdoc->last->parent;
366 		break;
367 	case (MDOC_NEXT_CHILD):
368 		mdoc->last->child = p;
369 		p->parent = mdoc->last;
370 		break;
371 	default:
372 		abort();
373 		/* NOTREACHED */
374 	}
375 
376 	if ( ! mdoc_valid_pre(mdoc, p))
377 		return(0);
378 	if ( ! mdoc_action_pre(mdoc, p))
379 		return(0);
380 
381 	switch (p->type) {
382 	case (MDOC_HEAD):
383 		assert(MDOC_BLOCK == p->parent->type);
384 		p->parent->head = p;
385 		break;
386 	case (MDOC_TAIL):
387 		assert(MDOC_BLOCK == p->parent->type);
388 		p->parent->tail = p;
389 		break;
390 	case (MDOC_BODY):
391 		assert(MDOC_BLOCK == p->parent->type);
392 		p->parent->body = p;
393 		break;
394 	default:
395 		break;
396 	}
397 
398 	mdoc->last = p;
399 
400 	switch (p->type) {
401 	case (MDOC_TEXT):
402 		if ( ! mdoc_valid_post(mdoc))
403 			return(0);
404 		if ( ! mdoc_action_post(mdoc))
405 			return(0);
406 		break;
407 	default:
408 		break;
409 	}
410 
411 	return(1);
412 }
413 
414 
415 static struct mdoc_node *
416 node_alloc(struct mdoc *mdoc, int line,
417 		int pos, int tok, enum mdoc_type type)
418 {
419 	struct mdoc_node *p;
420 
421 	if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
422 		(void)verr(mdoc, EMALLOC);
423 		return(NULL);
424 	}
425 
426 	p->sec = mdoc->lastsec;
427 	p->line = line;
428 	p->pos = pos;
429 	p->tok = tok;
430 	if (MDOC_TEXT != (p->type = type))
431 		assert(p->tok >= 0);
432 
433 	return(p);
434 }
435 
436 
437 int
438 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
439 {
440 	struct mdoc_node *p;
441 
442 	p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
443 	if (NULL == p)
444 		return(0);
445 	return(node_append(mdoc, p));
446 }
447 
448 
449 int
450 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
451 {
452 	struct mdoc_node *p;
453 
454 	assert(mdoc->first);
455 	assert(mdoc->last);
456 
457 	p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
458 	if (NULL == p)
459 		return(0);
460 	return(node_append(mdoc, p));
461 }
462 
463 
464 int
465 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
466 {
467 	struct mdoc_node *p;
468 
469 	p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
470 	if (NULL == p)
471 		return(0);
472 	return(node_append(mdoc, p));
473 }
474 
475 
476 int
477 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
478 		int tok, struct mdoc_arg *args)
479 {
480 	struct mdoc_node *p;
481 
482 	p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
483 	if (NULL == p)
484 		return(0);
485 	if ((p->args = args))
486 		(args->refcnt)++;
487 	return(node_append(mdoc, p));
488 }
489 
490 
491 int
492 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
493 		int tok, struct mdoc_arg *args)
494 {
495 	struct mdoc_node *p;
496 
497 	p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
498 	if (NULL == p)
499 		return(0);
500 	if ((p->args = args))
501 		(args->refcnt)++;
502 	return(node_append(mdoc, p));
503 }
504 
505 
506 int
507 mdoc_word_alloc(struct mdoc *mdoc,
508 		int line, int pos, const char *word)
509 {
510 	struct mdoc_node *p;
511 
512 	p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
513 	if (NULL == p)
514 		return(0);
515 	if (NULL == (p->string = strdup(word))) {
516 		(void)verr(mdoc, EMALLOC);
517 		return(0);
518 	}
519 	return(node_append(mdoc, p));
520 }
521 
522 
523 void
524 mdoc_node_free(struct mdoc_node *p)
525 {
526 
527 	if (p->string)
528 		free(p->string);
529 	if (p->args)
530 		mdoc_argv_free(p->args);
531 	free(p);
532 }
533 
534 
535 void
536 mdoc_node_freelist(struct mdoc_node *p)
537 {
538 
539 	if (p->child)
540 		mdoc_node_freelist(p->child);
541 	if (p->next)
542 		mdoc_node_freelist(p->next);
543 
544 	mdoc_node_free(p);
545 }
546 
547 
548 /*
549  * Parse free-form text, that is, a line that does not begin with the
550  * control character.
551  */
552 static int
553 parsetext(struct mdoc *m, int line, char *buf)
554 {
555 
556 	if (SEC_PROLOGUE == m->lastnamed)
557 		return(perr(m, line, 0, ETEXTPROL));
558 
559 	if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
560 		return(perr(m, line, 0, ENOBLANK));
561 
562 	if ( ! mdoc_word_alloc(m, line, 0, buf))
563 		return(0);
564 
565 	m->next = MDOC_NEXT_SIBLING;
566 	return(1);
567 }
568 
569 
570 static int
571 macrowarn(struct mdoc *m, int ln, const char *buf)
572 {
573 	if ( ! (MDOC_IGN_MACRO & m->pflags))
574 		return(mdoc_perr(m, ln, 1,
575 				"unknown macro: %s%s",
576 				buf, strlen(buf) > 3 ? "..." : ""));
577 	return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX,
578 				"unknown macro: %s%s",
579 				buf, strlen(buf) > 3 ? "..." : ""));
580 }
581 
582 
583 
584 /*
585  * Parse a macro line, that is, a line beginning with the control
586  * character.
587  */
588 int
589 parsemacro(struct mdoc *m, int ln, char *buf)
590 {
591 	int		  i, c;
592 	char		  mac[5];
593 
594 	/* Comments and empties are quickly ignored. */
595 
596 	if (0 == buf[1])
597 		return(1);
598 
599 	if (' ' == buf[1]) {
600 		i = 2;
601 		while (buf[i] && ' ' == buf[i])
602 			i++;
603 		if (0 == buf[i])
604 			return(1);
605 		return(perr(m, ln, 1, ESPACE));
606 	}
607 
608 	if (buf[1] && '\\' == buf[1])
609 		if (buf[2] && '\"' == buf[2])
610 			return(1);
611 
612 	/* Copy the first word into a nil-terminated buffer. */
613 
614 	for (i = 1; i < 5; i++) {
615 		if (0 == (mac[i - 1] = buf[i]))
616 			break;
617 		else if (' ' == buf[i])
618 			break;
619 	}
620 
621 	mac[i - 1] = 0;
622 
623 	if (i == 5 || i <= 2) {
624 		if ( ! macrowarn(m, ln, mac))
625 			goto err;
626 		return(1);
627 	}
628 
629 	if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
630 		if ( ! macrowarn(m, ln, mac))
631 			goto err;
632 		return(1);
633 	}
634 
635 	/* The macro is sane.  Jump to the next word. */
636 
637 	while (buf[i] && ' ' == buf[i])
638 		i++;
639 
640 	/* Begin recursive parse sequence. */
641 
642 	if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
643 		goto err;
644 
645 	return(1);
646 
647 err:	/* Error out. */
648 
649 	m->flags |= MDOC_HALT;
650 	return(0);
651 }
652