xref: /openbsd-src/usr.bin/mandoc/mdoc.c (revision be38755c412cc72cb8d40f51ea70c9893196afff)
1 /*	$Id: mdoc.c,v 1.85 2011/09/18 15:54:48 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <time.h>
26 
27 #include "mdoc.h"
28 #include "mandoc.h"
29 #include "libmdoc.h"
30 #include "libmandoc.h"
31 
32 const	char *const __mdoc_macronames[MDOC_MAX] = {
33 	"Ap",		"Dd",		"Dt",		"Os",
34 	"Sh",		"Ss",		"Pp",		"D1",
35 	"Dl",		"Bd",		"Ed",		"Bl",
36 	"El",		"It",		"Ad",		"An",
37 	"Ar",		"Cd",		"Cm",		"Dv",
38 	"Er",		"Ev",		"Ex",		"Fa",
39 	"Fd",		"Fl",		"Fn",		"Ft",
40 	"Ic",		"In",		"Li",		"Nd",
41 	"Nm",		"Op",		"Ot",		"Pa",
42 	"Rv",		"St",		"Va",		"Vt",
43 	/* LINTED */
44 	"Xr",		"%A",		"%B",		"%D",
45 	/* LINTED */
46 	"%I",		"%J",		"%N",		"%O",
47 	/* LINTED */
48 	"%P",		"%R",		"%T",		"%V",
49 	"Ac",		"Ao",		"Aq",		"At",
50 	"Bc",		"Bf",		"Bo",		"Bq",
51 	"Bsx",		"Bx",		"Db",		"Dc",
52 	"Do",		"Dq",		"Ec",		"Ef",
53 	"Em",		"Eo",		"Fx",		"Ms",
54 	"No",		"Ns",		"Nx",		"Ox",
55 	"Pc",		"Pf",		"Po",		"Pq",
56 	"Qc",		"Ql",		"Qo",		"Qq",
57 	"Re",		"Rs",		"Sc",		"So",
58 	"Sq",		"Sm",		"Sx",		"Sy",
59 	"Tn",		"Ux",		"Xc",		"Xo",
60 	"Fo",		"Fc",		"Oo",		"Oc",
61 	"Bk",		"Ek",		"Bt",		"Hf",
62 	"Fr",		"Ud",		"Lb",		"Lp",
63 	"Lk",		"Mt",		"Brq",		"Bro",
64 	/* LINTED */
65 	"Brc",		"%C",		"Es",		"En",
66 	/* LINTED */
67 	"Dx",		"%Q",		"br",		"sp",
68 	/* LINTED */
69 	"%U",		"Ta"
70 	};
71 
72 const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
73 	"split",		"nosplit",		"ragged",
74 	"unfilled",		"literal",		"file",
75 	"offset",		"bullet",		"dash",
76 	"hyphen",		"item",			"enum",
77 	"tag",			"diag",			"hang",
78 	"ohang",		"inset",		"column",
79 	"width",		"compact",		"std",
80 	"filled",		"words",		"emphasis",
81 	"symbolic",		"nested",		"centered"
82 	};
83 
84 const	char * const *mdoc_macronames = __mdoc_macronames;
85 const	char * const *mdoc_argnames = __mdoc_argnames;
86 
/* Prototypes of the file-local helpers defined further down. */
static	void		  mdoc_node_free(struct mdoc_node *p);
static	void		  mdoc_node_unlink(struct mdoc *m,
				struct mdoc_node *n);
static	void		  mdoc_free1(struct mdoc *mdoc);
static	void		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
				enum mdoct tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
#if 0
static	int		  mdoc_preptext(struct mdoc *m, int line,
				char *buf, int offs);
#endif
static	int		  mdoc_ptext(struct mdoc *m, int line,
				char *buf, int offs);
static	int		  mdoc_pmacro(struct mdoc *m, int ln,
				char *buf, int offs);
101 
102 const struct mdoc_node *
103 mdoc_node(const struct mdoc *m)
104 {
105 
106 	assert( ! (MDOC_HALT & m->flags));
107 	return(m->first);
108 }
109 
110 
111 const struct mdoc_meta *
112 mdoc_meta(const struct mdoc *m)
113 {
114 
115 	assert( ! (MDOC_HALT & m->flags));
116 	return(&m->meta);
117 }
118 
119 
120 /*
121  * Frees volatile resources (parse tree, meta-data, fields).
122  */
123 static void
124 mdoc_free1(struct mdoc *mdoc)
125 {
126 
127 	if (mdoc->first)
128 		mdoc_node_delete(mdoc, mdoc->first);
129 	if (mdoc->meta.title)
130 		free(mdoc->meta.title);
131 	if (mdoc->meta.os)
132 		free(mdoc->meta.os);
133 	if (mdoc->meta.name)
134 		free(mdoc->meta.name);
135 	if (mdoc->meta.arch)
136 		free(mdoc->meta.arch);
137 	if (mdoc->meta.vol)
138 		free(mdoc->meta.vol);
139 	if (mdoc->meta.msec)
140 		free(mdoc->meta.msec);
141 	if (mdoc->meta.date)
142 		free(mdoc->meta.date);
143 }
144 
145 
146 /*
147  * Allocate all volatile resources (parse tree, meta-data, fields).
148  */
149 static void
150 mdoc_alloc1(struct mdoc *mdoc)
151 {
152 
153 	memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
154 	mdoc->flags = 0;
155 	mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
156 	mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
157 	mdoc->first = mdoc->last;
158 	mdoc->last->type = MDOC_ROOT;
159 	mdoc->next = MDOC_NEXT_CHILD;
160 }
161 
162 
/*
 * Prepare the parser for another document: drop the volatile state
 * with mdoc_free1() and build it anew with mdoc_alloc1().  Fields
 * that neither helper touches (such as the roff and parse handles)
 * survive the reset.
 */
void
mdoc_reset(struct mdoc *mdoc)
{
	/* Tear down first; mdoc_alloc1() overwrites the old pointers. */
	mdoc_free1(mdoc);
	mdoc_alloc1(mdoc);
}
176 
177 
/*
 * Destroy a parser object: release the volatile resources, then the
 * object itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);

	/* Nothing else is owned by the object; release it. */
	free(mdoc);
}
189 
190 
191 /*
192  * Allocate volatile and non-volatile parse resources.
193  */
194 struct mdoc *
195 mdoc_alloc(struct roff *roff, struct mparse *parse)
196 {
197 	struct mdoc	*p;
198 
199 	p = mandoc_calloc(1, sizeof(struct mdoc));
200 
201 	p->parse = parse;
202 	p->roff = roff;
203 
204 	mdoc_hash_init();
205 	mdoc_alloc1(p);
206 	return(p);
207 }
208 
209 
210 /*
211  * Climb back up the parse tree, validating open scopes.  Mostly calls
212  * through to macro_end() in macro.c.
213  */
214 int
215 mdoc_endparse(struct mdoc *m)
216 {
217 
218 	assert( ! (MDOC_HALT & m->flags));
219 	if (mdoc_macroend(m))
220 		return(1);
221 	m->flags |= MDOC_HALT;
222 	return(0);
223 }
224 
225 int
226 mdoc_addeqn(struct mdoc *m, const struct eqn *ep)
227 {
228 	struct mdoc_node *n;
229 
230 	assert( ! (MDOC_HALT & m->flags));
231 
232 	/* No text before an initial macro. */
233 
234 	if (SEC_NONE == m->lastnamed) {
235 		mdoc_pmsg(m, ep->ln, ep->pos, MANDOCERR_NOTEXT);
236 		return(1);
237 	}
238 
239 	n = node_alloc(m, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN);
240 	n->eqn = ep;
241 
242 	if ( ! node_append(m, n))
243 		return(0);
244 
245 	m->next = MDOC_NEXT_SIBLING;
246 	return(1);
247 }
248 
249 int
250 mdoc_addspan(struct mdoc *m, const struct tbl_span *sp)
251 {
252 	struct mdoc_node *n;
253 
254 	assert( ! (MDOC_HALT & m->flags));
255 
256 	/* No text before an initial macro. */
257 
258 	if (SEC_NONE == m->lastnamed) {
259 		mdoc_pmsg(m, sp->line, 0, MANDOCERR_NOTEXT);
260 		return(1);
261 	}
262 
263 	n = node_alloc(m, sp->line, 0, MDOC_MAX, MDOC_TBL);
264 	n->span = sp;
265 
266 	if ( ! node_append(m, n))
267 		return(0);
268 
269 	m->next = MDOC_NEXT_SIBLING;
270 	return(1);
271 }
272 
273 
274 /*
275  * Main parse routine.  Parses a single line -- really just hands off to
276  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
277  */
278 int
279 mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs)
280 {
281 
282 	assert( ! (MDOC_HALT & m->flags));
283 
284 	m->flags |= MDOC_NEWLINE;
285 
286 	/*
287 	 * Let the roff nS register switch SYNOPSIS mode early,
288 	 * such that the parser knows at all times
289 	 * whether this mode is on or off.
290 	 * Note that this mode is also switched by the Sh macro.
291 	 */
292 	if (roff_regisset(m->roff, REG_nS)) {
293 		if (roff_regget(m->roff, REG_nS))
294 			m->flags |= MDOC_SYNOPSIS;
295 		else
296 			m->flags &= ~MDOC_SYNOPSIS;
297 	}
298 
299 	return(mandoc_getcontrol(buf, &offs) ?
300 			mdoc_pmacro(m, ln, buf, offs) :
301 			mdoc_ptext(m, ln, buf, offs));
302 }
303 
304 int
305 mdoc_macro(MACRO_PROT_ARGS)
306 {
307 	assert(tok < MDOC_MAX);
308 
309 	/* If we're in the body, deny prologue calls. */
310 
311 	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
312 			MDOC_PBODY & m->flags) {
313 		mdoc_pmsg(m, line, ppos, MANDOCERR_BADBODY);
314 		return(1);
315 	}
316 
317 	/* If we're in the prologue, deny "body" macros.  */
318 
319 	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
320 			! (MDOC_PBODY & m->flags)) {
321 		mdoc_pmsg(m, line, ppos, MANDOCERR_BADPROLOG);
322 		if (NULL == m->meta.msec)
323 			m->meta.msec = mandoc_strdup("1");
324 		if (NULL == m->meta.title)
325 			m->meta.title = mandoc_strdup("UNKNOWN");
326 		if (NULL == m->meta.vol)
327 			m->meta.vol = mandoc_strdup("LOCAL");
328 		if (NULL == m->meta.os)
329 			m->meta.os = mandoc_strdup("LOCAL");
330 		if (NULL == m->meta.date)
331 			m->meta.date = mandoc_normdate
332 				(m->parse, NULL, line, ppos);
333 		m->flags |= MDOC_PBODY;
334 	}
335 
336 	return((*mdoc_macros[tok].fp)(m, tok, line, ppos, pos, buf));
337 }
338 
339 
340 static int
341 node_append(struct mdoc *mdoc, struct mdoc_node *p)
342 {
343 
344 	assert(mdoc->last);
345 	assert(mdoc->first);
346 	assert(MDOC_ROOT != p->type);
347 
348 	switch (mdoc->next) {
349 	case (MDOC_NEXT_SIBLING):
350 		mdoc->last->next = p;
351 		p->prev = mdoc->last;
352 		p->parent = mdoc->last->parent;
353 		break;
354 	case (MDOC_NEXT_CHILD):
355 		mdoc->last->child = p;
356 		p->parent = mdoc->last;
357 		break;
358 	default:
359 		abort();
360 		/* NOTREACHED */
361 	}
362 
363 	p->parent->nchild++;
364 
365 	/*
366 	 * Copy over the normalised-data pointer of our parent.  Not
367 	 * everybody has one, but copying a null pointer is fine.
368 	 */
369 
370 	switch (p->type) {
371 	case (MDOC_BODY):
372 		/* FALLTHROUGH */
373 	case (MDOC_TAIL):
374 		/* FALLTHROUGH */
375 	case (MDOC_HEAD):
376 		p->norm = p->parent->norm;
377 		break;
378 	default:
379 		break;
380 	}
381 
382 	if ( ! mdoc_valid_pre(mdoc, p))
383 		return(0);
384 
385 	switch (p->type) {
386 	case (MDOC_HEAD):
387 		assert(MDOC_BLOCK == p->parent->type);
388 		p->parent->head = p;
389 		break;
390 	case (MDOC_TAIL):
391 		assert(MDOC_BLOCK == p->parent->type);
392 		p->parent->tail = p;
393 		break;
394 	case (MDOC_BODY):
395 		if (p->end)
396 			break;
397 		assert(MDOC_BLOCK == p->parent->type);
398 		p->parent->body = p;
399 		break;
400 	default:
401 		break;
402 	}
403 
404 	mdoc->last = p;
405 
406 	switch (p->type) {
407 	case (MDOC_TBL):
408 		/* FALLTHROUGH */
409 	case (MDOC_TEXT):
410 		if ( ! mdoc_valid_post(mdoc))
411 			return(0);
412 		break;
413 	default:
414 		break;
415 	}
416 
417 	return(1);
418 }
419 
420 
421 static struct mdoc_node *
422 node_alloc(struct mdoc *m, int line, int pos,
423 		enum mdoct tok, enum mdoc_type type)
424 {
425 	struct mdoc_node *p;
426 
427 	p = mandoc_calloc(1, sizeof(struct mdoc_node));
428 	p->sec = m->lastsec;
429 	p->line = line;
430 	p->pos = pos;
431 	p->tok = tok;
432 	p->type = type;
433 
434 	/* Flag analysis. */
435 
436 	if (MDOC_SYNOPSIS & m->flags)
437 		p->flags |= MDOC_SYNPRETTY;
438 	else
439 		p->flags &= ~MDOC_SYNPRETTY;
440 	if (MDOC_NEWLINE & m->flags)
441 		p->flags |= MDOC_LINE;
442 	m->flags &= ~MDOC_NEWLINE;
443 
444 	return(p);
445 }
446 
447 
448 int
449 mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
450 {
451 	struct mdoc_node *p;
452 
453 	p = node_alloc(m, line, pos, tok, MDOC_TAIL);
454 	if ( ! node_append(m, p))
455 		return(0);
456 	m->next = MDOC_NEXT_CHILD;
457 	return(1);
458 }
459 
460 
461 int
462 mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
463 {
464 	struct mdoc_node *p;
465 
466 	assert(m->first);
467 	assert(m->last);
468 
469 	p = node_alloc(m, line, pos, tok, MDOC_HEAD);
470 	if ( ! node_append(m, p))
471 		return(0);
472 	m->next = MDOC_NEXT_CHILD;
473 	return(1);
474 }
475 
476 
477 int
478 mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
479 {
480 	struct mdoc_node *p;
481 
482 	p = node_alloc(m, line, pos, tok, MDOC_BODY);
483 	if ( ! node_append(m, p))
484 		return(0);
485 	m->next = MDOC_NEXT_CHILD;
486 	return(1);
487 }
488 
489 
490 int
491 mdoc_endbody_alloc(struct mdoc *m, int line, int pos, enum mdoct tok,
492 		struct mdoc_node *body, enum mdoc_endbody end)
493 {
494 	struct mdoc_node *p;
495 
496 	p = node_alloc(m, line, pos, tok, MDOC_BODY);
497 	p->pending = body;
498 	p->end = end;
499 	if ( ! node_append(m, p))
500 		return(0);
501 	m->next = MDOC_NEXT_SIBLING;
502 	return(1);
503 }
504 
505 
506 int
507 mdoc_block_alloc(struct mdoc *m, int line, int pos,
508 		enum mdoct tok, struct mdoc_arg *args)
509 {
510 	struct mdoc_node *p;
511 
512 	p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
513 	p->args = args;
514 	if (p->args)
515 		(args->refcnt)++;
516 
517 	switch (tok) {
518 	case (MDOC_Bd):
519 		/* FALLTHROUGH */
520 	case (MDOC_Bf):
521 		/* FALLTHROUGH */
522 	case (MDOC_Bl):
523 		/* FALLTHROUGH */
524 	case (MDOC_Rs):
525 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
526 		break;
527 	default:
528 		break;
529 	}
530 
531 	if ( ! node_append(m, p))
532 		return(0);
533 	m->next = MDOC_NEXT_CHILD;
534 	return(1);
535 }
536 
537 
538 int
539 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
540 		enum mdoct tok, struct mdoc_arg *args)
541 {
542 	struct mdoc_node *p;
543 
544 	p = node_alloc(m, line, pos, tok, MDOC_ELEM);
545 	p->args = args;
546 	if (p->args)
547 		(args->refcnt)++;
548 
549 	switch (tok) {
550 	case (MDOC_An):
551 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
552 		break;
553 	default:
554 		break;
555 	}
556 
557 	if ( ! node_append(m, p))
558 		return(0);
559 	m->next = MDOC_NEXT_CHILD;
560 	return(1);
561 }
562 
563 int
564 mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
565 {
566 	struct mdoc_node *n;
567 
568 	n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT);
569 	n->string = roff_strdup(m->roff, p);
570 
571 	if ( ! node_append(m, n))
572 		return(0);
573 
574 	m->next = MDOC_NEXT_SIBLING;
575 	return(1);
576 }
577 
578 
579 static void
580 mdoc_node_free(struct mdoc_node *p)
581 {
582 
583 	if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type)
584 		free(p->norm);
585 	if (p->string)
586 		free(p->string);
587 	if (p->args)
588 		mdoc_argv_free(p->args);
589 	free(p);
590 }
591 
592 
593 static void
594 mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n)
595 {
596 
597 	/* Adjust siblings. */
598 
599 	if (n->prev)
600 		n->prev->next = n->next;
601 	if (n->next)
602 		n->next->prev = n->prev;
603 
604 	/* Adjust parent. */
605 
606 	if (n->parent) {
607 		n->parent->nchild--;
608 		if (n->parent->child == n)
609 			n->parent->child = n->prev ? n->prev : n->next;
610 		if (n->parent->last == n)
611 			n->parent->last = n->prev ? n->prev : NULL;
612 	}
613 
614 	/* Adjust parse point, if applicable. */
615 
616 	if (m && m->last == n) {
617 		if (n->prev) {
618 			m->last = n->prev;
619 			m->next = MDOC_NEXT_SIBLING;
620 		} else {
621 			m->last = n->parent;
622 			m->next = MDOC_NEXT_CHILD;
623 		}
624 	}
625 
626 	if (m && m->first == n)
627 		m->first = NULL;
628 }
629 
630 
631 void
632 mdoc_node_delete(struct mdoc *m, struct mdoc_node *p)
633 {
634 
635 	while (p->child) {
636 		assert(p->nchild);
637 		mdoc_node_delete(m, p->child);
638 	}
639 	assert(0 == p->nchild);
640 
641 	mdoc_node_unlink(m, p);
642 	mdoc_node_free(p);
643 }
644 
#if 0
/*
 * Pre-treat a text line.
 * Text lines can consist of equations, which must be handled apart from
 * the regular text.
 * Thus, use this function to step through a line checking if it has any
 * equations embedded in it.
 * This must handle multiple equations AND equations that do not end at
 * the end-of-line, i.e., will re-enter in the next roff parse.
 *
 * Returns 0 if parsing the text or adding an equation fails,
 * 1 otherwise.  The buffer is modified in place: delimiters are
 * overwritten with NUL bytes.
 */
static int
mdoc_preptext(struct mdoc *m, int line, char *buf, int offs)
{
	char		*start, *end;
	char		 delim;

	while ('\0' != buf[offs]) {
		/* Mark starting position if eqn is set. */
		start = NULL;
		if ('\0' != (delim = roff_eqndelim(m->roff)))
			if (NULL != (start = strchr(buf + offs, delim)))
				*start++ = '\0';

		/* Parse text as normal. */
		if ( ! mdoc_ptext(m, line, buf, offs))
			return(0);

		/* Continue only if an equation exists. */
		if (NULL == start)
			break;

		/* Read past the end of the equation. */
		offs += start - (buf + offs);
		assert(start == &buf[offs]);
		if (NULL != (end = strchr(buf + offs, delim))) {
			*end++ = '\0';
			while (' ' == *end)
				end++;
		}

		/* Parse the equation itself. */
		roff_openeqn(m->roff, NULL, line, offs, buf);

		/* Process a finished equation? */
		if (roff_closeeqn(m->roff))
			if ( ! mdoc_addeqn(m, roff_eqn(m->roff)))
				return(0);

		/*
		 * Without a closing delimiter the equation swallowed
		 * the rest of the line.  Stop here rather than doing
		 * pointer arithmetic on a null pointer.
		 */
		if (NULL == end)
			break;

		offs += (end - (buf + offs));
	}

	return(1);
}
#endif
698 
699 /*
700  * Parse free-form text, that is, a line that does not begin with the
701  * control character.
702  */
703 static int
704 mdoc_ptext(struct mdoc *m, int line, char *buf, int offs)
705 {
706 	char		 *c, *ws, *end;
707 	struct mdoc_node *n;
708 
709 	/* No text before an initial macro. */
710 
711 	if (SEC_NONE == m->lastnamed) {
712 		mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT);
713 		return(1);
714 	}
715 
716 	assert(m->last);
717 	n = m->last;
718 
719 	/*
720 	 * Divert directly to list processing if we're encountering a
721 	 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry
722 	 * (a MDOC_BODY means it's already open, in which case we should
723 	 * process within its context in the normal way).
724 	 */
725 
726 	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
727 			LIST_column == n->norm->Bl.type) {
728 		/* `Bl' is open without any children. */
729 		m->flags |= MDOC_FREECOL;
730 		return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf));
731 	}
732 
733 	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
734 			NULL != n->parent &&
735 			MDOC_Bl == n->parent->tok &&
736 			LIST_column == n->parent->norm->Bl.type) {
737 		/* `Bl' has block-level `It' children. */
738 		m->flags |= MDOC_FREECOL;
739 		return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf));
740 	}
741 
742 	/*
743 	 * Search for the beginning of unescaped trailing whitespace (ws)
744 	 * and for the first character not to be output (end).
745 	 */
746 
747 	/* FIXME: replace with strcspn(). */
748 	ws = NULL;
749 	for (c = end = buf + offs; *c; c++) {
750 		switch (*c) {
751 		case ' ':
752 			if (NULL == ws)
753 				ws = c;
754 			continue;
755 		case '\t':
756 			/*
757 			 * Always warn about trailing tabs,
758 			 * even outside literal context,
759 			 * where they should be put on the next line.
760 			 */
761 			if (NULL == ws)
762 				ws = c;
763 			/*
764 			 * Strip trailing tabs in literal context only;
765 			 * outside, they affect the next line.
766 			 */
767 			if (MDOC_LITERAL & m->flags)
768 				continue;
769 			break;
770 		case '\\':
771 			/* Skip the escaped character, too, if any. */
772 			if (c[1])
773 				c++;
774 			/* FALLTHROUGH */
775 		default:
776 			ws = NULL;
777 			break;
778 		}
779 		end = c + 1;
780 	}
781 	*end = '\0';
782 
783 	if (ws)
784 		mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE);
785 
786 	if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) {
787 		mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN);
788 
789 		/*
790 		 * Insert a `sp' in the case of a blank line.  Technically,
791 		 * blank lines aren't allowed, but enough manuals assume this
792 		 * behaviour that we want to work around it.
793 		 */
794 		if ( ! mdoc_elem_alloc(m, line, offs, MDOC_sp, NULL))
795 			return(0);
796 
797 		m->next = MDOC_NEXT_SIBLING;
798 		return(1);
799 	}
800 
801 	if ( ! mdoc_word_alloc(m, line, offs, buf+offs))
802 		return(0);
803 
804 	if (MDOC_LITERAL & m->flags)
805 		return(1);
806 
807 	/*
808 	 * End-of-sentence check.  If the last character is an unescaped
809 	 * EOS character, then flag the node as being the end of a
810 	 * sentence.  The front-end will know how to interpret this.
811 	 */
812 
813 	assert(buf < end);
814 
815 	if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0))
816 		m->last->flags |= MDOC_EOS;
817 
818 	return(1);
819 }
820 
821 
822 /*
823  * Parse a macro line, that is, a line beginning with the control
824  * character.
825  */
826 static int
827 mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs)
828 {
829 	enum mdoct	  tok;
830 	int		  i, sv;
831 	char		  mac[5];
832 	struct mdoc_node *n;
833 
834 	/* Empty post-control lines are ignored. */
835 
836 	if ('"' == buf[offs]) {
837 		mdoc_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT);
838 		return(1);
839 	} else if ('\0' == buf[offs])
840 		return(1);
841 
842 	sv = offs;
843 
844 	/*
845 	 * Copy the first word into a nil-terminated buffer.
846 	 * Stop copying when a tab, space, or eoln is encountered.
847 	 */
848 
849 	i = 0;
850 	while (i < 4 && '\0' != buf[offs] &&
851 			' ' != buf[offs] && '\t' != buf[offs])
852 		mac[i++] = buf[offs++];
853 
854 	mac[i] = '\0';
855 
856 	tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX;
857 
858 	if (MDOC_MAX == tok) {
859 		mandoc_vmsg(MANDOCERR_MACRO, m->parse,
860 				ln, sv, "%s", buf + sv - 1);
861 		return(1);
862 	}
863 
864 	/* Disregard the first trailing tab, if applicable. */
865 
866 	if ('\t' == buf[offs])
867 		offs++;
868 
869 	/* Jump to the next non-whitespace word. */
870 
871 	while (buf[offs] && ' ' == buf[offs])
872 		offs++;
873 
874 	/*
875 	 * Trailing whitespace.  Note that tabs are allowed to be passed
876 	 * into the parser as "text", so we only warn about spaces here.
877 	 */
878 
879 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
880 		mdoc_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE);
881 
882 	/*
883 	 * If an initial macro or a list invocation, divert directly
884 	 * into macro processing.
885 	 */
886 
887 	if (NULL == m->last || MDOC_It == tok || MDOC_El == tok) {
888 		if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf))
889 			goto err;
890 		return(1);
891 	}
892 
893 	n = m->last;
894 	assert(m->last);
895 
896 	/*
897 	 * If the first macro of a `Bl -column', open an `It' block
898 	 * context around the parsed macro.
899 	 */
900 
901 	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
902 			LIST_column == n->norm->Bl.type) {
903 		m->flags |= MDOC_FREECOL;
904 		if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf))
905 			goto err;
906 		return(1);
907 	}
908 
909 	/*
910 	 * If we're following a block-level `It' within a `Bl -column'
911 	 * context (perhaps opened in the above block or in ptext()),
912 	 * then open an `It' block context around the parsed macro.
913 	 */
914 
915 	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
916 			NULL != n->parent &&
917 			MDOC_Bl == n->parent->tok &&
918 			LIST_column == n->parent->norm->Bl.type) {
919 		m->flags |= MDOC_FREECOL;
920 		if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf))
921 			goto err;
922 		return(1);
923 	}
924 
925 	/* Normal processing of a macro. */
926 
927 	if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf))
928 		goto err;
929 
930 	return(1);
931 
932 err:	/* Error out. */
933 
934 	m->flags |= MDOC_HALT;
935 	return(0);
936 }
937 
938 enum mdelim
939 mdoc_isdelim(const char *p)
940 {
941 
942 	if ('\0' == p[0])
943 		return(DELIM_NONE);
944 
945 	if ('\0' == p[1])
946 		switch (p[0]) {
947 		case('('):
948 			/* FALLTHROUGH */
949 		case('['):
950 			return(DELIM_OPEN);
951 		case('|'):
952 			return(DELIM_MIDDLE);
953 		case('.'):
954 			/* FALLTHROUGH */
955 		case(','):
956 			/* FALLTHROUGH */
957 		case(';'):
958 			/* FALLTHROUGH */
959 		case(':'):
960 			/* FALLTHROUGH */
961 		case('?'):
962 			/* FALLTHROUGH */
963 		case('!'):
964 			/* FALLTHROUGH */
965 		case(')'):
966 			/* FALLTHROUGH */
967 		case(']'):
968 			return(DELIM_CLOSE);
969 		default:
970 			return(DELIM_NONE);
971 		}
972 
973 	if ('\\' != p[0])
974 		return(DELIM_NONE);
975 
976 	if (0 == strcmp(p + 1, "."))
977 		return(DELIM_CLOSE);
978 	if (0 == strcmp(p + 1, "*(Ba"))
979 		return(DELIM_MIDDLE);
980 
981 	return(DELIM_NONE);
982 }
983