xref: /openbsd-src/usr.bin/mandoc/mdoc.c (revision 5ad04d351680822078003e2b066cfc9680d6157d)
1 /*	$Id: mdoc.c,v 1.104 2014/04/25 14:10:59 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <stdarg.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <time.h>
27 
28 #include "mdoc.h"
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmdoc.h"
32 #include "libmandoc.h"
33 
/*
 * Table of mdoc macro names, holding MDOC_MAX entries.
 * Presumably indexed by enum mdoct token value, in which case the
 * order here must match that enum -- confirm against mdoc.h.
 * NOTE(review): identifiers containing a double underscore are
 * reserved for the implementation by the C standard.
 */
const	char *const __mdoc_macronames[MDOC_MAX] = {
	"Ap",		"Dd",		"Dt",		"Os",
	"Sh",		"Ss",		"Pp",		"D1",
	"Dl",		"Bd",		"Ed",		"Bl",
	"El",		"It",		"Ad",		"An",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"br",		"sp",
	"%U",		"Ta",		"ll",
	};
67 
/*
 * Table of macro argument names (written without the leading dash),
 * holding MDOC_ARG_MAX entries.
 * Presumably indexed by the argument identifiers declared in mdoc.h,
 * in which case the order must match -- confirm.
 */
const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
	"split",		"nosplit",		"ragged",
	"unfilled",		"literal",		"file",
	"offset",		"bullet",		"dash",
	"hyphen",		"item",			"enum",
	"tag",			"diag",			"hang",
	"ohang",		"inset",		"column",
	"width",		"compact",		"std",
	"filled",		"words",		"emphasis",
	"symbolic",		"nested",		"centered"
	};
79 
/* Pointer aliases through which the two name tables above are exported. */
const	char * const *mdoc_macronames = __mdoc_macronames;
const	char * const *mdoc_argnames = __mdoc_argnames;
82 
83 static	void		  mdoc_node_free(struct mdoc_node *);
84 static	void		  mdoc_node_unlink(struct mdoc *,
85 				struct mdoc_node *);
86 static	void		  mdoc_free1(struct mdoc *);
87 static	void		  mdoc_alloc1(struct mdoc *);
88 static	struct mdoc_node *node_alloc(struct mdoc *, int, int,
89 				enum mdoct, enum mdoc_type);
90 static	int		  node_append(struct mdoc *,
91 				struct mdoc_node *);
92 #if 0
93 static	int		  mdoc_preptext(struct mdoc *, int, char *, int);
94 #endif
95 static	int		  mdoc_ptext(struct mdoc *, int, char *, int);
96 static	int		  mdoc_pmacro(struct mdoc *, int, char *, int);
97 
98 
99 const struct mdoc_node *
100 mdoc_node(const struct mdoc *mdoc)
101 {
102 
103 	assert( ! (MDOC_HALT & mdoc->flags));
104 	return(mdoc->first);
105 }
106 
107 const struct mdoc_meta *
108 mdoc_meta(const struct mdoc *mdoc)
109 {
110 
111 	assert( ! (MDOC_HALT & mdoc->flags));
112 	return(&mdoc->meta);
113 }
114 
115 /*
116  * Frees volatile resources (parse tree, meta-data, fields).
117  */
118 static void
119 mdoc_free1(struct mdoc *mdoc)
120 {
121 
122 	if (mdoc->first)
123 		mdoc_node_delete(mdoc, mdoc->first);
124 	if (mdoc->meta.title)
125 		free(mdoc->meta.title);
126 	if (mdoc->meta.os)
127 		free(mdoc->meta.os);
128 	if (mdoc->meta.name)
129 		free(mdoc->meta.name);
130 	if (mdoc->meta.arch)
131 		free(mdoc->meta.arch);
132 	if (mdoc->meta.vol)
133 		free(mdoc->meta.vol);
134 	if (mdoc->meta.msec)
135 		free(mdoc->meta.msec);
136 	if (mdoc->meta.date)
137 		free(mdoc->meta.date);
138 }
139 
140 /*
141  * Allocate all volatile resources (parse tree, meta-data, fields).
142  */
143 static void
144 mdoc_alloc1(struct mdoc *mdoc)
145 {
146 
147 	memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
148 	mdoc->flags = 0;
149 	mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
150 	mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
151 	mdoc->first = mdoc->last;
152 	mdoc->last->type = MDOC_ROOT;
153 	mdoc->last->tok = MDOC_MAX;
154 	mdoc->next = MDOC_NEXT_CHILD;
155 }
156 
/*
 * Prepare the parser for another document: release the volatile
 * resources with mdoc_free1(), then set them up afresh with
 * mdoc_alloc1().  Fields that neither helper touches survive the
 * reset.
 */
void
mdoc_reset(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);	/* drop the old tree and meta-data */
	mdoc_alloc1(mdoc);	/* start over with an empty root */
}
170 
/*
 * Destroy a parser: release the volatile resources, then the parser
 * structure itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);	/* tree and meta-data */
	free(mdoc);		/* the parser object */
}
182 
183 /*
184  * Allocate volatile and non-volatile parse resources.
185  */
186 struct mdoc *
187 mdoc_alloc(struct roff *roff, struct mparse *parse,
188 	char *defos, int quick)
189 {
190 	struct mdoc	*p;
191 
192 	p = mandoc_calloc(1, sizeof(struct mdoc));
193 
194 	p->parse = parse;
195 	p->defos = defos;
196 	p->quick = quick;
197 	p->roff = roff;
198 
199 	mdoc_hash_init();
200 	mdoc_alloc1(p);
201 	return(p);
202 }
203 
204 /*
205  * Climb back up the parse tree, validating open scopes.  Mostly calls
206  * through to macro_end() in macro.c.
207  */
208 int
209 mdoc_endparse(struct mdoc *mdoc)
210 {
211 
212 	assert( ! (MDOC_HALT & mdoc->flags));
213 	if (mdoc_macroend(mdoc))
214 		return(1);
215 	mdoc->flags |= MDOC_HALT;
216 	return(0);
217 }
218 
219 int
220 mdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep)
221 {
222 	struct mdoc_node *n;
223 
224 	assert( ! (MDOC_HALT & mdoc->flags));
225 
226 	/* No text before an initial macro. */
227 
228 	if (SEC_NONE == mdoc->lastnamed) {
229 		mdoc_pmsg(mdoc, ep->ln, ep->pos, MANDOCERR_NOTEXT);
230 		return(1);
231 	}
232 
233 	n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN);
234 	n->eqn = ep;
235 
236 	if ( ! node_append(mdoc, n))
237 		return(0);
238 
239 	mdoc->next = MDOC_NEXT_SIBLING;
240 	return(1);
241 }
242 
243 int
244 mdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp)
245 {
246 	struct mdoc_node *n;
247 
248 	assert( ! (MDOC_HALT & mdoc->flags));
249 
250 	/* No text before an initial macro. */
251 
252 	if (SEC_NONE == mdoc->lastnamed) {
253 		mdoc_pmsg(mdoc, sp->line, 0, MANDOCERR_NOTEXT);
254 		return(1);
255 	}
256 
257 	n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL);
258 	n->span = sp;
259 
260 	if ( ! node_append(mdoc, n))
261 		return(0);
262 
263 	mdoc->next = MDOC_NEXT_SIBLING;
264 	return(1);
265 }
266 
267 /*
268  * Main parse routine.  Parses a single line -- really just hands off to
269  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
270  */
271 int
272 mdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs)
273 {
274 
275 	assert( ! (MDOC_HALT & mdoc->flags));
276 
277 	mdoc->flags |= MDOC_NEWLINE;
278 
279 	/*
280 	 * Let the roff nS register switch SYNOPSIS mode early,
281 	 * such that the parser knows at all times
282 	 * whether this mode is on or off.
283 	 * Note that this mode is also switched by the Sh macro.
284 	 */
285 	if (roff_getreg(mdoc->roff, "nS"))
286 		mdoc->flags |= MDOC_SYNOPSIS;
287 	else
288 		mdoc->flags &= ~MDOC_SYNOPSIS;
289 
290 	return(roff_getcontrol(mdoc->roff, buf, &offs) ?
291 	    mdoc_pmacro(mdoc, ln, buf, offs) :
292 	    mdoc_ptext(mdoc, ln, buf, offs));
293 }
294 
295 int
296 mdoc_macro(MACRO_PROT_ARGS)
297 {
298 	assert(tok < MDOC_MAX);
299 
300 	/* If we're in the body, deny prologue calls. */
301 
302 	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
303 	    MDOC_PBODY & mdoc->flags) {
304 		mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADBODY);
305 		return(1);
306 	}
307 
308 	/* If we're in the prologue, deny "body" macros.  */
309 
310 	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
311 	     ! (MDOC_PBODY & mdoc->flags)) {
312 		mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADPROLOG);
313 		if (NULL == mdoc->meta.msec)
314 			mdoc->meta.msec = mandoc_strdup("1");
315 		if (NULL == mdoc->meta.title)
316 			mdoc->meta.title = mandoc_strdup("UNKNOWN");
317 		if (NULL == mdoc->meta.vol)
318 			mdoc->meta.vol = mandoc_strdup("LOCAL");
319 		if (NULL == mdoc->meta.os)
320 			mdoc->meta.os = mandoc_strdup("LOCAL");
321 		if (NULL == mdoc->meta.date)
322 			mdoc->meta.date = mandoc_normdate
323 				(mdoc->parse, NULL, line, ppos);
324 		mdoc->flags |= MDOC_PBODY;
325 	}
326 
327 	return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf));
328 }
329 
330 
331 static int
332 node_append(struct mdoc *mdoc, struct mdoc_node *p)
333 {
334 
335 	assert(mdoc->last);
336 	assert(mdoc->first);
337 	assert(MDOC_ROOT != p->type);
338 
339 	switch (mdoc->next) {
340 	case MDOC_NEXT_SIBLING:
341 		mdoc->last->next = p;
342 		p->prev = mdoc->last;
343 		p->parent = mdoc->last->parent;
344 		break;
345 	case MDOC_NEXT_CHILD:
346 		mdoc->last->child = p;
347 		p->parent = mdoc->last;
348 		break;
349 	default:
350 		abort();
351 		/* NOTREACHED */
352 	}
353 
354 	p->parent->nchild++;
355 
356 	/*
357 	 * Copy over the normalised-data pointer of our parent.  Not
358 	 * everybody has one, but copying a null pointer is fine.
359 	 */
360 
361 	switch (p->type) {
362 	case MDOC_BODY:
363 		if (ENDBODY_NOT != p->end)
364 			break;
365 		/* FALLTHROUGH */
366 	case MDOC_TAIL:
367 		/* FALLTHROUGH */
368 	case MDOC_HEAD:
369 		p->norm = p->parent->norm;
370 		break;
371 	default:
372 		break;
373 	}
374 
375 	if ( ! mdoc_valid_pre(mdoc, p))
376 		return(0);
377 
378 	switch (p->type) {
379 	case MDOC_HEAD:
380 		assert(MDOC_BLOCK == p->parent->type);
381 		p->parent->head = p;
382 		break;
383 	case MDOC_TAIL:
384 		assert(MDOC_BLOCK == p->parent->type);
385 		p->parent->tail = p;
386 		break;
387 	case MDOC_BODY:
388 		if (p->end)
389 			break;
390 		assert(MDOC_BLOCK == p->parent->type);
391 		p->parent->body = p;
392 		break;
393 	default:
394 		break;
395 	}
396 
397 	mdoc->last = p;
398 
399 	switch (p->type) {
400 	case MDOC_TBL:
401 		/* FALLTHROUGH */
402 	case MDOC_TEXT:
403 		if ( ! mdoc_valid_post(mdoc))
404 			return(0);
405 		break;
406 	default:
407 		break;
408 	}
409 
410 	return(1);
411 }
412 
413 static struct mdoc_node *
414 node_alloc(struct mdoc *mdoc, int line, int pos,
415 		enum mdoct tok, enum mdoc_type type)
416 {
417 	struct mdoc_node *p;
418 
419 	p = mandoc_calloc(1, sizeof(struct mdoc_node));
420 	p->sec = mdoc->lastsec;
421 	p->line = line;
422 	p->pos = pos;
423 	p->lastline = line;
424 	p->tok = tok;
425 	p->type = type;
426 
427 	/* Flag analysis. */
428 
429 	if (MDOC_SYNOPSIS & mdoc->flags)
430 		p->flags |= MDOC_SYNPRETTY;
431 	else
432 		p->flags &= ~MDOC_SYNPRETTY;
433 	if (MDOC_NEWLINE & mdoc->flags)
434 		p->flags |= MDOC_LINE;
435 	mdoc->flags &= ~MDOC_NEWLINE;
436 
437 	return(p);
438 }
439 
440 int
441 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
442 {
443 	struct mdoc_node *p;
444 
445 	p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
446 	if ( ! node_append(mdoc, p))
447 		return(0);
448 	mdoc->next = MDOC_NEXT_CHILD;
449 	return(1);
450 }
451 
452 int
453 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
454 {
455 	struct mdoc_node *p;
456 
457 	assert(mdoc->first);
458 	assert(mdoc->last);
459 
460 	p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
461 	if ( ! node_append(mdoc, p))
462 		return(0);
463 	mdoc->next = MDOC_NEXT_CHILD;
464 	return(1);
465 }
466 
467 int
468 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
469 {
470 	struct mdoc_node *p;
471 
472 	p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
473 	if ( ! node_append(mdoc, p))
474 		return(0);
475 	mdoc->next = MDOC_NEXT_CHILD;
476 	return(1);
477 }
478 
479 int
480 mdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok,
481 		struct mdoc_node *body, enum mdoc_endbody end)
482 {
483 	struct mdoc_node *p;
484 
485 	p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
486 	p->pending = body;
487 	p->norm = body->norm;
488 	p->end = end;
489 	if ( ! node_append(mdoc, p))
490 		return(0);
491 	mdoc->next = MDOC_NEXT_SIBLING;
492 	return(1);
493 }
494 
495 int
496 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
497 		enum mdoct tok, struct mdoc_arg *args)
498 {
499 	struct mdoc_node *p;
500 
501 	p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
502 	p->args = args;
503 	if (p->args)
504 		(args->refcnt)++;
505 
506 	switch (tok) {
507 	case MDOC_Bd:
508 		/* FALLTHROUGH */
509 	case MDOC_Bf:
510 		/* FALLTHROUGH */
511 	case MDOC_Bl:
512 		/* FALLTHROUGH */
513 	case MDOC_Rs:
514 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
515 		break;
516 	default:
517 		break;
518 	}
519 
520 	if ( ! node_append(mdoc, p))
521 		return(0);
522 	mdoc->next = MDOC_NEXT_CHILD;
523 	return(1);
524 }
525 
526 int
527 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
528 		enum mdoct tok, struct mdoc_arg *args)
529 {
530 	struct mdoc_node *p;
531 
532 	p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
533 	p->args = args;
534 	if (p->args)
535 		(args->refcnt)++;
536 
537 	switch (tok) {
538 	case MDOC_An:
539 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
540 		break;
541 	default:
542 		break;
543 	}
544 
545 	if ( ! node_append(mdoc, p))
546 		return(0);
547 	mdoc->next = MDOC_NEXT_CHILD;
548 	return(1);
549 }
550 
551 int
552 mdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p)
553 {
554 	struct mdoc_node *n;
555 
556 	n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT);
557 	n->string = roff_strdup(mdoc->roff, p);
558 
559 	if ( ! node_append(mdoc, n))
560 		return(0);
561 
562 	mdoc->next = MDOC_NEXT_SIBLING;
563 	return(1);
564 }
565 
566 void
567 mdoc_word_append(struct mdoc *mdoc, const char *p)
568 {
569 	struct mdoc_node	*n;
570 	char			*addstr, *newstr;
571 
572 	n = mdoc->last;
573 	addstr = roff_strdup(mdoc->roff, p);
574 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
575 	free(addstr);
576 	free(n->string);
577 	n->string = newstr;
578 	mdoc->next = MDOC_NEXT_SIBLING;
579 }
580 
581 static void
582 mdoc_node_free(struct mdoc_node *p)
583 {
584 
585 	if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type)
586 		free(p->norm);
587 	if (p->string)
588 		free(p->string);
589 	if (p->args)
590 		mdoc_argv_free(p->args);
591 	free(p);
592 }
593 
594 static void
595 mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n)
596 {
597 
598 	/* Adjust siblings. */
599 
600 	if (n->prev)
601 		n->prev->next = n->next;
602 	if (n->next)
603 		n->next->prev = n->prev;
604 
605 	/* Adjust parent. */
606 
607 	if (n->parent) {
608 		n->parent->nchild--;
609 		if (n->parent->child == n)
610 			n->parent->child = n->prev ? n->prev : n->next;
611 		if (n->parent->last == n)
612 			n->parent->last = n->prev ? n->prev : NULL;
613 	}
614 
615 	/* Adjust parse point, if applicable. */
616 
617 	if (mdoc && mdoc->last == n) {
618 		if (n->prev) {
619 			mdoc->last = n->prev;
620 			mdoc->next = MDOC_NEXT_SIBLING;
621 		} else {
622 			mdoc->last = n->parent;
623 			mdoc->next = MDOC_NEXT_CHILD;
624 		}
625 	}
626 
627 	if (mdoc && mdoc->first == n)
628 		mdoc->first = NULL;
629 }
630 
631 void
632 mdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p)
633 {
634 
635 	while (p->child) {
636 		assert(p->nchild);
637 		mdoc_node_delete(mdoc, p->child);
638 	}
639 	assert(0 == p->nchild);
640 
641 	mdoc_node_unlink(mdoc, p);
642 	mdoc_node_free(p);
643 }
644 
/*
 * Detach a node from its current position and append it again at the
 * current parse point.  Returns the result of node_append().
 *
 * NOTE(review): mdoc_node_unlink() leaves p->prev and p->next as they
 * were, and node_append() never assigns p->next and assigns p->prev
 * only for MDOC_NEXT_SIBLING.  Callers presumably arrange for these
 * fields to be valid -- confirm.
 */
int
mdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p)
{
	mdoc_node_unlink(mdoc, p);

	return node_append(mdoc, p);
}
652 
#if 0
/*
 * NOTE(review): this function is compiled out by the enclosing
 * "#if 0" and is not called from the code visible in this file.
 *
 * Pre-treat a text line.
 * Text lines can consist of equations, which must be handled apart from
 * the regular text.
 * Thus, use this function to step through a line checking if it has any
 * equations embedded in it.
 * This must handle multiple equations AND equations that do not end at
 * the end-of-line, i.e., will re-enter in the next roff parse.
 *
 * Returns 0 if mdoc_ptext() or mdoc_addeqn() fails, 1 otherwise.
 */
static int
mdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs)
{
	char		*start, *end;
	char		 delim;

	while ('\0' != buf[offs]) {
		/* Mark starting position if eqn is set. */
		start = NULL;
		if ('\0' != (delim = roff_eqndelim(mdoc->roff)))
			if (NULL != (start = strchr(buf + offs, delim)))
				*start++ = '\0';

		/* Parse text as normal. */
		if ( ! mdoc_ptext(mdoc, line, buf, offs))
			return(0);

		/* Continue only if an equation exists. */
		if (NULL == start)
			break;

		/* Read past the end of the equation. */
		offs += start - (buf + offs);
		assert(start == &buf[offs]);
		/*
		 * NOTE(review): if no closing delimiter is found, end
		 * is NULL here and is still used in the pointer
		 * arithmetic at the bottom of the loop.
		 */
		if (NULL != (end = strchr(buf + offs, delim))) {
			*end++ = '\0';
			while (' ' == *end)
				end++;
		}

		/* Parse the equation itself. */
		roff_openeqn(mdoc->roff, NULL, line, offs, buf);

		/* Process a finished equation? */
		if (roff_closeeqn(mdoc->roff))
			if ( ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff)))
				return(0);
		offs += (end - (buf + offs));
	}

	return(1);
}
#endif
706 
707 /*
708  * Parse free-form text, that is, a line that does not begin with the
709  * control character.
710  */
711 static int
712 mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs)
713 {
714 	char		 *c, *ws, *end;
715 	struct mdoc_node *n;
716 
717 	/* No text before an initial macro. */
718 
719 	if (SEC_NONE == mdoc->lastnamed) {
720 		mdoc_pmsg(mdoc, line, offs, MANDOCERR_NOTEXT);
721 		return(1);
722 	}
723 
724 	assert(mdoc->last);
725 	n = mdoc->last;
726 
727 	/*
728 	 * Divert directly to list processing if we're encountering a
729 	 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry
730 	 * (a MDOC_BODY means it's already open, in which case we should
731 	 * process within its context in the normal way).
732 	 */
733 
734 	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
735 	    LIST_column == n->norm->Bl.type) {
736 		/* `Bl' is open without any children. */
737 		mdoc->flags |= MDOC_FREECOL;
738 		return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf));
739 	}
740 
741 	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
742 	    NULL != n->parent &&
743 	    MDOC_Bl == n->parent->tok &&
744 	    LIST_column == n->parent->norm->Bl.type) {
745 		/* `Bl' has block-level `It' children. */
746 		mdoc->flags |= MDOC_FREECOL;
747 		return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf));
748 	}
749 
750 	/*
751 	 * Search for the beginning of unescaped trailing whitespace (ws)
752 	 * and for the first character not to be output (end).
753 	 */
754 
755 	/* FIXME: replace with strcspn(). */
756 	ws = NULL;
757 	for (c = end = buf + offs; *c; c++) {
758 		switch (*c) {
759 		case ' ':
760 			if (NULL == ws)
761 				ws = c;
762 			continue;
763 		case '\t':
764 			/*
765 			 * Always warn about trailing tabs,
766 			 * even outside literal context,
767 			 * where they should be put on the next line.
768 			 */
769 			if (NULL == ws)
770 				ws = c;
771 			/*
772 			 * Strip trailing tabs in literal context only;
773 			 * outside, they affect the next line.
774 			 */
775 			if (MDOC_LITERAL & mdoc->flags)
776 				continue;
777 			break;
778 		case '\\':
779 			/* Skip the escaped character, too, if any. */
780 			if (c[1])
781 				c++;
782 			/* FALLTHROUGH */
783 		default:
784 			ws = NULL;
785 			break;
786 		}
787 		end = c + 1;
788 	}
789 	*end = '\0';
790 
791 	if (ws)
792 		mdoc_pmsg(mdoc, line, (int)(ws-buf), MANDOCERR_EOLNSPACE);
793 
794 	if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) {
795 		mdoc_pmsg(mdoc, line, (int)(c-buf), MANDOCERR_NOBLANKLN);
796 
797 		/*
798 		 * Insert a `sp' in the case of a blank line.  Technically,
799 		 * blank lines aren't allowed, but enough manuals assume this
800 		 * behaviour that we want to work around it.
801 		 */
802 		if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL))
803 			return(0);
804 
805 		mdoc->next = MDOC_NEXT_SIBLING;
806 
807 		return(mdoc_valid_post(mdoc));
808 	}
809 
810 	if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs))
811 		return(0);
812 
813 	if (MDOC_LITERAL & mdoc->flags)
814 		return(1);
815 
816 	/*
817 	 * End-of-sentence check.  If the last character is an unescaped
818 	 * EOS character, then flag the node as being the end of a
819 	 * sentence.  The front-end will know how to interpret this.
820 	 */
821 
822 	assert(buf < end);
823 
824 	if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
825 		mdoc->last->flags |= MDOC_EOS;
826 
827 	return(1);
828 }
829 
830 /*
831  * Parse a macro line, that is, a line beginning with the control
832  * character.
833  */
834 static int
835 mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs)
836 {
837 	enum mdoct	  tok;
838 	int		  i, sv;
839 	char		  mac[5];
840 	struct mdoc_node *n;
841 
842 	/* Empty post-control lines are ignored. */
843 
844 	if ('"' == buf[offs]) {
845 		mdoc_pmsg(mdoc, ln, offs, MANDOCERR_BADCOMMENT);
846 		return(1);
847 	} else if ('\0' == buf[offs])
848 		return(1);
849 
850 	sv = offs;
851 
852 	/*
853 	 * Copy the first word into a nil-terminated buffer.
854 	 * Stop copying when a tab, space, or eoln is encountered.
855 	 */
856 
857 	i = 0;
858 	while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] &&
859 	    '\t' != buf[offs])
860 		mac[i++] = buf[offs++];
861 
862 	mac[i] = '\0';
863 
864 	tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : MDOC_MAX;
865 
866 	if (MDOC_MAX == tok) {
867 		mandoc_vmsg(MANDOCERR_MACRO, mdoc->parse,
868 		    ln, sv, "%s", buf + sv - 1);
869 		return(1);
870 	}
871 
872 	/* Disregard the first trailing tab, if applicable. */
873 
874 	if ('\t' == buf[offs])
875 		offs++;
876 
877 	/* Jump to the next non-whitespace word. */
878 
879 	while (buf[offs] && ' ' == buf[offs])
880 		offs++;
881 
882 	/*
883 	 * Trailing whitespace.  Note that tabs are allowed to be passed
884 	 * into the parser as "text", so we only warn about spaces here.
885 	 */
886 
887 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
888 		mdoc_pmsg(mdoc, ln, offs - 1, MANDOCERR_EOLNSPACE);
889 
890 	/*
891 	 * If an initial macro or a list invocation, divert directly
892 	 * into macro processing.
893 	 */
894 
895 	if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) {
896 		if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf))
897 			goto err;
898 		return(1);
899 	}
900 
901 	n = mdoc->last;
902 	assert(mdoc->last);
903 
904 	/*
905 	 * If the first macro of a `Bl -column', open an `It' block
906 	 * context around the parsed macro.
907 	 */
908 
909 	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
910 	    LIST_column == n->norm->Bl.type) {
911 		mdoc->flags |= MDOC_FREECOL;
912 		if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf))
913 			goto err;
914 		return(1);
915 	}
916 
917 	/*
918 	 * If we're following a block-level `It' within a `Bl -column'
919 	 * context (perhaps opened in the above block or in ptext()),
920 	 * then open an `It' block context around the parsed macro.
921 	 */
922 
923 	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
924 	    NULL != n->parent &&
925 	    MDOC_Bl == n->parent->tok &&
926 	    LIST_column == n->parent->norm->Bl.type) {
927 		mdoc->flags |= MDOC_FREECOL;
928 		if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf))
929 			goto err;
930 		return(1);
931 	}
932 
933 	/* Normal processing of a macro. */
934 
935 	if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf))
936 		goto err;
937 
938 	/* In quick mode (for mandocdb), abort after the NAME section. */
939 
940 	if (mdoc->quick && MDOC_Sh == tok &&
941 	    SEC_NAME != mdoc->last->sec)
942 		return(2);
943 
944 	return(1);
945 
946 err:	/* Error out. */
947 
948 	mdoc->flags |= MDOC_HALT;
949 	return(0);
950 }
951 
952 enum mdelim
953 mdoc_isdelim(const char *p)
954 {
955 
956 	if ('\0' == p[0])
957 		return(DELIM_NONE);
958 
959 	if ('\0' == p[1])
960 		switch (p[0]) {
961 		case '(':
962 			/* FALLTHROUGH */
963 		case '[':
964 			return(DELIM_OPEN);
965 		case '|':
966 			return(DELIM_MIDDLE);
967 		case '.':
968 			/* FALLTHROUGH */
969 		case ',':
970 			/* FALLTHROUGH */
971 		case ';':
972 			/* FALLTHROUGH */
973 		case ':':
974 			/* FALLTHROUGH */
975 		case '?':
976 			/* FALLTHROUGH */
977 		case '!':
978 			/* FALLTHROUGH */
979 		case ')':
980 			/* FALLTHROUGH */
981 		case ']':
982 			return(DELIM_CLOSE);
983 		default:
984 			return(DELIM_NONE);
985 		}
986 
987 	if ('\\' != p[0])
988 		return(DELIM_NONE);
989 
990 	if (0 == strcmp(p + 1, "."))
991 		return(DELIM_CLOSE);
992 	if (0 == strcmp(p + 1, "fR|\\fP"))
993 		return(DELIM_MIDDLE);
994 
995 	return(DELIM_NONE);
996 }
997 
998 void
999 mdoc_deroff(char **dest, const struct mdoc_node *n)
1000 {
1001 	char	*cp;
1002 	size_t	 sz;
1003 
1004 	if (MDOC_TEXT != n->type) {
1005 		for (n = n->child; n; n = n->next)
1006 			mdoc_deroff(dest, n);
1007 		return;
1008 	}
1009 
1010 	/* Skip leading whitespace. */
1011 
1012 	for (cp = n->string; '\0' != *cp; cp++)
1013 		if (0 == isspace((unsigned char)*cp))
1014 			break;
1015 
1016 	/* Skip trailing whitespace. */
1017 
1018 	for (sz = strlen(cp); sz; sz--)
1019 		if (0 == isspace((unsigned char)cp[sz-1]))
1020 			break;
1021 
1022 	/* Skip empty strings. */
1023 
1024 	if (0 == sz)
1025 		return;
1026 
1027 	if (NULL == *dest) {
1028 		*dest = mandoc_strndup(cp, sz);
1029 		return;
1030 	}
1031 
1032 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1033 	free(*dest);
1034 	*dest = cp;
1035 }
1036