/* xref: /illumos-gate/usr/src/cmd/mandoc/mdoc.c (revision 4d131170e62381276a07ffc0aeb1b62e527d940c) */
/* $Id: mdoc.c,v 1.275 2020/04/06 10:16:17 schwarze Exp $ */
/*
 * Copyright (c) 2010, 2012-2018, 2020 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Top level and utility functions of the mdoc(7) parser for mandoc(1).
 */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "mandoc_aux.h"
#include "mandoc.h"
#include "roff.h"
#include "mdoc.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "libmdoc.h"

4095c635efSGarrett D'Amore const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
4195c635efSGarrett D'Amore 	"split",		"nosplit",		"ragged",
4295c635efSGarrett D'Amore 	"unfilled",		"literal",		"file",
4395c635efSGarrett D'Amore 	"offset",		"bullet",		"dash",
4495c635efSGarrett D'Amore 	"hyphen",		"item",			"enum",
4595c635efSGarrett D'Amore 	"tag",			"diag",			"hang",
4695c635efSGarrett D'Amore 	"ohang",		"inset",		"column",
4795c635efSGarrett D'Amore 	"width",		"compact",		"std",
4895c635efSGarrett D'Amore 	"filled",		"words",		"emphasis",
4995c635efSGarrett D'Amore 	"symbolic",		"nested",		"centered"
5095c635efSGarrett D'Amore };
5195c635efSGarrett D'Amore const	char * const *mdoc_argnames = __mdoc_argnames;
5295c635efSGarrett D'Amore 
/* Per-line parsers private to this file. */
static	int		  mdoc_ptext(struct roff_man *, int, char *, int);
static	int		  mdoc_pmacro(struct roff_man *, int, char *, int);


5795c635efSGarrett D'Amore /*
5895c635efSGarrett D'Amore  * Main parse routine.  Parses a single line -- really just hands off to
5995c635efSGarrett D'Amore  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
6095c635efSGarrett D'Amore  */
6195c635efSGarrett D'Amore int
mdoc_parseln(struct roff_man * mdoc,int ln,char * buf,int offs)62371584c2SYuri Pankov mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
6395c635efSGarrett D'Amore {
6495c635efSGarrett D'Amore 
65371584c2SYuri Pankov 	if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
66698f87a4SGarrett D'Amore 		mdoc->flags |= MDOC_NEWLINE;
6795c635efSGarrett D'Amore 
6895c635efSGarrett D'Amore 	/*
6995c635efSGarrett D'Amore 	 * Let the roff nS register switch SYNOPSIS mode early,
7095c635efSGarrett D'Amore 	 * such that the parser knows at all times
7195c635efSGarrett D'Amore 	 * whether this mode is on or off.
7295c635efSGarrett D'Amore 	 * Note that this mode is also switched by the Sh macro.
7395c635efSGarrett D'Amore 	 */
74698f87a4SGarrett D'Amore 	if (roff_getreg(mdoc->roff, "nS"))
75698f87a4SGarrett D'Amore 		mdoc->flags |= MDOC_SYNOPSIS;
7695c635efSGarrett D'Amore 	else
77698f87a4SGarrett D'Amore 		mdoc->flags &= ~MDOC_SYNOPSIS;
7895c635efSGarrett D'Amore 
79371584c2SYuri Pankov 	return roff_getcontrol(mdoc->roff, buf, &offs) ?
80698f87a4SGarrett D'Amore 	    mdoc_pmacro(mdoc, ln, buf, offs) :
81371584c2SYuri Pankov 	    mdoc_ptext(mdoc, ln, buf, offs);
8295c635efSGarrett D'Amore }
8395c635efSGarrett D'Amore 
84260e9a87SYuri Pankov void
mdoc_tail_alloc(struct roff_man * mdoc,int line,int pos,enum roff_tok tok)85c66b8046SYuri Pankov mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
8695c635efSGarrett D'Amore {
87371584c2SYuri Pankov 	struct roff_node *p;
8895c635efSGarrett D'Amore 
89371584c2SYuri Pankov 	p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
90371584c2SYuri Pankov 	roff_node_append(mdoc, p);
91371584c2SYuri Pankov 	mdoc->next = ROFF_NEXT_CHILD;
9295c635efSGarrett D'Amore }
9395c635efSGarrett D'Amore 
94371584c2SYuri Pankov struct roff_node *
mdoc_endbody_alloc(struct roff_man * mdoc,int line,int pos,enum roff_tok tok,struct roff_node * body)95c66b8046SYuri Pankov mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
96c66b8046SYuri Pankov     enum roff_tok tok, struct roff_node *body)
9795c635efSGarrett D'Amore {
98371584c2SYuri Pankov 	struct roff_node *p;
9995c635efSGarrett D'Amore 
100a40ea1a7SYuri Pankov 	body->flags |= NODE_ENDED;
101a40ea1a7SYuri Pankov 	body->parent->flags |= NODE_ENDED;
102371584c2SYuri Pankov 	p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
103260e9a87SYuri Pankov 	p->body = body;
104698f87a4SGarrett D'Amore 	p->norm = body->norm;
105a40ea1a7SYuri Pankov 	p->end = ENDBODY_SPACE;
106371584c2SYuri Pankov 	roff_node_append(mdoc, p);
107371584c2SYuri Pankov 	mdoc->next = ROFF_NEXT_SIBLING;
108371584c2SYuri Pankov 	return p;
10995c635efSGarrett D'Amore }
11095c635efSGarrett D'Amore 
111371584c2SYuri Pankov struct roff_node *
mdoc_block_alloc(struct roff_man * mdoc,int line,int pos,enum roff_tok tok,struct mdoc_arg * args)112371584c2SYuri Pankov mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
113c66b8046SYuri Pankov     enum roff_tok tok, struct mdoc_arg *args)
11495c635efSGarrett D'Amore {
115371584c2SYuri Pankov 	struct roff_node *p;
11695c635efSGarrett D'Amore 
117371584c2SYuri Pankov 	p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
11895c635efSGarrett D'Amore 	p->args = args;
11995c635efSGarrett D'Amore 	if (p->args)
12095c635efSGarrett D'Amore 		(args->refcnt)++;
12195c635efSGarrett D'Amore 
12295c635efSGarrett D'Amore 	switch (tok) {
123260e9a87SYuri Pankov 	case MDOC_Bd:
124260e9a87SYuri Pankov 	case MDOC_Bf:
125260e9a87SYuri Pankov 	case MDOC_Bl:
126260e9a87SYuri Pankov 	case MDOC_En:
127260e9a87SYuri Pankov 	case MDOC_Rs:
12895c635efSGarrett D'Amore 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
12995c635efSGarrett D'Amore 		break;
13095c635efSGarrett D'Amore 	default:
13195c635efSGarrett D'Amore 		break;
13295c635efSGarrett D'Amore 	}
133371584c2SYuri Pankov 	roff_node_append(mdoc, p);
134371584c2SYuri Pankov 	mdoc->next = ROFF_NEXT_CHILD;
135371584c2SYuri Pankov 	return p;
13695c635efSGarrett D'Amore }
13795c635efSGarrett D'Amore 
138260e9a87SYuri Pankov void
mdoc_elem_alloc(struct roff_man * mdoc,int line,int pos,enum roff_tok tok,struct mdoc_arg * args)139371584c2SYuri Pankov mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
140c66b8046SYuri Pankov      enum roff_tok tok, struct mdoc_arg *args)
14195c635efSGarrett D'Amore {
142371584c2SYuri Pankov 	struct roff_node *p;
14395c635efSGarrett D'Amore 
144371584c2SYuri Pankov 	p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
14595c635efSGarrett D'Amore 	p->args = args;
14695c635efSGarrett D'Amore 	if (p->args)
14795c635efSGarrett D'Amore 		(args->refcnt)++;
14895c635efSGarrett D'Amore 
14995c635efSGarrett D'Amore 	switch (tok) {
150260e9a87SYuri Pankov 	case MDOC_An:
15195c635efSGarrett D'Amore 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
15295c635efSGarrett D'Amore 		break;
15395c635efSGarrett D'Amore 	default:
15495c635efSGarrett D'Amore 		break;
15595c635efSGarrett D'Amore 	}
156371584c2SYuri Pankov 	roff_node_append(mdoc, p);
157371584c2SYuri Pankov 	mdoc->next = ROFF_NEXT_CHILD;
15895c635efSGarrett D'Amore }
15995c635efSGarrett D'Amore 
16095c635efSGarrett D'Amore /*
16195c635efSGarrett D'Amore  * Parse free-form text, that is, a line that does not begin with the
16295c635efSGarrett D'Amore  * control character.
16395c635efSGarrett D'Amore  */
16495c635efSGarrett D'Amore static int
mdoc_ptext(struct roff_man * mdoc,int line,char * buf,int offs)165371584c2SYuri Pankov mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
16695c635efSGarrett D'Amore {
167371584c2SYuri Pankov 	struct roff_node *n;
168c66b8046SYuri Pankov 	const char	 *cp, *sp;
16995c635efSGarrett D'Amore 	char		 *c, *ws, *end;
17095c635efSGarrett D'Amore 
171698f87a4SGarrett D'Amore 	n = mdoc->last;
17295c635efSGarrett D'Amore 
17395c635efSGarrett D'Amore 	/*
174a40ea1a7SYuri Pankov 	 * If a column list contains plain text, assume an implicit item
175a40ea1a7SYuri Pankov 	 * macro.  This can happen one or more times at the beginning
176a40ea1a7SYuri Pankov 	 * of such a list, intermixed with non-It mdoc macros and with
177a40ea1a7SYuri Pankov 	 * nodes generated on the roff level, for example by tbl.
17895c635efSGarrett D'Amore 	 */
17995c635efSGarrett D'Amore 
180a40ea1a7SYuri Pankov 	if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
181a40ea1a7SYuri Pankov 	     n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
182a40ea1a7SYuri Pankov 	    (n->parent != NULL && n->parent->tok == MDOC_Bl &&
183a40ea1a7SYuri Pankov 	     n->parent->norm->Bl.type == LIST_column)) {
184698f87a4SGarrett D'Amore 		mdoc->flags |= MDOC_FREECOL;
185cec8643bSMichal Nowak 		(*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It,
186cec8643bSMichal Nowak 		    line, offs, &offs, buf);
187371584c2SYuri Pankov 		return 1;
18895c635efSGarrett D'Amore 	}
18995c635efSGarrett D'Amore 
19095c635efSGarrett D'Amore 	/*
19195c635efSGarrett D'Amore 	 * Search for the beginning of unescaped trailing whitespace (ws)
19295c635efSGarrett D'Amore 	 * and for the first character not to be output (end).
19395c635efSGarrett D'Amore 	 */
19495c635efSGarrett D'Amore 
19595c635efSGarrett D'Amore 	/* FIXME: replace with strcspn(). */
19695c635efSGarrett D'Amore 	ws = NULL;
19795c635efSGarrett D'Amore 	for (c = end = buf + offs; *c; c++) {
19895c635efSGarrett D'Amore 		switch (*c) {
19995c635efSGarrett D'Amore 		case ' ':
20095c635efSGarrett D'Amore 			if (NULL == ws)
20195c635efSGarrett D'Amore 				ws = c;
20295c635efSGarrett D'Amore 			continue;
20395c635efSGarrett D'Amore 		case '\t':
20495c635efSGarrett D'Amore 			/*
20595c635efSGarrett D'Amore 			 * Always warn about trailing tabs,
20695c635efSGarrett D'Amore 			 * even outside literal context,
20795c635efSGarrett D'Amore 			 * where they should be put on the next line.
20895c635efSGarrett D'Amore 			 */
20995c635efSGarrett D'Amore 			if (NULL == ws)
21095c635efSGarrett D'Amore 				ws = c;
21195c635efSGarrett D'Amore 			/*
21295c635efSGarrett D'Amore 			 * Strip trailing tabs in literal context only;
21395c635efSGarrett D'Amore 			 * outside, they affect the next line.
21495c635efSGarrett D'Amore 			 */
215cec8643bSMichal Nowak 			if (mdoc->flags & ROFF_NOFILL)
21695c635efSGarrett D'Amore 				continue;
21795c635efSGarrett D'Amore 			break;
21895c635efSGarrett D'Amore 		case '\\':
21995c635efSGarrett D'Amore 			/* Skip the escaped character, too, if any. */
22095c635efSGarrett D'Amore 			if (c[1])
22195c635efSGarrett D'Amore 				c++;
22295c635efSGarrett D'Amore 			/* FALLTHROUGH */
22395c635efSGarrett D'Amore 		default:
22495c635efSGarrett D'Amore 			ws = NULL;
22595c635efSGarrett D'Amore 			break;
22695c635efSGarrett D'Amore 		}
22795c635efSGarrett D'Amore 		end = c + 1;
22895c635efSGarrett D'Amore 	}
22995c635efSGarrett D'Amore 	*end = '\0';
23095c635efSGarrett D'Amore 
23195c635efSGarrett D'Amore 	if (ws)
232cec8643bSMichal Nowak 		mandoc_msg(MANDOCERR_SPACE_EOL, line, (int)(ws - buf), NULL);
23395c635efSGarrett D'Amore 
234c66b8046SYuri Pankov 	/*
235c66b8046SYuri Pankov 	 * Blank lines are allowed in no-fill mode
236c66b8046SYuri Pankov 	 * and cancel preceding \c,
237c66b8046SYuri Pankov 	 * but add a single vertical space elsewhere.
238c66b8046SYuri Pankov 	 */
239c66b8046SYuri Pankov 
240cec8643bSMichal Nowak 	if (buf[offs] == '\0' && (mdoc->flags & ROFF_NOFILL) == 0) {
241c66b8046SYuri Pankov 		switch (mdoc->last->type) {
242c66b8046SYuri Pankov 		case ROFFT_TEXT:
243c66b8046SYuri Pankov 			sp = mdoc->last->string;
244c66b8046SYuri Pankov 			cp = end = strchr(sp, '\0') - 2;
245c66b8046SYuri Pankov 			if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
246c66b8046SYuri Pankov 				break;
247c66b8046SYuri Pankov 			while (cp > sp && cp[-1] == '\\')
248c66b8046SYuri Pankov 				cp--;
249c66b8046SYuri Pankov 			if ((end - cp) % 2)
250c66b8046SYuri Pankov 				break;
251c66b8046SYuri Pankov 			*end = '\0';
252c66b8046SYuri Pankov 			return 1;
253c66b8046SYuri Pankov 		default:
254c66b8046SYuri Pankov 			break;
255c66b8046SYuri Pankov 		}
256cec8643bSMichal Nowak 		mandoc_msg(MANDOCERR_FI_BLANK, line, (int)(c - buf), NULL);
257c66b8046SYuri Pankov 		roff_elem_alloc(mdoc, line, offs, ROFF_sp);
258a40ea1a7SYuri Pankov 		mdoc->last->flags |= NODE_VALID | NODE_ENDED;
259371584c2SYuri Pankov 		mdoc->next = ROFF_NEXT_SIBLING;
260371584c2SYuri Pankov 		return 1;
26195c635efSGarrett D'Amore 	}
26295c635efSGarrett D'Amore 
263371584c2SYuri Pankov 	roff_word_alloc(mdoc, line, offs, buf+offs);
26495c635efSGarrett D'Amore 
265cec8643bSMichal Nowak 	if (mdoc->flags & ROFF_NOFILL)
266371584c2SYuri Pankov 		return 1;
26795c635efSGarrett D'Amore 
26895c635efSGarrett D'Amore 	/*
26995c635efSGarrett D'Amore 	 * End-of-sentence check.  If the last character is an unescaped
27095c635efSGarrett D'Amore 	 * EOS character, then flag the node as being the end of a
27195c635efSGarrett D'Amore 	 * sentence.  The front-end will know how to interpret this.
27295c635efSGarrett D'Amore 	 */
27395c635efSGarrett D'Amore 
27495c635efSGarrett D'Amore 	assert(buf < end);
27595c635efSGarrett D'Amore 
276260e9a87SYuri Pankov 	if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
277a40ea1a7SYuri Pankov 		mdoc->last->flags |= NODE_EOS;
278a40ea1a7SYuri Pankov 
279a40ea1a7SYuri Pankov 	for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
280a40ea1a7SYuri Pankov 		if (c - buf < offs + 2)
281a40ea1a7SYuri Pankov 			continue;
282c66b8046SYuri Pankov 		if (end - c < 3)
283a40ea1a7SYuri Pankov 			break;
284c66b8046SYuri Pankov 		if (c[1] != ' ' ||
2856640c13bSYuri Pankov 		    isalnum((unsigned char)c[-2]) == 0 ||
2866640c13bSYuri Pankov 		    isalnum((unsigned char)c[-1]) == 0 ||
287c66b8046SYuri Pankov 		    (c[-2] == 'n' && c[-1] == 'c') ||
288c66b8046SYuri Pankov 		    (c[-2] == 'v' && c[-1] == 's'))
289c66b8046SYuri Pankov 			continue;
290c66b8046SYuri Pankov 		c += 2;
291c66b8046SYuri Pankov 		if (*c == ' ')
292c66b8046SYuri Pankov 			c++;
293c66b8046SYuri Pankov 		if (*c == ' ')
294c66b8046SYuri Pankov 			c++;
295c66b8046SYuri Pankov 		if (isupper((unsigned char)(*c)))
296cec8643bSMichal Nowak 			mandoc_msg(MANDOCERR_EOS, line, (int)(c - buf), NULL);
297a40ea1a7SYuri Pankov 	}
298a40ea1a7SYuri Pankov 
299371584c2SYuri Pankov 	return 1;
30095c635efSGarrett D'Amore }
30195c635efSGarrett D'Amore 
30295c635efSGarrett D'Amore /*
30395c635efSGarrett D'Amore  * Parse a macro line, that is, a line beginning with the control
30495c635efSGarrett D'Amore  * character.
30595c635efSGarrett D'Amore  */
30695c635efSGarrett D'Amore static int
mdoc_pmacro(struct roff_man * mdoc,int ln,char * buf,int offs)307371584c2SYuri Pankov mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
30895c635efSGarrett D'Amore {
309371584c2SYuri Pankov 	struct roff_node *n;
310260e9a87SYuri Pankov 	const char	 *cp;
311c66b8046SYuri Pankov 	size_t		  sz;
312c66b8046SYuri Pankov 	enum roff_tok	  tok;
313c66b8046SYuri Pankov 	int		  sv;
314c66b8046SYuri Pankov 
315c66b8046SYuri Pankov 	/* Determine the line macro. */
31695c635efSGarrett D'Amore 
31795c635efSGarrett D'Amore 	sv = offs;
318c66b8046SYuri Pankov 	tok = TOKEN_NONE;
319c66b8046SYuri Pankov 	for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
320c66b8046SYuri Pankov 		offs++;
321c66b8046SYuri Pankov 	if (sz == 2 || sz == 3)
322c66b8046SYuri Pankov 		tok = roffhash_find(mdoc->mdocmac, buf + sv, sz);
323371584c2SYuri Pankov 	if (tok == TOKEN_NONE) {
324cec8643bSMichal Nowak 		mandoc_msg(MANDOCERR_MACRO, ln, sv, "%s", buf + sv - 1);
325371584c2SYuri Pankov 		return 1;
32695c635efSGarrett D'Amore 	}
32795c635efSGarrett D'Amore 
328260e9a87SYuri Pankov 	/* Skip a leading escape sequence or tab. */
32995c635efSGarrett D'Amore 
330260e9a87SYuri Pankov 	switch (buf[offs]) {
331260e9a87SYuri Pankov 	case '\\':
332260e9a87SYuri Pankov 		cp = buf + offs + 1;
333260e9a87SYuri Pankov 		mandoc_escape(&cp, NULL, NULL);
334260e9a87SYuri Pankov 		offs = cp - buf;
335260e9a87SYuri Pankov 		break;
336260e9a87SYuri Pankov 	case '\t':
33795c635efSGarrett D'Amore 		offs++;
338260e9a87SYuri Pankov 		break;
339260e9a87SYuri Pankov 	default:
340260e9a87SYuri Pankov 		break;
341260e9a87SYuri Pankov 	}
34295c635efSGarrett D'Amore 
34395c635efSGarrett D'Amore 	/* Jump to the next non-whitespace word. */
34495c635efSGarrett D'Amore 
345c66b8046SYuri Pankov 	while (buf[offs] == ' ')
34695c635efSGarrett D'Amore 		offs++;
34795c635efSGarrett D'Amore 
34895c635efSGarrett D'Amore 	/*
34995c635efSGarrett D'Amore 	 * Trailing whitespace.  Note that tabs are allowed to be passed
35095c635efSGarrett D'Amore 	 * into the parser as "text", so we only warn about spaces here.
35195c635efSGarrett D'Amore 	 */
35295c635efSGarrett D'Amore 
35395c635efSGarrett D'Amore 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
354cec8643bSMichal Nowak 		mandoc_msg(MANDOCERR_SPACE_EOL, ln, offs - 1, NULL);
35595c635efSGarrett D'Amore 
35695c635efSGarrett D'Amore 	/*
357*4d131170SRobert Mustacchi 	 * If an initial or transparent macro or a list invocation,
358*4d131170SRobert Mustacchi 	 * divert directly into macro processing.
35995c635efSGarrett D'Amore 	 */
36095c635efSGarrett D'Amore 
361a40ea1a7SYuri Pankov 	n = mdoc->last;
362*4d131170SRobert Mustacchi 	if (n == NULL || tok == MDOC_It || tok == MDOC_El ||
363*4d131170SRobert Mustacchi 	    roff_tok_transparent(tok)) {
364cec8643bSMichal Nowak 		(*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf);
365371584c2SYuri Pankov 		return 1;
36695c635efSGarrett D'Amore 	}
36795c635efSGarrett D'Amore 
36895c635efSGarrett D'Amore 	/*
369a40ea1a7SYuri Pankov 	 * If a column list contains a non-It macro, assume an implicit
370a40ea1a7SYuri Pankov 	 * item macro.  This can happen one or more times at the
371a40ea1a7SYuri Pankov 	 * beginning of such a list, intermixed with text lines and
372a40ea1a7SYuri Pankov 	 * with nodes generated on the roff level, for example by tbl.
37395c635efSGarrett D'Amore 	 */
37495c635efSGarrett D'Amore 
375a40ea1a7SYuri Pankov 	if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
376a40ea1a7SYuri Pankov 	     n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
377a40ea1a7SYuri Pankov 	    (n->parent != NULL && n->parent->tok == MDOC_Bl &&
378a40ea1a7SYuri Pankov 	     n->parent->norm->Bl.type == LIST_column)) {
379698f87a4SGarrett D'Amore 		mdoc->flags |= MDOC_FREECOL;
380cec8643bSMichal Nowak 		(*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, ln, sv, &sv, buf);
381371584c2SYuri Pankov 		return 1;
38295c635efSGarrett D'Amore 	}
38395c635efSGarrett D'Amore 
38495c635efSGarrett D'Amore 	/* Normal processing of a macro. */
38595c635efSGarrett D'Amore 
386cec8643bSMichal Nowak 	(*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf);
387260e9a87SYuri Pankov 
388260e9a87SYuri Pankov 	/* In quick mode (for mandocdb), abort after the NAME section. */
389260e9a87SYuri Pankov 
390260e9a87SYuri Pankov 	if (mdoc->quick && MDOC_Sh == tok &&
391260e9a87SYuri Pankov 	    SEC_NAME != mdoc->last->sec)
392371584c2SYuri Pankov 		return 2;
39395c635efSGarrett D'Amore 
394371584c2SYuri Pankov 	return 1;
39595c635efSGarrett D'Amore }
39695c635efSGarrett D'Amore 
39795c635efSGarrett D'Amore enum mdelim
mdoc_isdelim(const char * p)39895c635efSGarrett D'Amore mdoc_isdelim(const char *p)
39995c635efSGarrett D'Amore {
40095c635efSGarrett D'Amore 
40195c635efSGarrett D'Amore 	if ('\0' == p[0])
402371584c2SYuri Pankov 		return DELIM_NONE;
40395c635efSGarrett D'Amore 
40495c635efSGarrett D'Amore 	if ('\0' == p[1])
40595c635efSGarrett D'Amore 		switch (p[0]) {
406260e9a87SYuri Pankov 		case '(':
407260e9a87SYuri Pankov 		case '[':
408371584c2SYuri Pankov 			return DELIM_OPEN;
409260e9a87SYuri Pankov 		case '|':
410371584c2SYuri Pankov 			return DELIM_MIDDLE;
411260e9a87SYuri Pankov 		case '.':
412260e9a87SYuri Pankov 		case ',':
413260e9a87SYuri Pankov 		case ';':
414260e9a87SYuri Pankov 		case ':':
415260e9a87SYuri Pankov 		case '?':
416260e9a87SYuri Pankov 		case '!':
417260e9a87SYuri Pankov 		case ')':
418260e9a87SYuri Pankov 		case ']':
419371584c2SYuri Pankov 			return DELIM_CLOSE;
42095c635efSGarrett D'Amore 		default:
421371584c2SYuri Pankov 			return DELIM_NONE;
42295c635efSGarrett D'Amore 		}
42395c635efSGarrett D'Amore 
42495c635efSGarrett D'Amore 	if ('\\' != p[0])
425371584c2SYuri Pankov 		return DELIM_NONE;
42695c635efSGarrett D'Amore 
42795c635efSGarrett D'Amore 	if (0 == strcmp(p + 1, "."))
428371584c2SYuri Pankov 		return DELIM_CLOSE;
429698f87a4SGarrett D'Amore 	if (0 == strcmp(p + 1, "fR|\\fP"))
430371584c2SYuri Pankov 		return DELIM_MIDDLE;
43195c635efSGarrett D'Amore 
432371584c2SYuri Pankov 	return DELIM_NONE;
43395c635efSGarrett D'Amore }
434