1*6167eca2Schristos /* Id: man.c,v 1.187 2019/01/05 00:36:50 schwarze Exp */
24154958bSjoerg /*
348741257Sjoerg * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*6167eca2Schristos * Copyright (c) 2013-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5fec65c98Schristos * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
64154958bSjoerg *
74154958bSjoerg * Permission to use, copy, modify, and distribute this software for any
84154958bSjoerg * purpose with or without fee is hereby granted, provided that the above
94154958bSjoerg * copyright notice and this permission notice appear in all copies.
104154958bSjoerg *
11f47368cfSchristos * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
124154958bSjoerg * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13f47368cfSchristos * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
144154958bSjoerg * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
154154958bSjoerg * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
164154958bSjoerg * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
174154958bSjoerg * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
184154958bSjoerg */
19d5e63c8dSjoerg #include "config.h"
20d5e63c8dSjoerg
214154958bSjoerg #include <sys/types.h>
224154958bSjoerg
234154958bSjoerg #include <assert.h>
24fec65c98Schristos #include <ctype.h>
254154958bSjoerg #include <stdarg.h>
264154958bSjoerg #include <stdlib.h>
274154958bSjoerg #include <stdio.h>
284154958bSjoerg #include <string.h>
294154958bSjoerg
30fec65c98Schristos #include "mandoc_aux.h"
31f47368cfSchristos #include "mandoc.h"
32f47368cfSchristos #include "roff.h"
33f47368cfSchristos #include "man.h"
343514411fSjoerg #include "libmandoc.h"
35f47368cfSchristos #include "roff_int.h"
36f47368cfSchristos #include "libman.h"
374154958bSjoerg
/* Forward declarations of the helpers that are private to this file. */
static	char	*man_hasc(char *start);
static	int	 man_pmacro(struct roff_man *man, int ln,
			char *buf, int offs);
static	int	 man_ptext(struct roff_man *man, int line,
			char *buf, int offs);
414154958bSjoerg
424154958bSjoerg
434154958bSjoerg int
man_parseln(struct roff_man * man,int ln,char * buf,int offs)44f47368cfSchristos man_parseln(struct roff_man *man, int ln, char *buf, int offs)
454154958bSjoerg {
464154958bSjoerg
47f47368cfSchristos if (man->last->type != ROFFT_EQN || ln > man->last->line)
4870f041f9Sjoerg man->flags |= MAN_NEWLINE;
4948741257Sjoerg
50f47368cfSchristos return roff_getcontrol(man->roff, buf, &offs) ?
5170f041f9Sjoerg man_pmacro(man, ln, buf, offs) :
52f47368cfSchristos man_ptext(man, ln, buf, offs);
534154958bSjoerg }
544154958bSjoerg
/*
 * If the string ends with an unescaped \c, return a pointer to the
 * backslash.  Otherwise, return NULL.
 * The trailing backslash counts as escaped when it is preceded by
 * an odd number of further backslashes.
 */
static char *
man_hasc(char *start)
{
	char	*cp, *ep;
	size_t	 sz;

	/*
	 * Check the length before doing any pointer arithmetic:
	 * for strings shorter than two bytes, "end - 2" would point
	 * before the start of the array, which is undefined behaviour
	 * even if the pointer is only compared and never dereferenced.
	 */

	sz = strlen(start);
	if (sz < 2)
		return NULL;

	ep = start + sz - 2;
	if (ep[0] != '\\' || ep[1] != 'c')
		return NULL;

	/* Walk back to the first backslash of the run ending at ep. */

	for (cp = ep; cp > start; cp--)
		if (cp[-1] != '\\')
			break;

	/* An odd number of preceding backslashes escapes the last one. */

	return (ep - cp) % 2 ? NULL : ep;
}
72*6167eca2Schristos
73*6167eca2Schristos void
man_descope(struct roff_man * man,int line,int offs,char * start)74*6167eca2Schristos man_descope(struct roff_man *man, int line, int offs, char *start)
75*6167eca2Schristos {
76*6167eca2Schristos /* Trailing \c keeps next-line scope open. */
77*6167eca2Schristos
78*6167eca2Schristos if (start != NULL && man_hasc(start) != NULL)
79*6167eca2Schristos return;
80*6167eca2Schristos
814154958bSjoerg /*
824154958bSjoerg * Co-ordinate what happens with having a next-line scope open:
83*6167eca2Schristos * first close out the element scopes (if applicable),
84*6167eca2Schristos * then close out the block scope (also if applicable).
854154958bSjoerg */
864154958bSjoerg
87fec65c98Schristos if (man->flags & MAN_ELINE) {
88*6167eca2Schristos while (man->last->parent->type != ROFFT_ROOT &&
89*6167eca2Schristos man_macro(man->last->parent->tok)->flags & MAN_ESCOPED)
90fec65c98Schristos man_unscope(man, man->last->parent);
91*6167eca2Schristos man->flags &= ~MAN_ELINE;
924154958bSjoerg }
93fec65c98Schristos if ( ! (man->flags & MAN_BLINE))
94fec65c98Schristos return;
95fec65c98Schristos man_unscope(man, man->last->parent);
96f47368cfSchristos roff_body_alloc(man, line, offs, man->last->tok);
97*6167eca2Schristos man->flags &= ~(MAN_BLINE | ROFF_NONOFILL);
984154958bSjoerg }
994154958bSjoerg
10022af4063Sjoerg static int
man_ptext(struct roff_man * man,int line,char * buf,int offs)101f47368cfSchristos man_ptext(struct roff_man *man, int line, char *buf, int offs)
10222af4063Sjoerg {
103c0d9444aSjoerg int i;
10414e7489eSchristos char *ep;
1050a84adc5Sjoerg
106*6167eca2Schristos /* In no-fill mode, whitespace is preserved on text lines. */
107c0d9444aSjoerg
108*6167eca2Schristos if (man->flags & ROFF_NOFILL) {
109f47368cfSchristos roff_word_alloc(man, line, offs, buf + offs);
110*6167eca2Schristos man_descope(man, line, offs, buf + offs);
111f47368cfSchristos return 1;
112c0d9444aSjoerg }
113c0d9444aSjoerg
114fec65c98Schristos for (i = offs; buf[i] == ' '; i++)
115c0d9444aSjoerg /* Skip leading whitespace. */ ;
116c0d9444aSjoerg
11770f041f9Sjoerg /*
11814e7489eSchristos * Blank lines are ignored in next line scope
11914e7489eSchristos * and right after headings and cancel preceding \c,
12070f041f9Sjoerg * but add a single vertical space elsewhere.
12170f041f9Sjoerg */
12270f041f9Sjoerg
123fec65c98Schristos if (buf[i] == '\0') {
12414e7489eSchristos if (man->flags & (MAN_ELINE | MAN_BLINE)) {
125*6167eca2Schristos mandoc_msg(MANDOCERR_BLK_BLANK, line, 0, NULL);
12614e7489eSchristos return 1;
12770f041f9Sjoerg }
12814e7489eSchristos if (man->last->tok == MAN_SH || man->last->tok == MAN_SS)
12914e7489eSchristos return 1;
130*6167eca2Schristos if (man->last->type == ROFFT_TEXT &&
131*6167eca2Schristos ((ep = man_hasc(man->last->string)) != NULL)) {
13214e7489eSchristos *ep = '\0';
13314e7489eSchristos return 1;
13414e7489eSchristos }
13514e7489eSchristos roff_elem_alloc(man, line, offs, ROFF_sp);
13614e7489eSchristos man->next = ROFF_NEXT_SIBLING;
137f47368cfSchristos return 1;
138c0d9444aSjoerg }
139c0d9444aSjoerg
140c0d9444aSjoerg /*
141c0d9444aSjoerg * Warn if the last un-escaped character is whitespace. Then
142c0d9444aSjoerg * strip away the remaining spaces (tabs stay!).
143c0d9444aSjoerg */
144c0d9444aSjoerg
145c0d9444aSjoerg i = (int)strlen(buf);
146c0d9444aSjoerg assert(i);
147c0d9444aSjoerg
148c0d9444aSjoerg if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
149c0d9444aSjoerg if (i > 1 && '\\' != buf[i - 2])
150*6167eca2Schristos mandoc_msg(MANDOCERR_SPACE_EOL, line, i - 1, NULL);
151c0d9444aSjoerg
152c0d9444aSjoerg for (--i; i && ' ' == buf[i]; i--)
153c0d9444aSjoerg /* Spin back to non-space. */ ;
154c0d9444aSjoerg
155c0d9444aSjoerg /* Jump ahead of escaped whitespace. */
156c0d9444aSjoerg i += '\\' == buf[i] ? 2 : 1;
157c0d9444aSjoerg
158c0d9444aSjoerg buf[i] = '\0';
159c0d9444aSjoerg }
160f47368cfSchristos roff_word_alloc(man, line, offs, buf + offs);
161c0d9444aSjoerg
162c0d9444aSjoerg /*
163c0d9444aSjoerg * End-of-sentence check. If the last character is an unescaped
164c0d9444aSjoerg * EOS character, then flag the node as being the end of a
165c0d9444aSjoerg * sentence. The front-end will know how to interpret this.
166c0d9444aSjoerg */
167c0d9444aSjoerg
168c0d9444aSjoerg assert(i);
16970f041f9Sjoerg if (mandoc_eos(buf, (size_t)i))
17037ef69edSchristos man->last->flags |= NODE_EOS;
171c0d9444aSjoerg
172*6167eca2Schristos man_descope(man, line, offs, buf + offs);
173f47368cfSchristos return 1;
17422af4063Sjoerg }
17522af4063Sjoerg
176c0d9444aSjoerg static int
man_pmacro(struct roff_man * man,int ln,char * buf,int offs)177f47368cfSchristos man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
1784154958bSjoerg {
179f47368cfSchristos struct roff_node *n;
180fec65c98Schristos const char *cp;
18114e7489eSchristos size_t sz;
18214e7489eSchristos enum roff_tok tok;
18314e7489eSchristos int ppos;
184fec65c98Schristos int bline;
18514e7489eSchristos
18614e7489eSchristos /* Determine the line macro. */
1874154958bSjoerg
18848741257Sjoerg ppos = offs;
18914e7489eSchristos tok = TOKEN_NONE;
19014e7489eSchristos for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
19114e7489eSchristos offs++;
19214e7489eSchristos if (sz > 0 && sz < 4)
19314e7489eSchristos tok = roffhash_find(man->manmac, buf + ppos, sz);
194f47368cfSchristos if (tok == TOKEN_NONE) {
195*6167eca2Schristos mandoc_msg(MANDOCERR_MACRO, ln, ppos, "%s", buf + ppos - 1);
196f47368cfSchristos return 1;
1974154958bSjoerg }
1984154958bSjoerg
199fec65c98Schristos /* Skip a leading escape sequence or tab. */
2004154958bSjoerg
201fec65c98Schristos switch (buf[offs]) {
202fec65c98Schristos case '\\':
203fec65c98Schristos cp = buf + offs + 1;
204fec65c98Schristos mandoc_escape(&cp, NULL, NULL);
205fec65c98Schristos offs = cp - buf;
206fec65c98Schristos break;
207fec65c98Schristos case '\t':
208fec65c98Schristos offs++;
209fec65c98Schristos break;
210fec65c98Schristos default:
211fec65c98Schristos break;
212fec65c98Schristos }
213fec65c98Schristos
214fec65c98Schristos /* Jump to the next non-whitespace word. */
215fec65c98Schristos
21614e7489eSchristos while (buf[offs] == ' ')
21748741257Sjoerg offs++;
2184154958bSjoerg
2190a84adc5Sjoerg /*
2200a84adc5Sjoerg * Trailing whitespace. Note that tabs are allowed to be passed
2210a84adc5Sjoerg * into the parser as "text", so we only warn about spaces here.
2220a84adc5Sjoerg */
223d5e63c8dSjoerg
224fec65c98Schristos if (buf[offs] == '\0' && buf[offs - 1] == ' ')
225*6167eca2Schristos mandoc_msg(MANDOCERR_SPACE_EOL, ln, offs - 1, NULL);
226d5e63c8dSjoerg
22731e1f4e3Sjoerg /*
228fec65c98Schristos * Some macros break next-line scopes; otherwise, remember
229fec65c98Schristos * whether we are in next-line scope for a block head.
23031e1f4e3Sjoerg */
2314154958bSjoerg
232fec65c98Schristos man_breakscope(man, tok);
233fec65c98Schristos bline = man->flags & MAN_BLINE;
234fec65c98Schristos
23514e7489eSchristos /*
23614e7489eSchristos * If the line in next-line scope ends with \c, keep the
23714e7489eSchristos * next-line scope open for the subsequent input line.
23814e7489eSchristos * That is not at all portable, only groff >= 1.22.4
23914e7489eSchristos * does it, but *if* this weird idiom occurs in a manual
24014e7489eSchristos * page, that's very likely what the author intended.
24114e7489eSchristos */
24214e7489eSchristos
243*6167eca2Schristos if (bline && man_hasc(buf + offs))
24414e7489eSchristos bline = 0;
24514e7489eSchristos
246fec65c98Schristos /* Call to handler... */
247fec65c98Schristos
248*6167eca2Schristos (*man_macro(tok)->fp)(man, tok, ln, ppos, &offs, buf);
249fec65c98Schristos
250fec65c98Schristos /* In quick mode (for mandocdb), abort after the NAME section. */
251fec65c98Schristos
252fec65c98Schristos if (man->quick && tok == MAN_SH) {
25370f041f9Sjoerg n = man->last;
254f47368cfSchristos if (n->type == ROFFT_BODY &&
255fec65c98Schristos strcmp(n->prev->child->string, "NAME"))
256f47368cfSchristos return 2;
257fec65c98Schristos }
25831e1f4e3Sjoerg
259fec65c98Schristos /*
260fec65c98Schristos * If we are in a next-line scope for a block head,
261fec65c98Schristos * close it out now and switch to the body,
262fec65c98Schristos * unless the next-line scope is allowed to continue.
263fec65c98Schristos */
26431e1f4e3Sjoerg
265*6167eca2Schristos if (bline == 0 ||
266*6167eca2Schristos (man->flags & MAN_BLINE) == 0 ||
267*6167eca2Schristos man->flags & MAN_ELINE ||
268*6167eca2Schristos man_macro(tok)->flags & MAN_NSCOPED)
269f47368cfSchristos return 1;
270fec65c98Schristos
271fec65c98Schristos man_unscope(man, man->last->parent);
272f47368cfSchristos roff_body_alloc(man, ln, ppos, man->last->tok);
273*6167eca2Schristos man->flags &= ~(MAN_BLINE | ROFF_NONOFILL);
274f47368cfSchristos return 1;
275fec65c98Schristos }
276fec65c98Schristos
277fec65c98Schristos void
man_breakscope(struct roff_man * man,int tok)278f47368cfSchristos man_breakscope(struct roff_man *man, int tok)
279fec65c98Schristos {
280f47368cfSchristos struct roff_node *n;
281fec65c98Schristos
282fec65c98Schristos /*
283fec65c98Schristos * An element next line scope is open,
284fec65c98Schristos * and the new macro is not allowed inside elements.
285fec65c98Schristos * Delete the element that is being broken.
286fec65c98Schristos */
287fec65c98Schristos
28814e7489eSchristos if (man->flags & MAN_ELINE && (tok < MAN_TH ||
289*6167eca2Schristos (man_macro(tok)->flags & MAN_NSCOPED) == 0)) {
290fec65c98Schristos n = man->last;
29114e7489eSchristos if (n->type == ROFFT_TEXT)
29214e7489eSchristos n = n->parent;
29314e7489eSchristos if (n->tok < MAN_TH ||
294*6167eca2Schristos (man_macro(n->tok)->flags & (MAN_NSCOPED | MAN_ESCOPED))
295*6167eca2Schristos == MAN_NSCOPED)
296c0d9444aSjoerg n = n->parent;
297c0d9444aSjoerg
298*6167eca2Schristos mandoc_msg(MANDOCERR_BLK_LINE, n->line, n->pos,
299*6167eca2Schristos "%s breaks %s", roff_name[tok], roff_name[n->tok]);
3004154958bSjoerg
301f47368cfSchristos roff_node_delete(man, n);
30270f041f9Sjoerg man->flags &= ~MAN_ELINE;
3034154958bSjoerg }
3044154958bSjoerg
30531e1f4e3Sjoerg /*
306f47368cfSchristos * Weird special case:
307f47368cfSchristos * Switching fill mode closes section headers.
308f47368cfSchristos */
309f47368cfSchristos
310f47368cfSchristos if (man->flags & MAN_BLINE &&
311*6167eca2Schristos (tok == ROFF_nf || tok == ROFF_fi) &&
312f47368cfSchristos (man->last->tok == MAN_SH || man->last->tok == MAN_SS)) {
313f47368cfSchristos n = man->last;
314f47368cfSchristos man_unscope(man, n);
315f47368cfSchristos roff_body_alloc(man, n->line, n->pos, n->tok);
316*6167eca2Schristos man->flags &= ~(MAN_BLINE | ROFF_NONOFILL);
317f47368cfSchristos }
318f47368cfSchristos
319f47368cfSchristos /*
320fec65c98Schristos * A block header next line scope is open,
321fec65c98Schristos * and the new macro is not allowed inside block headers.
322fec65c98Schristos * Delete the block that is being broken.
323cf816816Sjoerg */
324fec65c98Schristos
325*6167eca2Schristos if (man->flags & MAN_BLINE && tok != ROFF_nf && tok != ROFF_fi &&
326*6167eca2Schristos (tok < MAN_TH || man_macro(tok)->flags & MAN_XSCOPE)) {
32770f041f9Sjoerg n = man->last;
328f47368cfSchristos if (n->type == ROFFT_TEXT)
329fec65c98Schristos n = n->parent;
33014e7489eSchristos if (n->tok < MAN_TH ||
331*6167eca2Schristos (man_macro(n->tok)->flags & MAN_XSCOPE) == 0)
332cf816816Sjoerg n = n->parent;
333cf816816Sjoerg
334f47368cfSchristos assert(n->type == ROFFT_HEAD);
335cf816816Sjoerg n = n->parent;
336f47368cfSchristos assert(n->type == ROFFT_BLOCK);
337*6167eca2Schristos assert(man_macro(n->tok)->flags & MAN_BSCOPED);
338cf816816Sjoerg
339*6167eca2Schristos mandoc_msg(MANDOCERR_BLK_LINE, n->line, n->pos,
340*6167eca2Schristos "%s breaks %s", roff_name[tok], roff_name[n->tok]);
341cf816816Sjoerg
342f47368cfSchristos roff_node_delete(man, n);
343*6167eca2Schristos man->flags &= ~(MAN_BLINE | ROFF_NONOFILL);
344cf816816Sjoerg }
3454154958bSjoerg }
346