xref: /openbsd-src/usr.bin/mandoc/roff_escape.c (revision 700ead54516a6e68d161bf3f7b938e4403f771ff)
/* $OpenBSD: roff_escape.c,v 1.15 2024/05/16 21:21:08 schwarze Exp $ */
/*
 * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
 *               Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Parser for roff(7) escape sequences.
 * To be used by all mandoc(1) parsers and formatters.
 */
22cd14d642Sschwarze #include <assert.h>
23cd14d642Sschwarze #include <ctype.h>
24cd14d642Sschwarze #include <limits.h>
25cd14d642Sschwarze #include <stdio.h>
26cd14d642Sschwarze #include <string.h>
27cd14d642Sschwarze 
28cd14d642Sschwarze #include "mandoc.h"
29cd14d642Sschwarze #include "roff.h"
30cd14d642Sschwarze #include "roff_int.h"
31cd14d642Sschwarze 
32cd14d642Sschwarze /*
33cd14d642Sschwarze  * Traditional escape sequence interpreter for general use
34cd14d642Sschwarze  * including in high-level formatters.  This function does not issue
35cd14d642Sschwarze  * diagnostics and is not usable for expansion in the roff(7) parser.
36cd14d642Sschwarze  * It is documented in the mandoc_escape(3) manual page.
37cd14d642Sschwarze  */
38cd14d642Sschwarze enum mandoc_esc
mandoc_escape(const char ** rendarg,const char ** rarg,int * rargl)39cd14d642Sschwarze mandoc_escape(const char **rendarg, const char **rarg, int *rargl)
40cd14d642Sschwarze {
41cd14d642Sschwarze         int		 iarg, iendarg, iend;
42cd14d642Sschwarze         enum mandoc_esc  rval;
43cd14d642Sschwarze 
449784ce3eSschwarze         rval = roff_escape(--*rendarg, 0, 0,
459784ce3eSschwarze 	    NULL, NULL, &iarg, &iendarg, &iend);
46cd14d642Sschwarze         assert(rval != ESCAPE_EXPAND);
47cd14d642Sschwarze         if (rarg != NULL)
48cd14d642Sschwarze 	       *rarg = *rendarg + iarg;
49cd14d642Sschwarze         if (rargl != NULL)
50cd14d642Sschwarze 	       *rargl = iendarg - iarg;
51cd14d642Sschwarze         *rendarg += iend;
52cd14d642Sschwarze         return rval;
53cd14d642Sschwarze }
54cd14d642Sschwarze 
55cd14d642Sschwarze /*
56cd14d642Sschwarze  * Full-featured escape sequence parser.
57cd14d642Sschwarze  * If it encounters a nested escape sequence that requires expansion
58cd14d642Sschwarze  * by the parser and re-parsing, the positions of that inner escape
59cd14d642Sschwarze  * sequence are returned in *resc ... *rend.
60cd14d642Sschwarze  * Otherwise, *resc is set to aesc and the positions of the escape
61cd14d642Sschwarze  * sequence starting at aesc are returned.
62e44471f3Sschwarze  * Diagnostic messages are generated if and only if ln != 0,
63cd14d642Sschwarze  * that is, if and only if called by roff_expand().
64cd14d642Sschwarze  */
65cd14d642Sschwarze enum mandoc_esc
roff_escape(const char * buf,const int ln,const int aesc,int * resc,int * rnam,int * rarg,int * rendarg,int * rend)66cd14d642Sschwarze roff_escape(const char *buf, const int ln, const int aesc,
679784ce3eSschwarze     int *resc, int *rnam, int *rarg, int *rendarg, int *rend)
68cd14d642Sschwarze {
69cd14d642Sschwarze 	int		 iesc;		/* index of leading escape char */
709784ce3eSschwarze 	int		 inam;		/* index of escape name */
71cd14d642Sschwarze 	int		 iarg;		/* index beginning the argument */
72cd14d642Sschwarze 	int		 iendarg;	/* index right after the argument */
73cd14d642Sschwarze 	int		 iend;		/* index right after the sequence */
749784ce3eSschwarze 	int		 sesc, snam, sarg, sendarg, send; /* for sub-escape */
75e44471f3Sschwarze 	int		 escterm;	/* whether term is escaped */
76cd14d642Sschwarze 	int		 maxl;		/* expected length of the argument */
77cd14d642Sschwarze 	int		 argl;		/* actual length of the argument */
78cd14d642Sschwarze 	int		 c, i;		/* for \[char...] parsing */
7975a6bad9Sschwarze 	int 		 valid_A;	/* for \A parsing */
80cd14d642Sschwarze 	enum mandoc_esc	 rval;		/* return value */
81e44471f3Sschwarze 	enum mandoc_esc	 stype;		/* for sub-escape */
82cd14d642Sschwarze 	enum mandocerr	 err;		/* diagnostic code */
83cd14d642Sschwarze 	char		 term;		/* byte terminating the argument */
84cd14d642Sschwarze 
85cd14d642Sschwarze 	/*
86cd14d642Sschwarze 	 * Treat "\E" just like "\";
87cd14d642Sschwarze 	 * it only makes a difference in copy mode.
88cd14d642Sschwarze 	 */
89cd14d642Sschwarze 
909784ce3eSschwarze 	iesc = inam = aesc;
91cd14d642Sschwarze 	do {
929784ce3eSschwarze 		inam++;
939784ce3eSschwarze 	} while (buf[inam] == 'E');
94cd14d642Sschwarze 
95cd14d642Sschwarze 	/*
96cd14d642Sschwarze 	 * Sort the following cases first by syntax category,
97cd14d642Sschwarze 	 * then by escape sequence type, and finally by ASCII code.
98cd14d642Sschwarze 	 */
99cd14d642Sschwarze 
1009784ce3eSschwarze 	iarg = iendarg = iend = inam + 1;
101cd14d642Sschwarze 	maxl = INT_MAX;
102cd14d642Sschwarze 	term = '\0';
1037e16d1aeSschwarze 	err = MANDOCERR_OK;
1049784ce3eSschwarze 	switch (buf[inam]) {
105cd14d642Sschwarze 
106cd14d642Sschwarze 	/* Escape sequences taking no arguments at all. */
107cd14d642Sschwarze 
108cd14d642Sschwarze 	case '!':
109cd14d642Sschwarze 	case '?':
110aa5d553dSschwarze 	case 'r':
111cd14d642Sschwarze 		rval = ESCAPE_UNSUPP;
112cd14d642Sschwarze 		goto out;
113cd14d642Sschwarze 
114cd14d642Sschwarze 	case '%':
115cd14d642Sschwarze 	case '&':
116cd14d642Sschwarze 	case ')':
117cd14d642Sschwarze 	case ',':
118cd14d642Sschwarze 	case '/':
119cd14d642Sschwarze 	case '^':
120cd14d642Sschwarze 	case 'a':
121cd14d642Sschwarze 	case 'd':
122cd14d642Sschwarze 	case 't':
123cd14d642Sschwarze 	case 'u':
124cd14d642Sschwarze 	case '{':
125cd14d642Sschwarze 	case '|':
126cd14d642Sschwarze 	case '}':
127cd14d642Sschwarze 		rval = ESCAPE_IGNORE;
128cd14d642Sschwarze 		goto out;
129cd14d642Sschwarze 
130f7c16774Sschwarze 	case '\0':
131f7c16774Sschwarze 		iendarg = --iend;
132f7c16774Sschwarze 		/* FALLTHROUGH */
1330ecccf00Sschwarze 	case '.':
134cd14d642Sschwarze 	case '\\':
135cd14d642Sschwarze 	default:
136cd14d642Sschwarze 		iarg--;
137cd14d642Sschwarze 		rval = ESCAPE_UNDEF;
138cd14d642Sschwarze 		goto out;
139cd14d642Sschwarze 
140cd14d642Sschwarze 	case ' ':
141cd14d642Sschwarze 	case '\'':
142cd14d642Sschwarze 	case '-':
143cd14d642Sschwarze 	case '0':
144cd14d642Sschwarze 	case ':':
145cd14d642Sschwarze 	case '_':
146cd14d642Sschwarze 	case '`':
147cd14d642Sschwarze 	case 'e':
148cd14d642Sschwarze 	case '~':
149cd14d642Sschwarze 		iarg--;
150cd14d642Sschwarze 		argl = 1;
151cd14d642Sschwarze 		rval = ESCAPE_SPECIAL;
152cd14d642Sschwarze 		goto out;
153cd14d642Sschwarze 	case 'p':
154cd14d642Sschwarze 		rval = ESCAPE_BREAK;
155cd14d642Sschwarze 		goto out;
156cd14d642Sschwarze 	case 'c':
157cd14d642Sschwarze 		rval = ESCAPE_NOSPACE;
158cd14d642Sschwarze 		goto out;
159cd14d642Sschwarze 	case 'z':
160cd14d642Sschwarze 		rval = ESCAPE_SKIPCHAR;
161cd14d642Sschwarze 		goto out;
162cd14d642Sschwarze 
163cd14d642Sschwarze 	/* Standard argument format. */
164cd14d642Sschwarze 
165cd14d642Sschwarze 	case '$':
166cd14d642Sschwarze 	case '*':
16783a9dfe1Sschwarze 	case 'V':
1686f49ebc4Sschwarze 	case 'g':
169cd14d642Sschwarze 	case 'n':
170cd14d642Sschwarze 		rval = ESCAPE_EXPAND;
171cd14d642Sschwarze 		break;
172cd14d642Sschwarze 	case 'F':
173cd14d642Sschwarze 	case 'M':
174cd14d642Sschwarze 	case 'O':
175cd14d642Sschwarze 	case 'Y':
176cd14d642Sschwarze 	case 'k':
177cd14d642Sschwarze 	case 'm':
178cd14d642Sschwarze 		rval = ESCAPE_IGNORE;
179cd14d642Sschwarze 		break;
180cd14d642Sschwarze 	case '(':
181cd14d642Sschwarze 	case '[':
182cd14d642Sschwarze 		rval = ESCAPE_SPECIAL;
183cd14d642Sschwarze 		iendarg = iend = --iarg;
184cd14d642Sschwarze 		break;
185cd14d642Sschwarze 	case 'f':
186cd14d642Sschwarze 		rval = ESCAPE_FONT;
187cd14d642Sschwarze 		break;
188cd14d642Sschwarze 
189cd14d642Sschwarze 	/* Quoted arguments */
190cd14d642Sschwarze 
19175a6bad9Sschwarze 	case 'A':
192cd14d642Sschwarze 	case 'B':
193cd14d642Sschwarze 	case 'w':
194cd14d642Sschwarze 		rval = ESCAPE_EXPAND;
195cd14d642Sschwarze 		term = '\b';
196cd14d642Sschwarze 		break;
197cd14d642Sschwarze 	case 'D':
198cd14d642Sschwarze 	case 'H':
199cd14d642Sschwarze 	case 'L':
200cd14d642Sschwarze 	case 'R':
201cd14d642Sschwarze 	case 'S':
202cd14d642Sschwarze 	case 'X':
203cd14d642Sschwarze 	case 'Z':
204cd14d642Sschwarze 	case 'b':
205cd14d642Sschwarze 	case 'v':
206cd14d642Sschwarze 	case 'x':
207cd14d642Sschwarze 		rval = ESCAPE_IGNORE;
208cd14d642Sschwarze 		term = '\b';
209cd14d642Sschwarze 		break;
210cd14d642Sschwarze 	case 'C':
211cd14d642Sschwarze 		rval = ESCAPE_SPECIAL;
212cd14d642Sschwarze 		term = '\b';
213cd14d642Sschwarze 		break;
214cd14d642Sschwarze 	case 'N':
215cd14d642Sschwarze 		rval = ESCAPE_NUMBERED;
216cd14d642Sschwarze 		term = '\b';
217cd14d642Sschwarze 		break;
218cd14d642Sschwarze 	case 'h':
219cd14d642Sschwarze 		rval = ESCAPE_HORIZ;
220cd14d642Sschwarze 		term = '\b';
221cd14d642Sschwarze 		break;
222cd14d642Sschwarze 	case 'l':
223cd14d642Sschwarze 		rval = ESCAPE_HLINE;
224cd14d642Sschwarze 		term = '\b';
225cd14d642Sschwarze 		break;
226cd14d642Sschwarze 	case 'o':
227cd14d642Sschwarze 		rval = ESCAPE_OVERSTRIKE;
228cd14d642Sschwarze 		term = '\b';
229cd14d642Sschwarze 		break;
230cd14d642Sschwarze 
231cd14d642Sschwarze 	/* Sizes support both forms, with additional peculiarities. */
232cd14d642Sschwarze 
233cd14d642Sschwarze 	case 's':
234cd14d642Sschwarze 		rval = ESCAPE_IGNORE;
235cd14d642Sschwarze 		if (buf[iarg] == '+' || buf[iarg] == '-'||
236cd14d642Sschwarze 		    buf[iarg] == ASCII_HYPH)
237cd14d642Sschwarze 			iarg++;
238cd14d642Sschwarze 		switch (buf[iarg]) {
239cd14d642Sschwarze 		case '(':
240cd14d642Sschwarze 			maxl = 2;
241cd14d642Sschwarze 			iarg++;
242cd14d642Sschwarze 			break;
243cd14d642Sschwarze 		case '[':
244cd14d642Sschwarze 			term = ']';
245cd14d642Sschwarze 			iarg++;
246cd14d642Sschwarze 			break;
247cd14d642Sschwarze 		case '\'':
248cd14d642Sschwarze 			term = '\'';
249cd14d642Sschwarze 			iarg++;
250cd14d642Sschwarze 			break;
251cd14d642Sschwarze 		case '1':
252cd14d642Sschwarze 		case '2':
253cd14d642Sschwarze 		case '3':
254cd14d642Sschwarze 			if (buf[iarg - 1] == 's' &&
255cd14d642Sschwarze 			    isdigit((unsigned char)buf[iarg + 1])) {
256cd14d642Sschwarze 				maxl = 2;
257cd14d642Sschwarze 				break;
258cd14d642Sschwarze 			}
259cd14d642Sschwarze 			/* FALLTHROUGH */
260cd14d642Sschwarze 		default:
261cd14d642Sschwarze 			maxl = 1;
262cd14d642Sschwarze 			break;
263cd14d642Sschwarze 		}
264cd14d642Sschwarze 		iendarg = iend = iarg;
265cd14d642Sschwarze 	}
266cd14d642Sschwarze 
267cd14d642Sschwarze 	/* Decide how to end the argument. */
268cd14d642Sschwarze 
269e44471f3Sschwarze 	escterm = 0;
270e44471f3Sschwarze 	stype = ESCAPE_EXPAND;
271cd14d642Sschwarze 	if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) &&
272e44471f3Sschwarze 	    buf[iarg] == buf[iesc]) {
273e44471f3Sschwarze 		stype = roff_escape(buf, ln, iendarg,
274e44471f3Sschwarze 		    &sesc, &snam, &sarg, &sendarg, &send);
275e44471f3Sschwarze 		if (stype == ESCAPE_EXPAND)
276cd14d642Sschwarze 			goto out_sub;
277e44471f3Sschwarze 	}
278cd14d642Sschwarze 
279cd14d642Sschwarze 	if (term == '\b') {
280e44471f3Sschwarze 		if (stype == ESCAPE_UNDEF)
281e44471f3Sschwarze 			iarg++;
282e44471f3Sschwarze 		if (stype != ESCAPE_EXPAND && stype != ESCAPE_UNDEF) {
283e44471f3Sschwarze 			if (strchr("BHLRSNhlvx", buf[inam]) != NULL &&
284e44471f3Sschwarze 			    strchr(" ,.0DLOXYZ^abdhlortuvx|~",
285e44471f3Sschwarze 			    buf[snam]) != NULL) {
286e44471f3Sschwarze 				err = MANDOCERR_ESC_DELIM;
287e44471f3Sschwarze 				iend = send;
288e44471f3Sschwarze 				iarg = iendarg = sesc;
289e44471f3Sschwarze 				goto out;
290e44471f3Sschwarze 			}
291e44471f3Sschwarze 			escterm = 1;
292e44471f3Sschwarze 			iarg = send;
293e44471f3Sschwarze 			term = buf[snam];
294e44471f3Sschwarze 		} else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL &&
295d91756bbSschwarze 		    strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) {
29639be06a3Sschwarze 			err = MANDOCERR_ESC_DELIM;
297d91756bbSschwarze 			if (rval != ESCAPE_EXPAND)
298cd14d642Sschwarze 				rval = ESCAPE_ERROR;
299d91756bbSschwarze 			if (buf[inam] != 'D') {
300d91756bbSschwarze 				iendarg = iend = iarg + 1;
301cd14d642Sschwarze 				goto out;
302cd14d642Sschwarze 			}
303d91756bbSschwarze 		}
304e44471f3Sschwarze 		if (term == '\b')
305cd14d642Sschwarze 			term = buf[iarg++];
306cd14d642Sschwarze 	} else if (term == '\0' && maxl == INT_MAX) {
3079784ce3eSschwarze 		if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-'))
308cd14d642Sschwarze 			iarg++;
309cd14d642Sschwarze 		switch (buf[iarg]) {
310cd14d642Sschwarze 		case '(':
311cd14d642Sschwarze 			maxl = 2;
312cd14d642Sschwarze 			iarg++;
313cd14d642Sschwarze 			break;
314cd14d642Sschwarze 		case '[':
315cd14d642Sschwarze 			if (buf[++iarg] == ' ') {
316cd14d642Sschwarze 				iendarg = iend = iarg + 1;
31739be06a3Sschwarze 				err = MANDOCERR_ESC_ARG;
318cd14d642Sschwarze 				rval = ESCAPE_ERROR;
319cd14d642Sschwarze 				goto out;
320cd14d642Sschwarze 			}
321cd14d642Sschwarze 			term = ']';
322cd14d642Sschwarze 			break;
323cd14d642Sschwarze 		default:
324cd14d642Sschwarze 			maxl = 1;
325cd14d642Sschwarze 			break;
326cd14d642Sschwarze 		}
327cd14d642Sschwarze 	}
328cd14d642Sschwarze 
329cd14d642Sschwarze 	/* Advance to the end of the argument. */
330cd14d642Sschwarze 
33175a6bad9Sschwarze 	valid_A = 1;
332cd14d642Sschwarze 	iendarg = iarg;
333cd14d642Sschwarze 	while (maxl > 0) {
334cd14d642Sschwarze 		if (buf[iendarg] == '\0') {
335f8e3b479Sschwarze 			err = MANDOCERR_ESC_INCOMPLETE;
336e44471f3Sschwarze 			if (rval != ESCAPE_EXPAND &&
337e44471f3Sschwarze 			    rval != ESCAPE_OVERSTRIKE)
338f8e3b479Sschwarze 				rval = ESCAPE_ERROR;
339e44471f3Sschwarze 			/* Usually, ignore an incomplete argument. */
340e44471f3Sschwarze 			if (strchr("Aow", buf[inam]) == NULL)
341cd14d642Sschwarze 				iendarg = iarg;
342cd14d642Sschwarze 			break;
343cd14d642Sschwarze 		}
344e44471f3Sschwarze 		if (escterm == 0 && buf[iendarg] == term) {
345cd14d642Sschwarze 			iend = iendarg + 1;
346cd14d642Sschwarze 			break;
347cd14d642Sschwarze 		}
348cd14d642Sschwarze 		if (buf[iendarg] == buf[iesc]) {
349e44471f3Sschwarze 			stype = roff_escape(buf, ln, iendarg,
350e44471f3Sschwarze 			    &sesc, &snam, &sarg, &sendarg, &send);
351e44471f3Sschwarze 			if (stype == ESCAPE_EXPAND)
352cd14d642Sschwarze 				goto out_sub;
353e44471f3Sschwarze 			iend = send;
354e44471f3Sschwarze 			if (escterm == 1 &&
355e44471f3Sschwarze 			    (buf[snam] == term || buf[inam] == 'N'))
35675a6bad9Sschwarze 				break;
357e44471f3Sschwarze 			if (stype != ESCAPE_UNDEF)
35875a6bad9Sschwarze 				valid_A = 0;
359e44471f3Sschwarze 			iendarg = send;
360e44471f3Sschwarze 		} else if (buf[inam] == 'N' &&
361e44471f3Sschwarze 		    isdigit((unsigned char)buf[iendarg]) == 0) {
362e44471f3Sschwarze 			iend = iendarg + 1;
36375a6bad9Sschwarze 			break;
364cd14d642Sschwarze 		} else {
36575a6bad9Sschwarze 			if (buf[iendarg] == ' ' || buf[iendarg] == '\t')
36675a6bad9Sschwarze 				valid_A = 0;
367cd14d642Sschwarze 			if (maxl != INT_MAX)
368cd14d642Sschwarze 				maxl--;
369cd14d642Sschwarze 			iend = ++iendarg;
370cd14d642Sschwarze 		}
371cd14d642Sschwarze 	}
372cd14d642Sschwarze 
373cd14d642Sschwarze 	/* Post-process depending on the content of the argument. */
374cd14d642Sschwarze 
375cd14d642Sschwarze 	argl = iendarg - iarg;
3769784ce3eSschwarze 	switch (buf[inam]) {
377cd14d642Sschwarze 	case '*':
378cd14d642Sschwarze 		if (resc == NULL && argl == 2 &&
379cd14d642Sschwarze 		    buf[iarg] == '.' && buf[iarg + 1] == 'T')
380cd14d642Sschwarze 			rval = ESCAPE_DEVICE;
381cd14d642Sschwarze 		break;
38275a6bad9Sschwarze 	case 'A':
38375a6bad9Sschwarze 		if (valid_A == 0)
38475a6bad9Sschwarze 			iendarg = iarg;
38575a6bad9Sschwarze 		break;
386cd14d642Sschwarze 	case 'O':
387cd14d642Sschwarze 		switch (buf[iarg]) {
388cd14d642Sschwarze 		case '0':
389cd14d642Sschwarze 			rval = ESCAPE_UNSUPP;
390cd14d642Sschwarze 			break;
391cd14d642Sschwarze 		case '1':
392cd14d642Sschwarze 		case '2':
393cd14d642Sschwarze 		case '3':
394cd14d642Sschwarze 		case '4':
39539be06a3Sschwarze 			if (argl == 1)
39639be06a3Sschwarze 				rval = ESCAPE_IGNORE;
39739be06a3Sschwarze 			else {
39839be06a3Sschwarze 				err = MANDOCERR_ESC_ARG;
39939be06a3Sschwarze 				rval = ESCAPE_ERROR;
40039be06a3Sschwarze 			}
401cd14d642Sschwarze 			break;
402cd14d642Sschwarze 		case '5':
40339be06a3Sschwarze 			if (buf[iarg - 1] == '[')
40439be06a3Sschwarze 				rval = ESCAPE_UNSUPP;
40539be06a3Sschwarze 			else {
40639be06a3Sschwarze 				err = MANDOCERR_ESC_ARG;
40739be06a3Sschwarze 				rval = ESCAPE_ERROR;
40839be06a3Sschwarze 			}
409cd14d642Sschwarze 			break;
410cd14d642Sschwarze 		default:
41139be06a3Sschwarze 			err = MANDOCERR_ESC_ARG;
412cd14d642Sschwarze 			rval = ESCAPE_ERROR;
413cd14d642Sschwarze 			break;
414cd14d642Sschwarze 		}
415cd14d642Sschwarze 		break;
416cd14d642Sschwarze 	default:
417cd14d642Sschwarze 		break;
418cd14d642Sschwarze 	}
419cd14d642Sschwarze 
420cd14d642Sschwarze 	switch (rval) {
421cd14d642Sschwarze 	case ESCAPE_FONT:
422cd14d642Sschwarze 		rval = mandoc_font(buf + iarg, argl);
42339be06a3Sschwarze 		if (rval == ESCAPE_ERROR)
42439be06a3Sschwarze 			err = MANDOCERR_ESC_ARG;
425cd14d642Sschwarze 		break;
426cd14d642Sschwarze 
427cd14d642Sschwarze 	case ESCAPE_SPECIAL:
4287711a225Sschwarze 		if (argl == 0) {
4297711a225Sschwarze 			err = MANDOCERR_ESC_BADCHAR;
4307711a225Sschwarze 			rval = ESCAPE_ERROR;
4317711a225Sschwarze 			break;
4327711a225Sschwarze 		}
433cd14d642Sschwarze 
434cd14d642Sschwarze 		/*
435cd14d642Sschwarze 		 * The file chars.c only provides one common list of
436cd14d642Sschwarze 		 * character names, but \[-] == \- is the only one of
437cd14d642Sschwarze 		 * the characters with one-byte names that allows
438cd14d642Sschwarze 		 * enclosing the name in brackets.
439cd14d642Sschwarze 		 */
440cd14d642Sschwarze 
441cd14d642Sschwarze 		if (term != '\0' && argl == 1 && buf[iarg] != '-') {
442f8e3b479Sschwarze 			err = MANDOCERR_ESC_BADCHAR;
443cd14d642Sschwarze 			rval = ESCAPE_ERROR;
444cd14d642Sschwarze 			break;
445cd14d642Sschwarze 		}
446cd14d642Sschwarze 
447cd14d642Sschwarze 		/* Treat \[char...] as an alias for \N'...'. */
448cd14d642Sschwarze 
449cd14d642Sschwarze 		if (buf[iarg] == 'c') {
450cd14d642Sschwarze 			if (argl < 6 || argl > 7 ||
451cd14d642Sschwarze 			    strncmp(buf + iarg, "char", 4) != 0 ||
452cd14d642Sschwarze 			    (int)strspn(buf + iarg + 4, "0123456789")
453cd14d642Sschwarze 			     + 4 < argl)
454cd14d642Sschwarze 				break;
455cd14d642Sschwarze 			c = 0;
456cd14d642Sschwarze 			for (i = iarg; i < iendarg; i++)
457cd14d642Sschwarze 				c = 10 * c + (buf[i] - '0');
458f8e3b479Sschwarze 			if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) {
459f8e3b479Sschwarze 				err = MANDOCERR_ESC_BADCHAR;
460cd14d642Sschwarze 				break;
461f8e3b479Sschwarze 			}
462cd14d642Sschwarze 			iarg += 4;
463cd14d642Sschwarze 			rval = ESCAPE_NUMBERED;
464cd14d642Sschwarze 			break;
465cd14d642Sschwarze 		}
466cd14d642Sschwarze 
467cd14d642Sschwarze 		/*
468cd14d642Sschwarze 		 * Unicode escapes are defined in groff as \[u0000]
469cd14d642Sschwarze 		 * to \[u10FFFF], where the contained value must be
470*700ead54Sschwarze 		 * a valid Unicode codepoint.
471cd14d642Sschwarze 		 */
472cd14d642Sschwarze 
473cd14d642Sschwarze 		if (buf[iarg] != 'u' || argl < 5 || argl > 7)
474cd14d642Sschwarze 			break;
475*700ead54Sschwarze 		if (argl == 7 &&  /* beyond the Unicode range */
476f8e3b479Sschwarze 		    (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) {
477f8e3b479Sschwarze 			err = MANDOCERR_ESC_BADCHAR;
478cd14d642Sschwarze 			break;
479f8e3b479Sschwarze 		}
480f8e3b479Sschwarze 		if (argl == 6 && buf[iarg + 1] == '0') {
481f8e3b479Sschwarze 			err = MANDOCERR_ESC_BADCHAR;
482cd14d642Sschwarze 			break;
483f8e3b479Sschwarze 		}
484*700ead54Sschwarze 		if (argl == 5 &&  /* UTF-16 surrogate */
485*700ead54Sschwarze 		    toupper((unsigned char)buf[iarg + 1]) == 'D' &&
486*700ead54Sschwarze 		    strchr("89ABCDEFabcdef", buf[iarg + 2]) != NULL) {
487f8e3b479Sschwarze 			err = MANDOCERR_ESC_BADCHAR;
488cd14d642Sschwarze 			break;
489f8e3b479Sschwarze 		}
490cd14d642Sschwarze 		if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef")
491cd14d642Sschwarze 		    + 1 == argl)
492cd14d642Sschwarze 			rval = ESCAPE_UNICODE;
493cd14d642Sschwarze 		break;
494cd14d642Sschwarze 	default:
495cd14d642Sschwarze 		break;
496cd14d642Sschwarze 	}
497cd14d642Sschwarze 	goto out;
498cd14d642Sschwarze 
499cd14d642Sschwarze out_sub:
500cd14d642Sschwarze 	iesc = sesc;
5019784ce3eSschwarze 	inam = snam;
502cd14d642Sschwarze 	iarg = sarg;
503cd14d642Sschwarze 	iendarg = sendarg;
504cd14d642Sschwarze 	iend = send;
505cd14d642Sschwarze 	rval = ESCAPE_EXPAND;
506cd14d642Sschwarze 
507cd14d642Sschwarze out:
508e44471f3Sschwarze 	if (resc != NULL)
509e44471f3Sschwarze 		*resc = iesc;
5109784ce3eSschwarze 	if (rnam != NULL)
5119784ce3eSschwarze 		*rnam = inam;
512cd14d642Sschwarze 	if (rarg != NULL)
513cd14d642Sschwarze 		*rarg = iarg;
514cd14d642Sschwarze 	if (rendarg != NULL)
515cd14d642Sschwarze 		*rendarg = iendarg;
516cd14d642Sschwarze 	if (rend != NULL)
517cd14d642Sschwarze 		*rend = iend;
518e44471f3Sschwarze 	if (ln == 0)
519cd14d642Sschwarze 		return rval;
520cd14d642Sschwarze 
521cd14d642Sschwarze 	/*
522cd14d642Sschwarze 	 * Diagnostic messages are only issued when called
523cd14d642Sschwarze 	 * from the parser, not when called from the formatters.
524cd14d642Sschwarze 	 */
525cd14d642Sschwarze 
526cd14d642Sschwarze 	switch (rval) {
527cd14d642Sschwarze 	case ESCAPE_UNSUPP:
528cd14d642Sschwarze 		err = MANDOCERR_ESC_UNSUPP;
529cd14d642Sschwarze 		break;
530cd14d642Sschwarze 	case ESCAPE_UNDEF:
5317e16d1aeSschwarze 		if (buf[inam] != '\\' && buf[inam] != '.')
532cd14d642Sschwarze 			err = MANDOCERR_ESC_UNDEF;
533cd14d642Sschwarze 		break;
534cd14d642Sschwarze 	case ESCAPE_SPECIAL:
535f8e3b479Sschwarze 		if (mchars_spec2cp(buf + iarg, argl) >= 0)
536f8e3b479Sschwarze 			err = MANDOCERR_OK;
537f8e3b479Sschwarze 		else if (err == MANDOCERR_OK)
538f8e3b479Sschwarze 			err = MANDOCERR_ESC_UNKCHAR;
539cd14d642Sschwarze 		break;
540cd14d642Sschwarze 	default:
5417e16d1aeSschwarze 		break;
542cd14d642Sschwarze 	}
5437e16d1aeSschwarze 	if (err != MANDOCERR_OK)
544cd14d642Sschwarze 		mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc);
545cd14d642Sschwarze 	return rval;
546cd14d642Sschwarze }
547