xref: /netbsd-src/external/bsd/mdocml/dist/mandoc.c (revision 544c191c349c1704c9d5e679d12ec15cff579663)
1*544c191cSchristos /*	Id: mandoc.c,v 1.114 2018/12/30 00:49:55 schwarze Exp  */
24154958bSjoerg /*
3fec65c98Schristos  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4c9bcef03Schristos  * Copyright (c) 2011-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
54154958bSjoerg  *
64154958bSjoerg  * Permission to use, copy, modify, and distribute this software for any
74154958bSjoerg  * purpose with or without fee is hereby granted, provided that the above
84154958bSjoerg  * copyright notice and this permission notice appear in all copies.
94154958bSjoerg  *
10c0d9444aSjoerg  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
114154958bSjoerg  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12c0d9444aSjoerg  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
134154958bSjoerg  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
144154958bSjoerg  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
154154958bSjoerg  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
164154958bSjoerg  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
174154958bSjoerg  */
18d5e63c8dSjoerg #include "config.h"
193514411fSjoerg 
204154958bSjoerg #include <sys/types.h>
214154958bSjoerg 
224154958bSjoerg #include <assert.h>
234154958bSjoerg #include <ctype.h>
24c5f73b34Sjoerg #include <errno.h>
25c5f73b34Sjoerg #include <limits.h>
264154958bSjoerg #include <stdlib.h>
273514411fSjoerg #include <stdio.h>
283514411fSjoerg #include <string.h>
293514411fSjoerg #include <time.h>
304154958bSjoerg 
31fec65c98Schristos #include "mandoc_aux.h"
32c9bcef03Schristos #include "mandoc.h"
33c9bcef03Schristos #include "roff.h"
344154958bSjoerg #include "libmandoc.h"
35*544c191cSchristos #include "roff_int.h"
364154958bSjoerg 
3748741257Sjoerg static	int	 a2time(time_t *, const char *, const char *);
3848741257Sjoerg static	char	*time2a(time_t);
393514411fSjoerg 
40c5f73b34Sjoerg 
41c5f73b34Sjoerg enum mandoc_esc
mandoc_font(const char * cp,int sz)42*544c191cSchristos mandoc_font(const char *cp, int sz)
43*544c191cSchristos {
44*544c191cSchristos 	switch (sz) {
45*544c191cSchristos 	case 0:
46*544c191cSchristos 		return ESCAPE_FONTPREV;
47*544c191cSchristos 	case 1:
48*544c191cSchristos 		switch (cp[0]) {
49*544c191cSchristos 		case 'B':
50*544c191cSchristos 		case '3':
51*544c191cSchristos 			return ESCAPE_FONTBOLD;
52*544c191cSchristos 		case 'I':
53*544c191cSchristos 		case '2':
54*544c191cSchristos 			return ESCAPE_FONTITALIC;
55*544c191cSchristos 		case 'P':
56*544c191cSchristos 			return ESCAPE_FONTPREV;
57*544c191cSchristos 		case 'R':
58*544c191cSchristos 		case '1':
59*544c191cSchristos 			return ESCAPE_FONTROMAN;
60*544c191cSchristos 		case '4':
61*544c191cSchristos 			return ESCAPE_FONTBI;
62*544c191cSchristos 		default:
63*544c191cSchristos 			return ESCAPE_ERROR;
64*544c191cSchristos 		}
65*544c191cSchristos 	case 2:
66*544c191cSchristos 		switch (cp[0]) {
67*544c191cSchristos 		case 'B':
68*544c191cSchristos 			switch (cp[1]) {
69*544c191cSchristos 			case 'I':
70*544c191cSchristos 				return ESCAPE_FONTBI;
71*544c191cSchristos 			default:
72*544c191cSchristos 				return ESCAPE_ERROR;
73*544c191cSchristos 			}
74*544c191cSchristos 		case 'C':
75*544c191cSchristos 			switch (cp[1]) {
76*544c191cSchristos 			case 'B':
77*544c191cSchristos 				return ESCAPE_FONTBOLD;
78*544c191cSchristos 			case 'I':
79*544c191cSchristos 				return ESCAPE_FONTITALIC;
80*544c191cSchristos 			case 'R':
81*544c191cSchristos 			case 'W':
82*544c191cSchristos 				return ESCAPE_FONTCW;
83*544c191cSchristos 			default:
84*544c191cSchristos 				return ESCAPE_ERROR;
85*544c191cSchristos 			}
86*544c191cSchristos 		default:
87*544c191cSchristos 			return ESCAPE_ERROR;
88*544c191cSchristos 		}
89*544c191cSchristos 	default:
90*544c191cSchristos 		return ESCAPE_ERROR;
91*544c191cSchristos 	}
92*544c191cSchristos }
93*544c191cSchristos 
94*544c191cSchristos enum mandoc_esc
mandoc_escape(const char ** end,const char ** start,int * sz)95c5f73b34Sjoerg mandoc_escape(const char **end, const char **start, int *sz)
96c5f73b34Sjoerg {
9770f041f9Sjoerg 	const char	*local_start;
98*544c191cSchristos 	int		 local_sz, c, i;
9970f041f9Sjoerg 	char		 term;
100c5f73b34Sjoerg 	enum mandoc_esc	 gly;
101c5f73b34Sjoerg 
10270f041f9Sjoerg 	/*
10370f041f9Sjoerg 	 * When the caller doesn't provide return storage,
10470f041f9Sjoerg 	 * use local storage.
10570f041f9Sjoerg 	 */
106c5f73b34Sjoerg 
10770f041f9Sjoerg 	if (NULL == start)
10870f041f9Sjoerg 		start = &local_start;
10970f041f9Sjoerg 	if (NULL == sz)
11070f041f9Sjoerg 		sz = &local_sz;
11170f041f9Sjoerg 
11270f041f9Sjoerg 	/*
113*544c191cSchristos 	 * Treat "\E" just like "\";
114*544c191cSchristos 	 * it only makes a difference in copy mode.
115*544c191cSchristos 	 */
116*544c191cSchristos 
117*544c191cSchristos 	if (**end == 'E')
118*544c191cSchristos 		++*end;
119*544c191cSchristos 
120*544c191cSchristos 	/*
12170f041f9Sjoerg 	 * Beyond the backslash, at least one input character
12270f041f9Sjoerg 	 * is part of the escape sequence.  With one exception
12370f041f9Sjoerg 	 * (see below), that character won't be returned.
12470f041f9Sjoerg 	 */
12570f041f9Sjoerg 
12670f041f9Sjoerg 	gly = ESCAPE_ERROR;
12770f041f9Sjoerg 	*start = ++*end;
12870f041f9Sjoerg 	*sz = 0;
12970f041f9Sjoerg 	term = '\0';
13070f041f9Sjoerg 
13170f041f9Sjoerg 	switch ((*start)[-1]) {
132c5f73b34Sjoerg 	/*
133c5f73b34Sjoerg 	 * First the glyphs.  There are several different forms of
134c5f73b34Sjoerg 	 * these, but each eventually returns a substring of the glyph
135c5f73b34Sjoerg 	 * name.
136c5f73b34Sjoerg 	 */
137fec65c98Schristos 	case '(':
138c5f73b34Sjoerg 		gly = ESCAPE_SPECIAL;
13970f041f9Sjoerg 		*sz = 2;
1407da9b934Sjoerg 		break;
141fec65c98Schristos 	case '[':
142*544c191cSchristos 		if (**start == ' ') {
143*544c191cSchristos 			++*end;
144*544c191cSchristos 			return ESCAPE_ERROR;
145*544c191cSchristos 		}
146c5f73b34Sjoerg 		gly = ESCAPE_SPECIAL;
1477da9b934Sjoerg 		term = ']';
1487da9b934Sjoerg 		break;
149fec65c98Schristos 	case 'C':
15070f041f9Sjoerg 		if ('\'' != **start)
1519ff1f2acSchristos 			return ESCAPE_ERROR;
15270f041f9Sjoerg 		*start = ++*end;
153c5f73b34Sjoerg 		gly = ESCAPE_SPECIAL;
1547da9b934Sjoerg 		term = '\'';
1557da9b934Sjoerg 		break;
156c5f73b34Sjoerg 
157c5f73b34Sjoerg 	/*
15870f041f9Sjoerg 	 * Escapes taking no arguments at all.
15970f041f9Sjoerg 	 */
160*544c191cSchristos 	case '!':
161*544c191cSchristos 	case '?':
162*544c191cSchristos 		return ESCAPE_UNSUPP;
163*544c191cSchristos 	case '%':
164*544c191cSchristos 	case '&':
165*544c191cSchristos 	case ')':
1669ff1f2acSchristos 	case ',':
1679ff1f2acSchristos 	case '/':
168*544c191cSchristos 	case '^':
169*544c191cSchristos 	case 'a':
170*544c191cSchristos 	case 'd':
171*544c191cSchristos 	case 'r':
172*544c191cSchristos 	case 't':
173*544c191cSchristos 	case 'u':
174*544c191cSchristos 	case '{':
175*544c191cSchristos 	case '|':
176*544c191cSchristos 	case '}':
1779ff1f2acSchristos 		return ESCAPE_IGNORE;
178*544c191cSchristos 	case 'c':
179*544c191cSchristos 		return ESCAPE_NOSPACE;
180c9bcef03Schristos 	case 'p':
181c9bcef03Schristos 		return ESCAPE_BREAK;
18270f041f9Sjoerg 
18370f041f9Sjoerg 	/*
18470f041f9Sjoerg 	 * The \z escape is supposed to output the following
18570f041f9Sjoerg 	 * character without advancing the cursor position.
18670f041f9Sjoerg 	 * Since we are mostly dealing with terminal mode,
18770f041f9Sjoerg 	 * let us just skip the next character.
18870f041f9Sjoerg 	 */
189fec65c98Schristos 	case 'z':
1909ff1f2acSchristos 		return ESCAPE_SKIPCHAR;
19170f041f9Sjoerg 
19270f041f9Sjoerg 	/*
193c5f73b34Sjoerg 	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
194c5f73b34Sjoerg 	 * 'X' is the trigger.  These have opaque sub-strings.
195c5f73b34Sjoerg 	 */
196fec65c98Schristos 	case 'F':
197*544c191cSchristos 	case 'f':
198fec65c98Schristos 	case 'g':
199fec65c98Schristos 	case 'k':
200fec65c98Schristos 	case 'M':
201fec65c98Schristos 	case 'm':
202fec65c98Schristos 	case 'n':
203*544c191cSchristos 	case 'O':
204fec65c98Schristos 	case 'V':
205fec65c98Schristos 	case 'Y':
206*544c191cSchristos 		gly = (*start)[-1] == 'f' ? ESCAPE_FONT : ESCAPE_IGNORE;
20770f041f9Sjoerg 		switch (**start) {
208fec65c98Schristos 		case '(':
209*544c191cSchristos 			if ((*start)[-1] == 'O')
210*544c191cSchristos 				gly = ESCAPE_ERROR;
21170f041f9Sjoerg 			*start = ++*end;
21270f041f9Sjoerg 			*sz = 2;
2134154958bSjoerg 			break;
214fec65c98Schristos 		case '[':
215*544c191cSchristos 			if ((*start)[-1] == 'O')
216*544c191cSchristos 				gly = (*start)[1] == '5' ?
217*544c191cSchristos 				    ESCAPE_UNSUPP : ESCAPE_ERROR;
21870f041f9Sjoerg 			*start = ++*end;
2197da9b934Sjoerg 			term = ']';
2204154958bSjoerg 			break;
2214154958bSjoerg 		default:
222*544c191cSchristos 			if ((*start)[-1] == 'O') {
223*544c191cSchristos 				switch (**start) {
224*544c191cSchristos 				case '0':
225*544c191cSchristos 					gly = ESCAPE_UNSUPP;
226*544c191cSchristos 					break;
227*544c191cSchristos 				case '1':
228*544c191cSchristos 				case '2':
229*544c191cSchristos 				case '3':
230*544c191cSchristos 				case '4':
231*544c191cSchristos 					break;
232*544c191cSchristos 				default:
233*544c191cSchristos 					gly = ESCAPE_ERROR;
234*544c191cSchristos 					break;
235*544c191cSchristos 				}
236*544c191cSchristos 			}
23770f041f9Sjoerg 			*sz = 1;
2387da9b934Sjoerg 			break;
2397da9b934Sjoerg 		}
2407da9b934Sjoerg 		break;
241*544c191cSchristos 	case '*':
242*544c191cSchristos 		if (strncmp(*start, "(.T", 3) != 0)
243*544c191cSchristos 			abort();
244*544c191cSchristos 		gly = ESCAPE_DEVICE;
245*544c191cSchristos 		*start = ++*end;
246*544c191cSchristos 		*sz = 2;
247*544c191cSchristos 		break;
248c5f73b34Sjoerg 
249c5f73b34Sjoerg 	/*
250c5f73b34Sjoerg 	 * These escapes are of the form \X'Y', where 'X' is the trigger
251c5f73b34Sjoerg 	 * and 'Y' is any string.  These have opaque sub-strings.
252fec65c98Schristos 	 * The \B and \w escapes are handled in roff.c, roff_res().
253c5f73b34Sjoerg 	 */
254fec65c98Schristos 	case 'A':
255fec65c98Schristos 	case 'b':
256fec65c98Schristos 	case 'D':
257fec65c98Schristos 	case 'R':
258fec65c98Schristos 	case 'X':
259fec65c98Schristos 	case 'Z':
260c5f73b34Sjoerg 		gly = ESCAPE_IGNORE;
261fec65c98Schristos 		/* FALLTHROUGH */
262fec65c98Schristos 	case 'o':
263fec65c98Schristos 		if (**start == '\0')
2649ff1f2acSchristos 			return ESCAPE_ERROR;
265fec65c98Schristos 		if (gly == ESCAPE_ERROR)
266fec65c98Schristos 			gly = ESCAPE_OVERSTRIKE;
267fec65c98Schristos 		term = **start;
26870f041f9Sjoerg 		*start = ++*end;
269c0d9444aSjoerg 		break;
270c5f73b34Sjoerg 
271c5f73b34Sjoerg 	/*
272c5f73b34Sjoerg 	 * These escapes are of the form \X'N', where 'X' is the trigger
273c5f73b34Sjoerg 	 * and 'N' resolves to a numerical expression.
274c5f73b34Sjoerg 	 */
275fec65c98Schristos 	case 'h':
276fec65c98Schristos 	case 'H':
277fec65c98Schristos 	case 'L':
278fec65c98Schristos 	case 'l':
279fec65c98Schristos 	case 'S':
280fec65c98Schristos 	case 'v':
281fec65c98Schristos 	case 'x':
282fec65c98Schristos 		if (strchr(" %&()*+-./0123456789:<=>", **start)) {
283fec65c98Schristos 			if ('\0' != **start)
284fec65c98Schristos 				++*end;
2859ff1f2acSchristos 			return ESCAPE_ERROR;
286fec65c98Schristos 		}
287c9bcef03Schristos 		switch ((*start)[-1]) {
288c9bcef03Schristos 		case 'h':
289c9bcef03Schristos 			gly = ESCAPE_HORIZ;
290c9bcef03Schristos 			break;
291c9bcef03Schristos 		case 'l':
292c9bcef03Schristos 			gly = ESCAPE_HLINE;
293c9bcef03Schristos 			break;
294c9bcef03Schristos 		default:
29570f041f9Sjoerg 			gly = ESCAPE_IGNORE;
296c9bcef03Schristos 			break;
297c9bcef03Schristos 		}
298fec65c98Schristos 		term = **start;
29970f041f9Sjoerg 		*start = ++*end;
300c5f73b34Sjoerg 		break;
301c5f73b34Sjoerg 
302c5f73b34Sjoerg 	/*
303cf816816Sjoerg 	 * Special handling for the numbered character escape.
304cf816816Sjoerg 	 * XXX Do any other escapes need similar handling?
305cf816816Sjoerg 	 */
306fec65c98Schristos 	case 'N':
30770f041f9Sjoerg 		if ('\0' == **start)
3089ff1f2acSchristos 			return ESCAPE_ERROR;
30970f041f9Sjoerg 		(*end)++;
31070f041f9Sjoerg 		if (isdigit((unsigned char)**start)) {
31170f041f9Sjoerg 			*sz = 1;
3129ff1f2acSchristos 			return ESCAPE_IGNORE;
31370f041f9Sjoerg 		}
31470f041f9Sjoerg 		(*start)++;
315cf816816Sjoerg 		while (isdigit((unsigned char)**end))
316cf816816Sjoerg 			(*end)++;
31770f041f9Sjoerg 		*sz = *end - *start;
318cf816816Sjoerg 		if ('\0' != **end)
319cf816816Sjoerg 			(*end)++;
3209ff1f2acSchristos 		return ESCAPE_NUMBERED;
321cf816816Sjoerg 
322cf816816Sjoerg 	/*
323c5f73b34Sjoerg 	 * Sizes get a special category of their own.
324c5f73b34Sjoerg 	 */
325fec65c98Schristos 	case 's':
326c5f73b34Sjoerg 		gly = ESCAPE_IGNORE;
327c5f73b34Sjoerg 
328c5f73b34Sjoerg 		/* See +/- counts as a sign. */
32970f041f9Sjoerg 		if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
330fec65c98Schristos 			*start = ++*end;
331c5f73b34Sjoerg 
33270f041f9Sjoerg 		switch (**end) {
333fec65c98Schristos 		case '(':
33470f041f9Sjoerg 			*start = ++*end;
33570f041f9Sjoerg 			*sz = 2;
336c5f73b34Sjoerg 			break;
337fec65c98Schristos 		case '[':
33870f041f9Sjoerg 			*start = ++*end;
33970f041f9Sjoerg 			term = ']';
340c5f73b34Sjoerg 			break;
341fec65c98Schristos 		case '\'':
34270f041f9Sjoerg 			*start = ++*end;
34370f041f9Sjoerg 			term = '\'';
344c5f73b34Sjoerg 			break;
345fec65c98Schristos 		case '3':
346fec65c98Schristos 		case '2':
347fec65c98Schristos 		case '1':
348fec65c98Schristos 			*sz = (*end)[-1] == 's' &&
349fec65c98Schristos 			    isdigit((unsigned char)(*end)[1]) ? 2 : 1;
350fec65c98Schristos 			break;
3517da9b934Sjoerg 		default:
35270f041f9Sjoerg 			*sz = 1;
3537da9b934Sjoerg 			break;
3544154958bSjoerg 		}
3554154958bSjoerg 
356c5f73b34Sjoerg 		break;
357c5f73b34Sjoerg 
358c5f73b34Sjoerg 	/*
359*544c191cSchristos 	 * Several special characters can be encoded as
360*544c191cSchristos 	 * one-byte escape sequences without using \[].
361c5f73b34Sjoerg 	 */
362*544c191cSchristos 	case ' ':
363*544c191cSchristos 	case '\'':
364*544c191cSchristos 	case '-':
365*544c191cSchristos 	case '.':
366*544c191cSchristos 	case '0':
367*544c191cSchristos 	case ':':
368*544c191cSchristos 	case '_':
369*544c191cSchristos 	case '`':
370*544c191cSchristos 	case 'e':
371*544c191cSchristos 	case '~':
372c5f73b34Sjoerg 		gly = ESCAPE_SPECIAL;
373*544c191cSchristos 		/* FALLTHROUGH */
374*544c191cSchristos 	default:
375*544c191cSchristos 		if (gly == ESCAPE_ERROR)
376*544c191cSchristos 			gly = ESCAPE_UNDEF;
37770f041f9Sjoerg 		*start = --*end;
37870f041f9Sjoerg 		*sz = 1;
379c5f73b34Sjoerg 		break;
3807da9b934Sjoerg 	}
3814154958bSjoerg 
382c5f73b34Sjoerg 	/*
38370f041f9Sjoerg 	 * Read up to the terminating character,
38470f041f9Sjoerg 	 * paying attention to nested escapes.
385c5f73b34Sjoerg 	 */
386c5f73b34Sjoerg 
387c5f73b34Sjoerg 	if ('\0' != term) {
38870f041f9Sjoerg 		while (**end != term) {
38970f041f9Sjoerg 			switch (**end) {
390fec65c98Schristos 			case '\0':
3919ff1f2acSchristos 				return ESCAPE_ERROR;
392fec65c98Schristos 			case '\\':
393c5f73b34Sjoerg 				(*end)++;
39470f041f9Sjoerg 				if (ESCAPE_ERROR ==
39570f041f9Sjoerg 				    mandoc_escape(end, NULL, NULL))
3969ff1f2acSchristos 					return ESCAPE_ERROR;
39770f041f9Sjoerg 				break;
39870f041f9Sjoerg 			default:
39970f041f9Sjoerg 				(*end)++;
40070f041f9Sjoerg 				break;
40170f041f9Sjoerg 			}
40270f041f9Sjoerg 		}
40370f041f9Sjoerg 		*sz = (*end)++ - *start;
404*544c191cSchristos 
405*544c191cSchristos 		/*
406*544c191cSchristos 		 * The file chars.c only provides one common list
407*544c191cSchristos 		 * of character names, but \[-] == \- is the only
408*544c191cSchristos 		 * one of the characters with one-byte names that
409*544c191cSchristos 		 * allows enclosing the name in brackets.
410*544c191cSchristos 		 */
411*544c191cSchristos 		if (gly == ESCAPE_SPECIAL && *sz == 1 && **start != '-')
412*544c191cSchristos 			return ESCAPE_ERROR;
41370f041f9Sjoerg 	} else {
41470f041f9Sjoerg 		assert(*sz > 0);
41570f041f9Sjoerg 		if ((size_t)*sz > strlen(*start))
4169ff1f2acSchristos 			return ESCAPE_ERROR;
41770f041f9Sjoerg 		*end += *sz;
41870f041f9Sjoerg 	}
419c5f73b34Sjoerg 
420c5f73b34Sjoerg 	/* Run post-processors. */
421c5f73b34Sjoerg 
422c5f73b34Sjoerg 	switch (gly) {
423fec65c98Schristos 	case ESCAPE_FONT:
424*544c191cSchristos 		gly = mandoc_font(*start, *sz);
425c5f73b34Sjoerg 		break;
426fec65c98Schristos 	case ESCAPE_SPECIAL:
427*544c191cSchristos 		if (**start == 'c') {
428*544c191cSchristos 			if (*sz < 6 || *sz > 7 ||
429*544c191cSchristos 			    strncmp(*start, "char", 4) != 0 ||
430*544c191cSchristos 			    (int)strspn(*start + 4, "0123456789") + 4 < *sz)
431*544c191cSchristos 				break;
432*544c191cSchristos 			c = 0;
433*544c191cSchristos 			for (i = 4; i < *sz; i++)
434*544c191cSchristos 				c = 10 * c + ((*start)[i] - '0');
435*544c191cSchristos 			if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff)
436*544c191cSchristos 				break;
437*544c191cSchristos 			*start += 4;
438*544c191cSchristos 			*sz -= 4;
439*544c191cSchristos 			gly = ESCAPE_NUMBERED;
440*544c191cSchristos 			break;
441*544c191cSchristos 		}
442*544c191cSchristos 
443fec65c98Schristos 		/*
444fec65c98Schristos 		 * Unicode escapes are defined in groff as \[u0000]
445fec65c98Schristos 		 * to \[u10FFFF], where the contained value must be
446fec65c98Schristos 		 * a valid Unicode codepoint.  Here, however, only
447fec65c98Schristos 		 * check the length and range.
448fec65c98Schristos 		 */
449fec65c98Schristos 		if (**start != 'u' || *sz < 5 || *sz > 7)
450fec65c98Schristos 			break;
451fec65c98Schristos 		if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
452fec65c98Schristos 			break;
453fec65c98Schristos 		if (*sz == 6 && (*start)[1] == '0')
454fec65c98Schristos 			break;
4559ff1f2acSchristos 		if (*sz == 5 && (*start)[1] == 'D' &&
4569ff1f2acSchristos 		    strchr("89ABCDEF", (*start)[2]) != NULL)
4579ff1f2acSchristos 			break;
458fec65c98Schristos 		if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
459fec65c98Schristos 		    + 1 == *sz)
460fec65c98Schristos 			gly = ESCAPE_UNICODE;
461c5f73b34Sjoerg 		break;
462c5f73b34Sjoerg 	default:
463c5f73b34Sjoerg 		break;
464c5f73b34Sjoerg 	}
465c5f73b34Sjoerg 
4669ff1f2acSchristos 	return gly;
467c5f73b34Sjoerg }
4683514411fSjoerg 
/*
 * Parse the string p according to the strptime(3) format fmt.
 * If the whole string matches, store the result of mktime(3)
 * for the parsed fields in *t and return 1.
 * Otherwise, leave *t alone and return 0.  That is also what
 * happens for every input if HAVE_STRPTIME is not set.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	char		*rest;

	/* Fields that the format does not set remain zero. */
	memset(&parsed, 0, sizeof(parsed));

	rest = NULL;
#if HAVE_STRPTIME
	rest = strptime(p, fmt, &parsed);
#endif

	/* Reject parse failures and trailing unparsed input. */
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&parsed);
	return 1;
}
4883514411fSjoerg 
/*
 * Format the local date corresponding to t as
 * "<month name> <day>, <year>" in a buffer obtained from
 * mandoc_malloc().  Returns that buffer, or NULL if localtime(3)
 * fails or if one of the three parts cannot be formatted;
 * in the latter case, the buffer is freed again.
 */
static char *
time2a(time_t t)
{
	struct tm	*when;
	char		*out, *cur;
	size_t		 nmonth;
	int		 nday;

	if ((when = localtime(&t)) == NULL)
		return NULL;

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */

	out = mandoc_malloc(10 + 4 + 4 + 1);
	cur = out;

	nmonth = strftime(cur, 10 + 1, "%B ", when);
	if (nmonth != 0) {
		cur += (int)nmonth;

		/*
		 * The output format is just "%d" here, not "%2d"
		 * or "%02d".  That's also the reason why we can't
		 * just format the date as a whole with "%B %e, %Y"
		 * or "%B %d, %Y".  Besides, the present approach is
		 * less prone to buffer overflows, in case anybody
		 * should ever introduce the bug of looking at LC_TIME.
		 */

		nday = snprintf(cur, 4 + 1, "%d, ", when->tm_mday);
		if (nday != -1) {
			cur += nday;
			if (strftime(cur, 4 + 1, "%Y", when) != 0)
				return out;
		}
	}

	/* One of the parts could not be formatted. */
	free(out);
	return NULL;
}
53548741257Sjoerg 
53648741257Sjoerg char *
mandoc_normdate(struct roff_man * man,char * in,int ln,int pos)537c9bcef03Schristos mandoc_normdate(struct roff_man *man, char *in, int ln, int pos)
5383514411fSjoerg {
539c9bcef03Schristos 	char		*cp;
5403514411fSjoerg 	time_t		 t;
5413514411fSjoerg 
5429ff1f2acSchristos 	/* No date specified: use today's date. */
5439ff1f2acSchristos 
5449ff1f2acSchristos 	if (in == NULL || *in == '\0' || strcmp(in, "$" "Mdocdate$") == 0) {
545*544c191cSchristos 		mandoc_msg(MANDOCERR_DATE_MISSING, ln, pos, NULL);
5469ff1f2acSchristos 		return time2a(time(NULL));
5473514411fSjoerg 	}
5489ff1f2acSchristos 
5499ff1f2acSchristos 	/* Valid mdoc(7) date format. */
5509ff1f2acSchristos 
5519ff1f2acSchristos 	if (a2time(&t, "$" "Mdocdate: %b %d %Y $", in) ||
552c9bcef03Schristos 	    a2time(&t, "%b %d, %Y", in)) {
553c9bcef03Schristos 		cp = time2a(t);
554c9bcef03Schristos 		if (t > time(NULL) + 86400)
555*544c191cSchristos 			mandoc_msg(MANDOCERR_DATE_FUTURE, ln, pos, "%s", cp);
556c9bcef03Schristos 		else if (*in != '$' && strcmp(in, cp) != 0)
557*544c191cSchristos 			mandoc_msg(MANDOCERR_DATE_NORM, ln, pos, "%s", cp);
558c9bcef03Schristos 		return cp;
559c9bcef03Schristos 	}
5609ff1f2acSchristos 
561c9bcef03Schristos 	/* In man(7), do not warn about the legacy format. */
5629ff1f2acSchristos 
563c9bcef03Schristos 	if (a2time(&t, "%Y-%m-%d", in) == 0)
564*544c191cSchristos 		mandoc_msg(MANDOCERR_DATE_BAD, ln, pos, "%s", in);
565c9bcef03Schristos 	else if (t > time(NULL) + 86400)
566*544c191cSchristos 		mandoc_msg(MANDOCERR_DATE_FUTURE, ln, pos, "%s", in);
567*544c191cSchristos 	else if (man->meta.macroset == MACROSET_MDOC)
568*544c191cSchristos 		mandoc_msg(MANDOCERR_DATE_LEGACY, ln, pos, "Dd %s", in);
5699ff1f2acSchristos 
5709ff1f2acSchristos 	/* Use any non-mdoc(7) date verbatim. */
5719ff1f2acSchristos 
5729ff1f2acSchristos 	return mandoc_strdup(in);
5733514411fSjoerg }
5743514411fSjoerg 
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * The buffer is scanned backwards from its last byte:
 * - closing delimiters (double quote, single quote, ']' and ')')
 *   seen before any punctuation mark the text as enclosed;
 * - '.', '!' and '?' count as end-of-sentence punctuation;
 * - the first byte of any other kind ends the scan.
 *
 * Returns 1 if punctuation was found and either nothing enclosed
 * it or the byte that ended the scan is alphanumeric, and 0
 * otherwise.  If the scan reaches the beginning of the buffer,
 * enclosed punctuation does not count.  An empty buffer yields 0.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t		 i;
	int		 enclosed, found;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 *
	 * Count an index down rather than a pointer, such that
	 * no pointer to before the beginning of the buffer is
	 * ever formed and large sizes are not truncated to int.
	 */

	enclosed = found = 0;
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			if (found == 0)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			return found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1]));
		}
	}

	return found && !enclosed;
}
6130a84adc5Sjoerg 
/*
 * Convert the first sz bytes of p, which need not be
 * NUL-terminated, to a non-negative int using strtol(3)
 * with the given base.
 * Returns -1 if sz exceeds 31 bytes, if the string is empty,
 * if it is not completely consumed by the conversion, or if
 * the value is negative.  Values larger than INT_MAX are
 * clamped to INT_MAX.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 buf[32];
	char		*ep;
	long		 v;

	/* Leave room for the terminating NUL byte. */
	if (sz >= sizeof(buf))
		return -1;

	memcpy(buf, p, sz);
	buf[sz] = '\0';

	v = strtol(buf, &ep, base);

	if (buf[0] == '\0' || *ep != '\0')
		return -1;

	/*
	 * Negative values are invalid.  This includes LONG_MIN,
	 * which strtol(3) returns on negative overflow.
	 */
	if (v < 0)
		return -1;

	/* On positive overflow, strtol(3) returns LONG_MAX. */
	if (v > INT_MAX)
		v = INT_MAX;

	return (int)v;
}
644