1*544c191cSchristos /* Id: mandoc.c,v 1.114 2018/12/30 00:49:55 schwarze Exp */
24154958bSjoerg /*
3fec65c98Schristos * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4c9bcef03Schristos * Copyright (c) 2011-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
54154958bSjoerg *
64154958bSjoerg * Permission to use, copy, modify, and distribute this software for any
74154958bSjoerg * purpose with or without fee is hereby granted, provided that the above
84154958bSjoerg * copyright notice and this permission notice appear in all copies.
94154958bSjoerg *
10c0d9444aSjoerg * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
114154958bSjoerg * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12c0d9444aSjoerg * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
134154958bSjoerg * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
144154958bSjoerg * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
154154958bSjoerg * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
164154958bSjoerg * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
174154958bSjoerg */
18d5e63c8dSjoerg #include "config.h"
193514411fSjoerg
204154958bSjoerg #include <sys/types.h>
214154958bSjoerg
224154958bSjoerg #include <assert.h>
234154958bSjoerg #include <ctype.h>
24c5f73b34Sjoerg #include <errno.h>
25c5f73b34Sjoerg #include <limits.h>
264154958bSjoerg #include <stdlib.h>
273514411fSjoerg #include <stdio.h>
283514411fSjoerg #include <string.h>
293514411fSjoerg #include <time.h>
304154958bSjoerg
31fec65c98Schristos #include "mandoc_aux.h"
32c9bcef03Schristos #include "mandoc.h"
33c9bcef03Schristos #include "roff.h"
344154958bSjoerg #include "libmandoc.h"
35*544c191cSchristos #include "roff_int.h"
364154958bSjoerg
3748741257Sjoerg static int a2time(time_t *, const char *, const char *);
3848741257Sjoerg static char *time2a(time_t);
393514411fSjoerg
40c5f73b34Sjoerg
41c5f73b34Sjoerg enum mandoc_esc
mandoc_font(const char * cp,int sz)42*544c191cSchristos mandoc_font(const char *cp, int sz)
43*544c191cSchristos {
44*544c191cSchristos switch (sz) {
45*544c191cSchristos case 0:
46*544c191cSchristos return ESCAPE_FONTPREV;
47*544c191cSchristos case 1:
48*544c191cSchristos switch (cp[0]) {
49*544c191cSchristos case 'B':
50*544c191cSchristos case '3':
51*544c191cSchristos return ESCAPE_FONTBOLD;
52*544c191cSchristos case 'I':
53*544c191cSchristos case '2':
54*544c191cSchristos return ESCAPE_FONTITALIC;
55*544c191cSchristos case 'P':
56*544c191cSchristos return ESCAPE_FONTPREV;
57*544c191cSchristos case 'R':
58*544c191cSchristos case '1':
59*544c191cSchristos return ESCAPE_FONTROMAN;
60*544c191cSchristos case '4':
61*544c191cSchristos return ESCAPE_FONTBI;
62*544c191cSchristos default:
63*544c191cSchristos return ESCAPE_ERROR;
64*544c191cSchristos }
65*544c191cSchristos case 2:
66*544c191cSchristos switch (cp[0]) {
67*544c191cSchristos case 'B':
68*544c191cSchristos switch (cp[1]) {
69*544c191cSchristos case 'I':
70*544c191cSchristos return ESCAPE_FONTBI;
71*544c191cSchristos default:
72*544c191cSchristos return ESCAPE_ERROR;
73*544c191cSchristos }
74*544c191cSchristos case 'C':
75*544c191cSchristos switch (cp[1]) {
76*544c191cSchristos case 'B':
77*544c191cSchristos return ESCAPE_FONTBOLD;
78*544c191cSchristos case 'I':
79*544c191cSchristos return ESCAPE_FONTITALIC;
80*544c191cSchristos case 'R':
81*544c191cSchristos case 'W':
82*544c191cSchristos return ESCAPE_FONTCW;
83*544c191cSchristos default:
84*544c191cSchristos return ESCAPE_ERROR;
85*544c191cSchristos }
86*544c191cSchristos default:
87*544c191cSchristos return ESCAPE_ERROR;
88*544c191cSchristos }
89*544c191cSchristos default:
90*544c191cSchristos return ESCAPE_ERROR;
91*544c191cSchristos }
92*544c191cSchristos }
93*544c191cSchristos
94*544c191cSchristos enum mandoc_esc
mandoc_escape(const char ** end,const char ** start,int * sz)95c5f73b34Sjoerg mandoc_escape(const char **end, const char **start, int *sz)
96c5f73b34Sjoerg {
9770f041f9Sjoerg const char *local_start;
98*544c191cSchristos int local_sz, c, i;
9970f041f9Sjoerg char term;
100c5f73b34Sjoerg enum mandoc_esc gly;
101c5f73b34Sjoerg
10270f041f9Sjoerg /*
10370f041f9Sjoerg * When the caller doesn't provide return storage,
10470f041f9Sjoerg * use local storage.
10570f041f9Sjoerg */
106c5f73b34Sjoerg
10770f041f9Sjoerg if (NULL == start)
10870f041f9Sjoerg start = &local_start;
10970f041f9Sjoerg if (NULL == sz)
11070f041f9Sjoerg sz = &local_sz;
11170f041f9Sjoerg
11270f041f9Sjoerg /*
113*544c191cSchristos * Treat "\E" just like "\";
114*544c191cSchristos * it only makes a difference in copy mode.
115*544c191cSchristos */
116*544c191cSchristos
117*544c191cSchristos if (**end == 'E')
118*544c191cSchristos ++*end;
119*544c191cSchristos
120*544c191cSchristos /*
12170f041f9Sjoerg * Beyond the backslash, at least one input character
12270f041f9Sjoerg * is part of the escape sequence. With one exception
12370f041f9Sjoerg * (see below), that character won't be returned.
12470f041f9Sjoerg */
12570f041f9Sjoerg
12670f041f9Sjoerg gly = ESCAPE_ERROR;
12770f041f9Sjoerg *start = ++*end;
12870f041f9Sjoerg *sz = 0;
12970f041f9Sjoerg term = '\0';
13070f041f9Sjoerg
13170f041f9Sjoerg switch ((*start)[-1]) {
132c5f73b34Sjoerg /*
133c5f73b34Sjoerg * First the glyphs. There are several different forms of
134c5f73b34Sjoerg * these, but each eventually returns a substring of the glyph
135c5f73b34Sjoerg * name.
136c5f73b34Sjoerg */
137fec65c98Schristos case '(':
138c5f73b34Sjoerg gly = ESCAPE_SPECIAL;
13970f041f9Sjoerg *sz = 2;
1407da9b934Sjoerg break;
141fec65c98Schristos case '[':
142*544c191cSchristos if (**start == ' ') {
143*544c191cSchristos ++*end;
144*544c191cSchristos return ESCAPE_ERROR;
145*544c191cSchristos }
146c5f73b34Sjoerg gly = ESCAPE_SPECIAL;
1477da9b934Sjoerg term = ']';
1487da9b934Sjoerg break;
149fec65c98Schristos case 'C':
15070f041f9Sjoerg if ('\'' != **start)
1519ff1f2acSchristos return ESCAPE_ERROR;
15270f041f9Sjoerg *start = ++*end;
153c5f73b34Sjoerg gly = ESCAPE_SPECIAL;
1547da9b934Sjoerg term = '\'';
1557da9b934Sjoerg break;
156c5f73b34Sjoerg
157c5f73b34Sjoerg /*
15870f041f9Sjoerg * Escapes taking no arguments at all.
15970f041f9Sjoerg */
160*544c191cSchristos case '!':
161*544c191cSchristos case '?':
162*544c191cSchristos return ESCAPE_UNSUPP;
163*544c191cSchristos case '%':
164*544c191cSchristos case '&':
165*544c191cSchristos case ')':
1669ff1f2acSchristos case ',':
1679ff1f2acSchristos case '/':
168*544c191cSchristos case '^':
169*544c191cSchristos case 'a':
170*544c191cSchristos case 'd':
171*544c191cSchristos case 'r':
172*544c191cSchristos case 't':
173*544c191cSchristos case 'u':
174*544c191cSchristos case '{':
175*544c191cSchristos case '|':
176*544c191cSchristos case '}':
1779ff1f2acSchristos return ESCAPE_IGNORE;
178*544c191cSchristos case 'c':
179*544c191cSchristos return ESCAPE_NOSPACE;
180c9bcef03Schristos case 'p':
181c9bcef03Schristos return ESCAPE_BREAK;
18270f041f9Sjoerg
18370f041f9Sjoerg /*
18470f041f9Sjoerg * The \z escape is supposed to output the following
18570f041f9Sjoerg * character without advancing the cursor position.
18670f041f9Sjoerg * Since we are mostly dealing with terminal mode,
18770f041f9Sjoerg * let us just skip the next character.
18870f041f9Sjoerg */
189fec65c98Schristos case 'z':
1909ff1f2acSchristos return ESCAPE_SKIPCHAR;
19170f041f9Sjoerg
19270f041f9Sjoerg /*
193c5f73b34Sjoerg * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
194c5f73b34Sjoerg * 'X' is the trigger. These have opaque sub-strings.
195c5f73b34Sjoerg */
196fec65c98Schristos case 'F':
197*544c191cSchristos case 'f':
198fec65c98Schristos case 'g':
199fec65c98Schristos case 'k':
200fec65c98Schristos case 'M':
201fec65c98Schristos case 'm':
202fec65c98Schristos case 'n':
203*544c191cSchristos case 'O':
204fec65c98Schristos case 'V':
205fec65c98Schristos case 'Y':
206*544c191cSchristos gly = (*start)[-1] == 'f' ? ESCAPE_FONT : ESCAPE_IGNORE;
20770f041f9Sjoerg switch (**start) {
208fec65c98Schristos case '(':
209*544c191cSchristos if ((*start)[-1] == 'O')
210*544c191cSchristos gly = ESCAPE_ERROR;
21170f041f9Sjoerg *start = ++*end;
21270f041f9Sjoerg *sz = 2;
2134154958bSjoerg break;
214fec65c98Schristos case '[':
215*544c191cSchristos if ((*start)[-1] == 'O')
216*544c191cSchristos gly = (*start)[1] == '5' ?
217*544c191cSchristos ESCAPE_UNSUPP : ESCAPE_ERROR;
21870f041f9Sjoerg *start = ++*end;
2197da9b934Sjoerg term = ']';
2204154958bSjoerg break;
2214154958bSjoerg default:
222*544c191cSchristos if ((*start)[-1] == 'O') {
223*544c191cSchristos switch (**start) {
224*544c191cSchristos case '0':
225*544c191cSchristos gly = ESCAPE_UNSUPP;
226*544c191cSchristos break;
227*544c191cSchristos case '1':
228*544c191cSchristos case '2':
229*544c191cSchristos case '3':
230*544c191cSchristos case '4':
231*544c191cSchristos break;
232*544c191cSchristos default:
233*544c191cSchristos gly = ESCAPE_ERROR;
234*544c191cSchristos break;
235*544c191cSchristos }
236*544c191cSchristos }
23770f041f9Sjoerg *sz = 1;
2387da9b934Sjoerg break;
2397da9b934Sjoerg }
2407da9b934Sjoerg break;
241*544c191cSchristos case '*':
242*544c191cSchristos if (strncmp(*start, "(.T", 3) != 0)
243*544c191cSchristos abort();
244*544c191cSchristos gly = ESCAPE_DEVICE;
245*544c191cSchristos *start = ++*end;
246*544c191cSchristos *sz = 2;
247*544c191cSchristos break;
248c5f73b34Sjoerg
249c5f73b34Sjoerg /*
250c5f73b34Sjoerg * These escapes are of the form \X'Y', where 'X' is the trigger
251c5f73b34Sjoerg * and 'Y' is any string. These have opaque sub-strings.
252fec65c98Schristos * The \B and \w escapes are handled in roff.c, roff_res().
253c5f73b34Sjoerg */
254fec65c98Schristos case 'A':
255fec65c98Schristos case 'b':
256fec65c98Schristos case 'D':
257fec65c98Schristos case 'R':
258fec65c98Schristos case 'X':
259fec65c98Schristos case 'Z':
260c5f73b34Sjoerg gly = ESCAPE_IGNORE;
261fec65c98Schristos /* FALLTHROUGH */
262fec65c98Schristos case 'o':
263fec65c98Schristos if (**start == '\0')
2649ff1f2acSchristos return ESCAPE_ERROR;
265fec65c98Schristos if (gly == ESCAPE_ERROR)
266fec65c98Schristos gly = ESCAPE_OVERSTRIKE;
267fec65c98Schristos term = **start;
26870f041f9Sjoerg *start = ++*end;
269c0d9444aSjoerg break;
270c5f73b34Sjoerg
271c5f73b34Sjoerg /*
272c5f73b34Sjoerg * These escapes are of the form \X'N', where 'X' is the trigger
273c5f73b34Sjoerg * and 'N' resolves to a numerical expression.
274c5f73b34Sjoerg */
275fec65c98Schristos case 'h':
276fec65c98Schristos case 'H':
277fec65c98Schristos case 'L':
278fec65c98Schristos case 'l':
279fec65c98Schristos case 'S':
280fec65c98Schristos case 'v':
281fec65c98Schristos case 'x':
282fec65c98Schristos if (strchr(" %&()*+-./0123456789:<=>", **start)) {
283fec65c98Schristos if ('\0' != **start)
284fec65c98Schristos ++*end;
2859ff1f2acSchristos return ESCAPE_ERROR;
286fec65c98Schristos }
287c9bcef03Schristos switch ((*start)[-1]) {
288c9bcef03Schristos case 'h':
289c9bcef03Schristos gly = ESCAPE_HORIZ;
290c9bcef03Schristos break;
291c9bcef03Schristos case 'l':
292c9bcef03Schristos gly = ESCAPE_HLINE;
293c9bcef03Schristos break;
294c9bcef03Schristos default:
29570f041f9Sjoerg gly = ESCAPE_IGNORE;
296c9bcef03Schristos break;
297c9bcef03Schristos }
298fec65c98Schristos term = **start;
29970f041f9Sjoerg *start = ++*end;
300c5f73b34Sjoerg break;
301c5f73b34Sjoerg
302c5f73b34Sjoerg /*
303cf816816Sjoerg * Special handling for the numbered character escape.
304cf816816Sjoerg * XXX Do any other escapes need similar handling?
305cf816816Sjoerg */
306fec65c98Schristos case 'N':
30770f041f9Sjoerg if ('\0' == **start)
3089ff1f2acSchristos return ESCAPE_ERROR;
30970f041f9Sjoerg (*end)++;
31070f041f9Sjoerg if (isdigit((unsigned char)**start)) {
31170f041f9Sjoerg *sz = 1;
3129ff1f2acSchristos return ESCAPE_IGNORE;
31370f041f9Sjoerg }
31470f041f9Sjoerg (*start)++;
315cf816816Sjoerg while (isdigit((unsigned char)**end))
316cf816816Sjoerg (*end)++;
31770f041f9Sjoerg *sz = *end - *start;
318cf816816Sjoerg if ('\0' != **end)
319cf816816Sjoerg (*end)++;
3209ff1f2acSchristos return ESCAPE_NUMBERED;
321cf816816Sjoerg
322cf816816Sjoerg /*
323c5f73b34Sjoerg * Sizes get a special category of their own.
324c5f73b34Sjoerg */
325fec65c98Schristos case 's':
326c5f73b34Sjoerg gly = ESCAPE_IGNORE;
327c5f73b34Sjoerg
328c5f73b34Sjoerg /* See +/- counts as a sign. */
32970f041f9Sjoerg if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
330fec65c98Schristos *start = ++*end;
331c5f73b34Sjoerg
33270f041f9Sjoerg switch (**end) {
333fec65c98Schristos case '(':
33470f041f9Sjoerg *start = ++*end;
33570f041f9Sjoerg *sz = 2;
336c5f73b34Sjoerg break;
337fec65c98Schristos case '[':
33870f041f9Sjoerg *start = ++*end;
33970f041f9Sjoerg term = ']';
340c5f73b34Sjoerg break;
341fec65c98Schristos case '\'':
34270f041f9Sjoerg *start = ++*end;
34370f041f9Sjoerg term = '\'';
344c5f73b34Sjoerg break;
345fec65c98Schristos case '3':
346fec65c98Schristos case '2':
347fec65c98Schristos case '1':
348fec65c98Schristos *sz = (*end)[-1] == 's' &&
349fec65c98Schristos isdigit((unsigned char)(*end)[1]) ? 2 : 1;
350fec65c98Schristos break;
3517da9b934Sjoerg default:
35270f041f9Sjoerg *sz = 1;
3537da9b934Sjoerg break;
3544154958bSjoerg }
3554154958bSjoerg
356c5f73b34Sjoerg break;
357c5f73b34Sjoerg
358c5f73b34Sjoerg /*
359*544c191cSchristos * Several special characters can be encoded as
360*544c191cSchristos * one-byte escape sequences without using \[].
361c5f73b34Sjoerg */
362*544c191cSchristos case ' ':
363*544c191cSchristos case '\'':
364*544c191cSchristos case '-':
365*544c191cSchristos case '.':
366*544c191cSchristos case '0':
367*544c191cSchristos case ':':
368*544c191cSchristos case '_':
369*544c191cSchristos case '`':
370*544c191cSchristos case 'e':
371*544c191cSchristos case '~':
372c5f73b34Sjoerg gly = ESCAPE_SPECIAL;
373*544c191cSchristos /* FALLTHROUGH */
374*544c191cSchristos default:
375*544c191cSchristos if (gly == ESCAPE_ERROR)
376*544c191cSchristos gly = ESCAPE_UNDEF;
37770f041f9Sjoerg *start = --*end;
37870f041f9Sjoerg *sz = 1;
379c5f73b34Sjoerg break;
3807da9b934Sjoerg }
3814154958bSjoerg
382c5f73b34Sjoerg /*
38370f041f9Sjoerg * Read up to the terminating character,
38470f041f9Sjoerg * paying attention to nested escapes.
385c5f73b34Sjoerg */
386c5f73b34Sjoerg
387c5f73b34Sjoerg if ('\0' != term) {
38870f041f9Sjoerg while (**end != term) {
38970f041f9Sjoerg switch (**end) {
390fec65c98Schristos case '\0':
3919ff1f2acSchristos return ESCAPE_ERROR;
392fec65c98Schristos case '\\':
393c5f73b34Sjoerg (*end)++;
39470f041f9Sjoerg if (ESCAPE_ERROR ==
39570f041f9Sjoerg mandoc_escape(end, NULL, NULL))
3969ff1f2acSchristos return ESCAPE_ERROR;
39770f041f9Sjoerg break;
39870f041f9Sjoerg default:
39970f041f9Sjoerg (*end)++;
40070f041f9Sjoerg break;
40170f041f9Sjoerg }
40270f041f9Sjoerg }
40370f041f9Sjoerg *sz = (*end)++ - *start;
404*544c191cSchristos
405*544c191cSchristos /*
406*544c191cSchristos * The file chars.c only provides one common list
407*544c191cSchristos * of character names, but \[-] == \- is the only
408*544c191cSchristos * one of the characters with one-byte names that
409*544c191cSchristos * allows enclosing the name in brackets.
410*544c191cSchristos */
411*544c191cSchristos if (gly == ESCAPE_SPECIAL && *sz == 1 && **start != '-')
412*544c191cSchristos return ESCAPE_ERROR;
41370f041f9Sjoerg } else {
41470f041f9Sjoerg assert(*sz > 0);
41570f041f9Sjoerg if ((size_t)*sz > strlen(*start))
4169ff1f2acSchristos return ESCAPE_ERROR;
41770f041f9Sjoerg *end += *sz;
41870f041f9Sjoerg }
419c5f73b34Sjoerg
420c5f73b34Sjoerg /* Run post-processors. */
421c5f73b34Sjoerg
422c5f73b34Sjoerg switch (gly) {
423fec65c98Schristos case ESCAPE_FONT:
424*544c191cSchristos gly = mandoc_font(*start, *sz);
425c5f73b34Sjoerg break;
426fec65c98Schristos case ESCAPE_SPECIAL:
427*544c191cSchristos if (**start == 'c') {
428*544c191cSchristos if (*sz < 6 || *sz > 7 ||
429*544c191cSchristos strncmp(*start, "char", 4) != 0 ||
430*544c191cSchristos (int)strspn(*start + 4, "0123456789") + 4 < *sz)
431*544c191cSchristos break;
432*544c191cSchristos c = 0;
433*544c191cSchristos for (i = 4; i < *sz; i++)
434*544c191cSchristos c = 10 * c + ((*start)[i] - '0');
435*544c191cSchristos if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff)
436*544c191cSchristos break;
437*544c191cSchristos *start += 4;
438*544c191cSchristos *sz -= 4;
439*544c191cSchristos gly = ESCAPE_NUMBERED;
440*544c191cSchristos break;
441*544c191cSchristos }
442*544c191cSchristos
443fec65c98Schristos /*
444fec65c98Schristos * Unicode escapes are defined in groff as \[u0000]
445fec65c98Schristos * to \[u10FFFF], where the contained value must be
446fec65c98Schristos * a valid Unicode codepoint. Here, however, only
447fec65c98Schristos * check the length and range.
448fec65c98Schristos */
449fec65c98Schristos if (**start != 'u' || *sz < 5 || *sz > 7)
450fec65c98Schristos break;
451fec65c98Schristos if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
452fec65c98Schristos break;
453fec65c98Schristos if (*sz == 6 && (*start)[1] == '0')
454fec65c98Schristos break;
4559ff1f2acSchristos if (*sz == 5 && (*start)[1] == 'D' &&
4569ff1f2acSchristos strchr("89ABCDEF", (*start)[2]) != NULL)
4579ff1f2acSchristos break;
458fec65c98Schristos if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
459fec65c98Schristos + 1 == *sz)
460fec65c98Schristos gly = ESCAPE_UNICODE;
461c5f73b34Sjoerg break;
462c5f73b34Sjoerg default:
463c5f73b34Sjoerg break;
464c5f73b34Sjoerg }
465c5f73b34Sjoerg
4669ff1f2acSchristos return gly;
467c5f73b34Sjoerg }
4683514411fSjoerg
/*
 * Parse the string p according to the strptime(3) format fmt.
 * On success, store the result of mktime(3) in *t and return 1.
 * Return 0, leaving *t untouched, if the string does not match,
 * if input remains after the match, or if the build has no
 * strptime(3) available (HAVE_STRPTIME not set).
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	char		*rest;

	memset(&parsed, 0, sizeof(parsed));
	rest = NULL;

#if HAVE_STRPTIME
	rest = strptime(p, fmt, &parsed);
#endif

	/* Require a match that consumes the complete string. */
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&parsed);
	return 1;
}
4883514411fSjoerg
/*
 * Format the time t in the local time zone as "Month D, YYYY",
 * with the day of the month printed without padding.
 * The result is a newly allocated string that the caller owns.
 * If the time cannot be converted or does not fit the expected
 * layout, an allocated empty string is returned instead of NULL:
 * the caller in this file hands the result to strcmp() and to a
 * "%s" format without checking it, so NULL would crash there.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	tm = localtime(&t);
	if (tm == NULL)
		return mandoc_strdup("");

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */

	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0)
		goto fail;
	p += (int)ssz;

	/*
	 * The output format is just "%d" here, not "%2d" or "%02d".
	 * That's also the reason why we can't just format the
	 * date as a whole with "%B %e, %Y" or "%B %d, %Y".
	 * Besides, the present approach is less prone to buffer
	 * overflows, in case anybody should ever introduce the bug
	 * of looking at LC_TIME.
	 *
	 * snprintf() reports the length it wanted to write, so a
	 * result above 4 means truncation; advancing p by it would
	 * step past the space reserved for the day.
	 */

	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	if (strftime(p, 4 + 1, "%Y", tm) == 0)
		goto fail;
	return buf;

fail:
	free(buf);
	return mandoc_strdup("");
}
53548741257Sjoerg
53648741257Sjoerg char *
mandoc_normdate(struct roff_man * man,char * in,int ln,int pos)537c9bcef03Schristos mandoc_normdate(struct roff_man *man, char *in, int ln, int pos)
5383514411fSjoerg {
539c9bcef03Schristos char *cp;
5403514411fSjoerg time_t t;
5413514411fSjoerg
5429ff1f2acSchristos /* No date specified: use today's date. */
5439ff1f2acSchristos
5449ff1f2acSchristos if (in == NULL || *in == '\0' || strcmp(in, "$" "Mdocdate$") == 0) {
545*544c191cSchristos mandoc_msg(MANDOCERR_DATE_MISSING, ln, pos, NULL);
5469ff1f2acSchristos return time2a(time(NULL));
5473514411fSjoerg }
5489ff1f2acSchristos
5499ff1f2acSchristos /* Valid mdoc(7) date format. */
5509ff1f2acSchristos
5519ff1f2acSchristos if (a2time(&t, "$" "Mdocdate: %b %d %Y $", in) ||
552c9bcef03Schristos a2time(&t, "%b %d, %Y", in)) {
553c9bcef03Schristos cp = time2a(t);
554c9bcef03Schristos if (t > time(NULL) + 86400)
555*544c191cSchristos mandoc_msg(MANDOCERR_DATE_FUTURE, ln, pos, "%s", cp);
556c9bcef03Schristos else if (*in != '$' && strcmp(in, cp) != 0)
557*544c191cSchristos mandoc_msg(MANDOCERR_DATE_NORM, ln, pos, "%s", cp);
558c9bcef03Schristos return cp;
559c9bcef03Schristos }
5609ff1f2acSchristos
561c9bcef03Schristos /* In man(7), do not warn about the legacy format. */
5629ff1f2acSchristos
563c9bcef03Schristos if (a2time(&t, "%Y-%m-%d", in) == 0)
564*544c191cSchristos mandoc_msg(MANDOCERR_DATE_BAD, ln, pos, "%s", in);
565c9bcef03Schristos else if (t > time(NULL) + 86400)
566*544c191cSchristos mandoc_msg(MANDOCERR_DATE_FUTURE, ln, pos, "%s", in);
567*544c191cSchristos else if (man->meta.macroset == MACROSET_MDOC)
568*544c191cSchristos mandoc_msg(MANDOCERR_DATE_LEGACY, ln, pos, "Dd %s", in);
5699ff1f2acSchristos
5709ff1f2acSchristos /* Use any non-mdoc(7) date verbatim. */
5719ff1f2acSchristos
5729ff1f2acSchristos return mandoc_strdup(in);
5733514411fSjoerg }
5743514411fSjoerg
/*
 * Decide whether the sz bytes starting at p end a sentence.
 * The text is examined from its last byte backwards:
 * closing quotes, brackets and parentheses seen before any
 * end-of-sentence punctuation mark the punctuation as enclosed,
 * and '.', '!' and '?' count as end-of-sentence punctuation.
 * The first other byte settles the matter: punctuation must have
 * been seen, and if it was enclosed, that byte must be alphanumeric.
 * If the text consists of nothing but such symbols, it only ends
 * a sentence when the punctuation is not enclosed.
 * Returns 1 for end of sentence and 0 otherwise.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t	 i;
	int	 enclosed, found;

	enclosed = found = 0;

	/*
	 * Walk backwards by index rather than by pointer, such that
	 * no pointer to before the start of the buffer is ever
	 * formed; for sz == 0 the loop body does not run at all.
	 */

	for (i = sz; i-- > 0; ) {
		switch (p[i]) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			/*
			 * Closing symbols let prior end-of-sentence
			 * punctuation propagate outward.
			 */
			if (found == 0)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			return found &&
			    (!enclosed || isalnum((unsigned char)p[i]));
		}
	}

	return found && !enclosed;
}
6130a84adc5Sjoerg
/*
 * Convert the first sz bytes of p to an int using strtol(3)
 * with the given base.
 * Return -1 if sz exceeds 31 bytes, if the input is empty,
 * or if anything is left over after the number.
 * Otherwise, return the value, clamped to the range of int.
 * Valid negative numbers are returned as they are, so a result
 * of -1 does not by itself prove that the input was invalid.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char	 numbuf[32];
	char	*stop;
	long	 val;

	/* Leave room for the terminating NUL byte. */
	if (sz >= sizeof(numbuf))
		return -1;

	memcpy(numbuf, p, sz);
	numbuf[sz] = '\0';

	errno = 0;
	val = strtol(numbuf, &stop, base);

	/* Reject empty input and trailing garbage. */
	if (numbuf[0] == '\0' || *stop != '\0')
		return -1;

	if (val > INT_MAX)
		return INT_MAX;
	if (val < INT_MIN)
		return INT_MIN;
	return (int)val;
}
644