/* $OpenBSD: mandoc.c,v 1.89 2022/05/19 15:17:50 schwarze Exp $ */
/*
 * Copyright (c) 2010, 2011, 2015, 2017, 2018, 2019, 2020, 2021
 * Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Utility functions to handle end of sentence punctuation
 * and dates and times, for use by mdoc(7) and man(7) parsers.
 * Utility functions to handle fonts and numbers,
 * for use by mandoc(1) parsers and formatters.
 */
247fe29c29Sschwarze #include <sys/types.h>
257fe29c29Sschwarze
26f6854d5cSschwarze #include <assert.h>
27f6854d5cSschwarze #include <ctype.h>
28a5e11edeSschwarze #include <errno.h>
29a5e11edeSschwarze #include <limits.h>
30f6854d5cSschwarze #include <stdlib.h>
31a66b65d0Sschwarze #include <stdio.h>
32a66b65d0Sschwarze #include <string.h>
33aa2d850aSschwarze #include <time.h>
34f6854d5cSschwarze
354f4f7972Sschwarze #include "mandoc_aux.h"
363427e516Sschwarze #include "mandoc.h"
373427e516Sschwarze #include "roff.h"
38f6854d5cSschwarze #include "libmandoc.h"
396b86842eSschwarze #include "roff_int.h"
40f6854d5cSschwarze
41b058e777Sschwarze static int a2time(time_t *, const char *, const char *);
42b058e777Sschwarze static char *time2a(time_t);
43aa2d850aSschwarze
44a5e11edeSschwarze
45a5e11edeSschwarze enum mandoc_esc
mandoc_font(const char * cp,int sz)462e362670Sschwarze mandoc_font(const char *cp, int sz)
472e362670Sschwarze {
482e362670Sschwarze switch (sz) {
492e362670Sschwarze case 0:
502e362670Sschwarze return ESCAPE_FONTPREV;
512e362670Sschwarze case 1:
522e362670Sschwarze switch (cp[0]) {
532e362670Sschwarze case 'B':
542e362670Sschwarze case '3':
552e362670Sschwarze return ESCAPE_FONTBOLD;
562e362670Sschwarze case 'I':
572e362670Sschwarze case '2':
582e362670Sschwarze return ESCAPE_FONTITALIC;
592e362670Sschwarze case 'P':
602e362670Sschwarze return ESCAPE_FONTPREV;
612e362670Sschwarze case 'R':
622e362670Sschwarze case '1':
632e362670Sschwarze return ESCAPE_FONTROMAN;
642e362670Sschwarze case '4':
652e362670Sschwarze return ESCAPE_FONTBI;
662e362670Sschwarze default:
672e362670Sschwarze return ESCAPE_ERROR;
682e362670Sschwarze }
692e362670Sschwarze case 2:
702e362670Sschwarze switch (cp[0]) {
712e362670Sschwarze case 'B':
722e362670Sschwarze switch (cp[1]) {
732e362670Sschwarze case 'I':
742e362670Sschwarze return ESCAPE_FONTBI;
752e362670Sschwarze default:
762e362670Sschwarze return ESCAPE_ERROR;
772e362670Sschwarze }
782e362670Sschwarze case 'C':
792e362670Sschwarze switch (cp[1]) {
802e362670Sschwarze case 'B':
817d063611Sschwarze return ESCAPE_FONTCB;
822e362670Sschwarze case 'I':
837d063611Sschwarze return ESCAPE_FONTCI;
842e362670Sschwarze case 'R':
852e362670Sschwarze case 'W':
867d063611Sschwarze return ESCAPE_FONTCR;
872e362670Sschwarze default:
882e362670Sschwarze return ESCAPE_ERROR;
892e362670Sschwarze }
902e362670Sschwarze default:
912e362670Sschwarze return ESCAPE_ERROR;
922e362670Sschwarze }
932e362670Sschwarze default:
942e362670Sschwarze return ESCAPE_ERROR;
952e362670Sschwarze }
962e362670Sschwarze }
972e362670Sschwarze
/*
 * Parse the string p according to the strptime(3) format fmt.
 *
 * If the format matches and consumes the whole string, convert the
 * broken-down time with mktime(3), store the result in *t,
 * and return 1.  Otherwise, leave *t untouched and return 0.
 *
 * All fields of the broken-down time that the format does not set
 * are zero, so formats containing only a date yield midnight.
 * The result of mktime(3) is stored without being checked.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 tm;
	char		*pp;

	memset(&tm, 0, sizeof(tm));

	/* Reject both parse failures and trailing garbage. */
	pp = strptime(p, fmt, &tm);
	if (pp == NULL || *pp != '\0')
		return 0;

	*t = mktime(&tm);
	return 1;
}
114aa2d850aSschwarze
/*
 * Format the time t, interpreted by localtime(3),
 * as "Month D, YYYY" with the day of the month unpadded.
 *
 * On success, return the buffer obtained from mandoc_malloc()
 * that holds the formatted date.  If localtime(3) fails or any
 * component does not fit into the space reserved for it, release
 * the buffer and return the result of mandoc_strdup("") instead.
 * Either way, the caller presumably owns the returned string.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* Keep free(buf) in the failure path safe before allocation. */
	buf = NULL;
	tm = localtime(&t);
	if (tm == NULL)
		goto fail;

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */

	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0)
		goto fail;
	p += ssz;

	/*
	 * The output format is just "%d" here, not "%2d" or "%02d".
	 * That's also the reason why we can't just format the
	 * date as a whole with "%B %e, %Y" or "%B %d, %Y".
	 * Besides, the present approach is less prone to buffer
	 * overflows, in case anybody should ever introduce the bug
	 * of looking at LC_TIME.
	 */

	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	/* A year that needs more than four digits makes this fail. */
	if (strftime(p, 4 + 1, "%Y", tm) == 0)
		goto fail;
	return buf;

fail:
	free(buf);
	return mandoc_strdup("");
}
163b058e777Sschwarze
164b058e777Sschwarze char *
mandoc_normdate(struct roff_node * nch,struct roff_node * nbl)165ea5923abSschwarze mandoc_normdate(struct roff_node *nch, struct roff_node *nbl)
166b058e777Sschwarze {
167e41d5b43Sschwarze char *cp;
168aa2d850aSschwarze time_t t;
169aa2d850aSschwarze
170ea5923abSschwarze /* No date specified. */
1710c0f292bSschwarze
172ea5923abSschwarze if (nch == NULL) {
173ea5923abSschwarze if (nbl == NULL)
174ea5923abSschwarze mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL);
175ea5923abSschwarze else
176ea5923abSschwarze mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line,
177ea5923abSschwarze nbl->pos, "%s", roff_name[nbl->tok]);
178ea5923abSschwarze return mandoc_strdup("");
179ea5923abSschwarze }
180ea5923abSschwarze if (*nch->string == '\0') {
181ea5923abSschwarze mandoc_msg(MANDOCERR_DATE_MISSING, nch->line,
182ea5923abSschwarze nch->pos, "%s", roff_name[nbl->tok]);
183ea5923abSschwarze return mandoc_strdup("");
184ea5923abSschwarze }
185ea5923abSschwarze if (strcmp(nch->string, "$" "Mdocdate$") == 0)
18649248053Sschwarze return time2a(time(NULL));
18749248053Sschwarze
18849248053Sschwarze /* Valid mdoc(7) date format. */
18949248053Sschwarze
190ea5923abSschwarze if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) ||
191ea5923abSschwarze a2time(&t, "%b %d, %Y", nch->string)) {
192e41d5b43Sschwarze cp = time2a(t);
193e41d5b43Sschwarze if (t > time(NULL) + 86400)
194ea5923abSschwarze mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line,
195ea5923abSschwarze nch->pos, "%s %s", roff_name[nbl->tok], cp);
196ea5923abSschwarze else if (*nch->string != '$' &&
197ea5923abSschwarze strcmp(nch->string, cp) != 0)
198ea5923abSschwarze mandoc_msg(MANDOCERR_DATE_NORM, nch->line,
199ea5923abSschwarze nch->pos, "%s %s", roff_name[nbl->tok], cp);
200e41d5b43Sschwarze return cp;
201e41d5b43Sschwarze }
20249248053Sschwarze
2033427e516Sschwarze /* In man(7), do not warn about the legacy format. */
20449248053Sschwarze
205ea5923abSschwarze if (a2time(&t, "%Y-%m-%d", nch->string) == 0)
206ea5923abSschwarze mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos,
207ea5923abSschwarze "%s %s", roff_name[nbl->tok], nch->string);
208e41d5b43Sschwarze else if (t > time(NULL) + 86400)
209ea5923abSschwarze mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos,
210ea5923abSschwarze "%s %s", roff_name[nbl->tok], nch->string);
211ea5923abSschwarze else if (nbl->tok == MDOC_Dd)
212ea5923abSschwarze mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos,
213ea5923abSschwarze "Dd %s", nch->string);
21449248053Sschwarze
21549248053Sschwarze /* Use any non-mdoc(7) date verbatim. */
21649248053Sschwarze
217ea5923abSschwarze return mandoc_strdup(nch->string);
218aa2d850aSschwarze }
219aa2d850aSschwarze
/*
 * Decide whether the sz bytes starting at p end a sentence.
 * The buffer need not be NUL-terminated.
 *
 * The buffer is scanned backwards.  Trailing closing delimiters
 * (double quote, single quote, ']', ')') and end-of-sentence
 * punctuation ('.', '!', '?') are skipped until some other byte
 * is found.  Return 1 if punctuation was seen and either no closing
 * delimiter followed it or the byte preceding the trailing run
 * is alphanumeric; return 0 otherwise.  If the whole buffer consists
 * of such trailing bytes, return 1 only if it contains punctuation
 * that is not followed by a closing delimiter.
 * An empty buffer never ends a sentence.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t	 i;
	int	 enclosed, found;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	enclosed = found = 0;

	/*
	 * Count down with an unsigned index rather than a pointer,
	 * such that no pointer before the start of the buffer is
	 * ever formed and sz is not truncated to int.
	 * For sz == 0, the loop body never runs and 0 is returned.
	 */

	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			/* Only delimiters after the punctuation count. */
			if (!found)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			return found &&
			    (!enclosed || isalnum((unsigned char)p[i - 1]));
		}
	}

	return found && !enclosed;
}
2581068637fSschwarze
/*
 * Convert the first sz bytes of p to an int using strtol(3)
 * with the given base.  The input need not be NUL-terminated.
 *
 * Negative numbers are accepted.  Values outside the range of int
 * are clamped to INT_MIN or INT_MAX.
 *
 * Return -1 if the input is empty, longer than 31 bytes, or not
 * completely consumed by the conversion.  Note that -1 is also
 * the result for the valid input "-1", so callers cannot tell
 * these cases apart from the return value alone.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char	 buf[32];
	char	*ep;
	long	 v;

	/* Empty input is invalid; longer input would not fit into buf. */
	if (sz == 0 || sz >= sizeof(buf))
		return -1;

	memcpy(buf, p, sz);
	buf[sz] = '\0';

	/*
	 * Preset the end pointer in case strtol(3) rejects the base
	 * without storing it; the input is then reported as invalid.
	 * Range errors need no errno check because strtol(3) saturates
	 * at LONG_MIN and LONG_MAX, which the clamping below handles.
	 */
	ep = buf;
	errno = 0;
	v = strtol(buf, &ep, base);

	if (buf[0] == '\0' || *ep != '\0')
		return -1;

	if (v > INT_MAX)
		v = INT_MAX;
	if (v < INT_MIN)
		v = INT_MIN;

	return (int)v;
}
289