xref: /openbsd-src/usr.bin/mandoc/mandoc.c (revision cd14d6427aeef325d3c90ad8c20d91d41a3d371e)
1*cd14d642Sschwarze /* $OpenBSD: mandoc.c,v 1.89 2022/05/19 15:17:50 schwarze Exp $ */
2f6854d5cSschwarze /*
3*cd14d642Sschwarze  * Copyright (c) 2010, 2011, 2015, 2017, 2018, 2019, 2020, 2021
4*cd14d642Sschwarze  *               Ingo Schwarze <schwarze@openbsd.org>
5*cd14d642Sschwarze  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
6f6854d5cSschwarze  *
7f6854d5cSschwarze  * Permission to use, copy, modify, and distribute this software for any
8f6854d5cSschwarze  * purpose with or without fee is hereby granted, provided that the above
9f6854d5cSschwarze  * copyright notice and this permission notice appear in all copies.
10f6854d5cSschwarze  *
116bb6f064Sschwarze  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12f6854d5cSschwarze  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
136bb6f064Sschwarze  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14f6854d5cSschwarze  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15f6854d5cSschwarze  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16f6854d5cSschwarze  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17f6854d5cSschwarze  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18*cd14d642Sschwarze  *
19*cd14d642Sschwarze  * Utility functions to handle end of sentence punctuation
20*cd14d642Sschwarze  * and dates and times, for use by mdoc(7) and man(7) parsers.
21*cd14d642Sschwarze  * Utility functions to handle fonts and numbers,
22*cd14d642Sschwarze  * for use by mandoc(1) parsers and formatters.
23f6854d5cSschwarze  */
247fe29c29Sschwarze #include <sys/types.h>
257fe29c29Sschwarze 
26f6854d5cSschwarze #include <assert.h>
27f6854d5cSschwarze #include <ctype.h>
28a5e11edeSschwarze #include <errno.h>
29a5e11edeSschwarze #include <limits.h>
30f6854d5cSschwarze #include <stdlib.h>
31a66b65d0Sschwarze #include <stdio.h>
32a66b65d0Sschwarze #include <string.h>
33aa2d850aSschwarze #include <time.h>
34f6854d5cSschwarze 
354f4f7972Sschwarze #include "mandoc_aux.h"
363427e516Sschwarze #include "mandoc.h"
373427e516Sschwarze #include "roff.h"
38f6854d5cSschwarze #include "libmandoc.h"
396b86842eSschwarze #include "roff_int.h"
40f6854d5cSschwarze 
41b058e777Sschwarze static	int	 a2time(time_t *, const char *, const char *);
42b058e777Sschwarze static	char	*time2a(time_t);
43aa2d850aSschwarze 
44a5e11edeSschwarze 
45a5e11edeSschwarze enum mandoc_esc
mandoc_font(const char * cp,int sz)462e362670Sschwarze mandoc_font(const char *cp, int sz)
472e362670Sschwarze {
482e362670Sschwarze 	switch (sz) {
492e362670Sschwarze 	case 0:
502e362670Sschwarze 		return ESCAPE_FONTPREV;
512e362670Sschwarze 	case 1:
522e362670Sschwarze 		switch (cp[0]) {
532e362670Sschwarze 		case 'B':
542e362670Sschwarze 		case '3':
552e362670Sschwarze 			return ESCAPE_FONTBOLD;
562e362670Sschwarze 		case 'I':
572e362670Sschwarze 		case '2':
582e362670Sschwarze 			return ESCAPE_FONTITALIC;
592e362670Sschwarze 		case 'P':
602e362670Sschwarze 			return ESCAPE_FONTPREV;
612e362670Sschwarze 		case 'R':
622e362670Sschwarze 		case '1':
632e362670Sschwarze 			return ESCAPE_FONTROMAN;
642e362670Sschwarze 		case '4':
652e362670Sschwarze 			return ESCAPE_FONTBI;
662e362670Sschwarze 		default:
672e362670Sschwarze 			return ESCAPE_ERROR;
682e362670Sschwarze 		}
692e362670Sschwarze 	case 2:
702e362670Sschwarze 		switch (cp[0]) {
712e362670Sschwarze 		case 'B':
722e362670Sschwarze 			switch (cp[1]) {
732e362670Sschwarze 			case 'I':
742e362670Sschwarze 				return ESCAPE_FONTBI;
752e362670Sschwarze 			default:
762e362670Sschwarze 				return ESCAPE_ERROR;
772e362670Sschwarze 			}
782e362670Sschwarze 		case 'C':
792e362670Sschwarze 			switch (cp[1]) {
802e362670Sschwarze 			case 'B':
817d063611Sschwarze 				return ESCAPE_FONTCB;
822e362670Sschwarze 			case 'I':
837d063611Sschwarze 				return ESCAPE_FONTCI;
842e362670Sschwarze 			case 'R':
852e362670Sschwarze 			case 'W':
867d063611Sschwarze 				return ESCAPE_FONTCR;
872e362670Sschwarze 			default:
882e362670Sschwarze 				return ESCAPE_ERROR;
892e362670Sschwarze 			}
902e362670Sschwarze 		default:
912e362670Sschwarze 			return ESCAPE_ERROR;
922e362670Sschwarze 		}
932e362670Sschwarze 	default:
942e362670Sschwarze 		return ESCAPE_ERROR;
952e362670Sschwarze 	}
962e362670Sschwarze }
972e362670Sschwarze 
/*
 * Parse the string p according to the strptime(3) format fmt.
 * If parsing succeeds and consumes all of p, store the result
 * of mktime(3) in *t and return 1.
 * Otherwise, return 0 without writing to *t.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	/* Fields not set by the format must start out as zero. */

	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&parsed);
	return 1;
}
114aa2d850aSschwarze 
/*
 * Format the time t, converted with localtime(3), as
 * "Month D, YYYY" with a full month name and an unpadded day,
 * in a buffer obtained from mandoc_malloc().
 * If any conversion step fails, the buffer is freed and
 * the result of mandoc_strdup("") is returned instead.
 */
static char *
time2a(time_t t)
{
	struct tm	*lt;
	char		*out, *cur;
	size_t		 mlen;
	int		 dlen;

	if ((lt = localtime(&t)) == NULL)
		return mandoc_strdup("");

	/*
	 * Buffer layout:
	 * at most 10 bytes for the month name (September) and a blank,
	 * at most 4 bytes for the day, a comma, and a blank,
	 * 4 bytes for the year, and 1 byte for the terminating '\0'.
	 */

	out = mandoc_malloc(10 + 4 + 4 + 1);
	cur = out;

	mlen = strftime(cur, 10 + 1, "%B ", lt);
	if (mlen != 0) {
		cur += mlen;

		/*
		 * The day is printed with a plain "%d" rather than
		 * "%2d" or "%02d", which is why the date is not
		 * formatted by one single strftime(3) call using
		 * "%B %e, %Y" or "%B %d, %Y".  Writing the pieces
		 * one by one also bounds each of them separately.
		 */

		dlen = snprintf(cur, 4 + 1, "%d, ", lt->tm_mday);
		if (dlen >= 0 && dlen <= 4) {
			cur += dlen;
			if (strftime(cur, 4 + 1, "%Y", lt) != 0)
				return out;
		}
	}

	/* One of the pieces did not fit or could not be formatted. */

	free(out);
	return mandoc_strdup("");
}
163b058e777Sschwarze 
164b058e777Sschwarze char *
mandoc_normdate(struct roff_node * nch,struct roff_node * nbl)165ea5923abSschwarze mandoc_normdate(struct roff_node *nch, struct roff_node *nbl)
166b058e777Sschwarze {
167e41d5b43Sschwarze 	char		*cp;
168aa2d850aSschwarze 	time_t		 t;
169aa2d850aSschwarze 
170ea5923abSschwarze 	/* No date specified. */
1710c0f292bSschwarze 
172ea5923abSschwarze 	if (nch == NULL) {
173ea5923abSschwarze 		if (nbl == NULL)
174ea5923abSschwarze 			mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL);
175ea5923abSschwarze 		else
176ea5923abSschwarze 			mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line,
177ea5923abSschwarze 			    nbl->pos, "%s", roff_name[nbl->tok]);
178ea5923abSschwarze 		return mandoc_strdup("");
179ea5923abSschwarze 	}
180ea5923abSschwarze 	if (*nch->string == '\0') {
181ea5923abSschwarze 		mandoc_msg(MANDOCERR_DATE_MISSING, nch->line,
182ea5923abSschwarze 		    nch->pos, "%s", roff_name[nbl->tok]);
183ea5923abSschwarze 		return mandoc_strdup("");
184ea5923abSschwarze 	}
185ea5923abSschwarze 	if (strcmp(nch->string, "$" "Mdocdate$") == 0)
18649248053Sschwarze 		return time2a(time(NULL));
18749248053Sschwarze 
18849248053Sschwarze 	/* Valid mdoc(7) date format. */
18949248053Sschwarze 
190ea5923abSschwarze 	if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) ||
191ea5923abSschwarze 	    a2time(&t, "%b %d, %Y", nch->string)) {
192e41d5b43Sschwarze 		cp = time2a(t);
193e41d5b43Sschwarze 		if (t > time(NULL) + 86400)
194ea5923abSschwarze 			mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line,
195ea5923abSschwarze 			    nch->pos, "%s %s", roff_name[nbl->tok], cp);
196ea5923abSschwarze 		else if (*nch->string != '$' &&
197ea5923abSschwarze 		    strcmp(nch->string, cp) != 0)
198ea5923abSschwarze 			mandoc_msg(MANDOCERR_DATE_NORM, nch->line,
199ea5923abSschwarze 			    nch->pos, "%s %s", roff_name[nbl->tok], cp);
200e41d5b43Sschwarze 		return cp;
201e41d5b43Sschwarze 	}
20249248053Sschwarze 
2033427e516Sschwarze 	/* In man(7), do not warn about the legacy format. */
20449248053Sschwarze 
205ea5923abSschwarze 	if (a2time(&t, "%Y-%m-%d", nch->string) == 0)
206ea5923abSschwarze 		mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos,
207ea5923abSschwarze 		    "%s %s", roff_name[nbl->tok], nch->string);
208e41d5b43Sschwarze 	else if (t > time(NULL) + 86400)
209ea5923abSschwarze 		mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos,
210ea5923abSschwarze 		    "%s %s", roff_name[nbl->tok], nch->string);
211ea5923abSschwarze 	else if (nbl->tok == MDOC_Dd)
212ea5923abSschwarze 		mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos,
213ea5923abSschwarze 		    "Dd %s", nch->string);
21449248053Sschwarze 
21549248053Sschwarze 	/* Use any non-mdoc(7) date verbatim. */
21649248053Sschwarze 
217ea5923abSschwarze 	return mandoc_strdup(nch->string);
218aa2d850aSschwarze }
219aa2d850aSschwarze 
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * The buffer is scanned backwards from its last byte.
 * The characters '.', '!', and '?' count as end-of-sentence
 * punctuation.  The closing delimiters '"', '\'', ']', and ')'
 * are skipped, such that punctuation preceding them can propagate
 * outward; but if a closing delimiter is seen before any punctuation,
 * the text is considered enclosed.
 * The scan stops at the first byte that is neither of these.
 *
 * Returns 1 if punctuation was found and either the text is not
 * enclosed or the byte that stopped the scan is alphanumeric.
 * If the scan reaches the beginning of the buffer, returns 1 only
 * if punctuation was found and the text is not enclosed.
 * Returns 0 otherwise, in particular for an empty buffer.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t		 i;
	int		 enclosed, found;

	if (sz == 0)
		return 0;

	enclosed = found = 0;

	/*
	 * Count an index down rather than decrementing a pointer:
	 * that way, no pointer before the beginning of the buffer
	 * is ever formed, and sz is not truncated to int.
	 */

	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			if (found == 0)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			return found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1]));
		}
	}

	return found && !enclosed;
}
2581068637fSschwarze 
/*
 * Convert the first sz bytes of p to an int with strtol(3),
 * using the given base.
 * Values outside the range of int are clamped to INT_MAX or INT_MIN.
 * Return -1 if sz exceeds 31 bytes, if the string is empty,
 * or if it is not completely consumed by the conversion.
 * Note that -1 is also returned for a string that validly
 * represents the number -1.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 tmp[32];
	char		*end;
	long		 val;

	/* Leave room for the terminating '\0' in the local copy. */

	if (sz >= sizeof(tmp))
		return -1;

	memcpy(tmp, p, sz);
	tmp[sz] = '\0';

	errno = 0;
	val = strtol(tmp, &end, base);

	/* Reject empty input and trailing garbage. */

	if (tmp[0] == '\0' || *end != '\0')
		return -1;

	if (val > INT_MAX)
		return INT_MAX;
	if (val < INT_MIN)
		return INT_MIN;

	return (int)val;
}
289