1 /* $OpenBSD: mandoc.c,v 1.89 2022/05/19 15:17:50 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010, 2011, 2015, 2017, 2018, 2019, 2020, 2021 4 * Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * Utility functions to handle end of sentence punctuation 20 * and dates and times, for use by mdoc(7) and man(7) parsers. 21 * Utility functions to handle fonts and numbers, 22 * for use by mandoc(1) parsers and formatters. 23 */ 24 #include <sys/types.h> 25 26 #include <assert.h> 27 #include <ctype.h> 28 #include <errno.h> 29 #include <limits.h> 30 #include <stdlib.h> 31 #include <stdio.h> 32 #include <string.h> 33 #include <time.h> 34 35 #include "mandoc_aux.h" 36 #include "mandoc.h" 37 #include "roff.h" 38 #include "libmandoc.h" 39 #include "roff_int.h" 40 41 static int a2time(time_t *, const char *, const char *); 42 static char *time2a(time_t); 43 44 45 enum mandoc_esc 46 mandoc_font(const char *cp, int sz) 47 { 48 switch (sz) { 49 case 0: 50 return ESCAPE_FONTPREV; 51 case 1: 52 switch (cp[0]) { 53 case 'B': 54 case '3': 55 return ESCAPE_FONTBOLD; 56 case 'I': 57 case '2': 58 return ESCAPE_FONTITALIC; 59 case 'P': 60 return ESCAPE_FONTPREV; 61 case 'R': 62 case '1': 63 return ESCAPE_FONTROMAN; 64 case '4': 65 return ESCAPE_FONTBI; 66 default: 67 return ESCAPE_ERROR; 68 } 69 case 2: 70 switch (cp[0]) { 71 case 'B': 72 switch (cp[1]) { 73 case 'I': 74 return ESCAPE_FONTBI; 75 default: 76 return ESCAPE_ERROR; 77 } 78 case 'C': 79 switch (cp[1]) { 80 case 'B': 81 return ESCAPE_FONTCB; 82 case 'I': 83 return ESCAPE_FONTCI; 84 case 'R': 85 case 'W': 86 return ESCAPE_FONTCR; 87 default: 88 return ESCAPE_ERROR; 89 } 90 default: 91 return ESCAPE_ERROR; 92 } 93 default: 94 return ESCAPE_ERROR; 95 } 96 } 97 98 static int 99 a2time(time_t *t, const char *fmt, const char *p) 100 { 101 struct tm tm; 102 char *pp; 103 104 memset(&tm, 0, sizeof(struct tm)); 105 106 pp = strptime(p, fmt, &tm); 107 if (NULL != pp && '\0' == *pp) { 108 *t = mktime(&tm); 109 return 1; 110 } 111 112 return 0; 113 } 114 115 static char * 116 time2a(time_t t) 117 { 118 struct tm *tm; 119 char *buf, *p; 120 size_t ssz; 121 int isz; 122 123 buf = NULL; 124 tm = localtime(&t); 125 if (tm == NULL) 126 goto fail; 127 128 /* 129 * Reserve space: 130 * up to 9 characters for the month (September) + blank 131 * up to 2 characters for the day + comma + blank 132 * 4 characters for the year and a terminating '\0' 133 */ 134 135 p = buf = mandoc_malloc(10 + 4 + 4 + 1); 136 137 if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0) 138 goto fail; 139 p += (int)ssz; 140 141 /* 142 * The output format is just "%d" here, not "%2d" or "%02d". 143 * That's also the reason why we can't just format the 144 * date as a whole with "%B %e, %Y" or "%B %d, %Y". 145 * Besides, the present approach is less prone to buffer 146 * overflows, in case anybody should ever introduce the bug 147 * of looking at LC_TIME. 148 */ 149 150 isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday); 151 if (isz < 0 || isz > 4) 152 goto fail; 153 p += isz; 154 155 if (strftime(p, 4 + 1, "%Y", tm) == 0) 156 goto fail; 157 return buf; 158 159 fail: 160 free(buf); 161 return mandoc_strdup(""); 162 } 163 164 char * 165 mandoc_normdate(struct roff_node *nch, struct roff_node *nbl) 166 { 167 char *cp; 168 time_t t; 169 170 /* No date specified. */ 171 172 if (nch == NULL) { 173 if (nbl == NULL) 174 mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL); 175 else 176 mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line, 177 nbl->pos, "%s", roff_name[nbl->tok]); 178 return mandoc_strdup(""); 179 } 180 if (*nch->string == '\0') { 181 mandoc_msg(MANDOCERR_DATE_MISSING, nch->line, 182 nch->pos, "%s", roff_name[nbl->tok]); 183 return mandoc_strdup(""); 184 } 185 if (strcmp(nch->string, "$" "Mdocdate$") == 0) 186 return time2a(time(NULL)); 187 188 /* Valid mdoc(7) date format. */ 189 190 if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) || 191 a2time(&t, "%b %d, %Y", nch->string)) { 192 cp = time2a(t); 193 if (t > time(NULL) + 86400) 194 mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, 195 nch->pos, "%s %s", roff_name[nbl->tok], cp); 196 else if (*nch->string != '$' && 197 strcmp(nch->string, cp) != 0) 198 mandoc_msg(MANDOCERR_DATE_NORM, nch->line, 199 nch->pos, "%s %s", roff_name[nbl->tok], cp); 200 return cp; 201 } 202 203 /* In man(7), do not warn about the legacy format. */ 204 205 if (a2time(&t, "%Y-%m-%d", nch->string) == 0) 206 mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos, 207 "%s %s", roff_name[nbl->tok], nch->string); 208 else if (t > time(NULL) + 86400) 209 mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos, 210 "%s %s", roff_name[nbl->tok], nch->string); 211 else if (nbl->tok == MDOC_Dd) 212 mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos, 213 "Dd %s", nch->string); 214 215 /* Use any non-mdoc(7) date verbatim. */ 216 217 return mandoc_strdup(nch->string); 218 } 219 220 int 221 mandoc_eos(const char *p, size_t sz) 222 { 223 const char *q; 224 int enclosed, found; 225 226 if (0 == sz) 227 return 0; 228 229 /* 230 * End-of-sentence recognition must include situations where 231 * some symbols, such as `)', allow prior EOS punctuation to 232 * propagate outward. 233 */ 234 235 enclosed = found = 0; 236 for (q = p + (int)sz - 1; q >= p; q--) { 237 switch (*q) { 238 case '\"': 239 case '\'': 240 case ']': 241 case ')': 242 if (0 == found) 243 enclosed = 1; 244 break; 245 case '.': 246 case '!': 247 case '?': 248 found = 1; 249 break; 250 default: 251 return found && 252 (!enclosed || isalnum((unsigned char)*q)); 253 } 254 } 255 256 return found && !enclosed; 257 } 258 259 /* 260 * Convert a string to a long that may not be <0. 261 * If the string is invalid, or is less than 0, return -1. 262 */ 263 int 264 mandoc_strntoi(const char *p, size_t sz, int base) 265 { 266 char buf[32]; 267 char *ep; 268 long v; 269 270 if (sz > 31) 271 return -1; 272 273 memcpy(buf, p, sz); 274 buf[(int)sz] = '\0'; 275 276 errno = 0; 277 v = strtol(buf, &ep, base); 278 279 if (buf[0] == '\0' || *ep != '\0') 280 return -1; 281 282 if (v > INT_MAX) 283 v = INT_MAX; 284 if (v < INT_MIN) 285 v = INT_MIN; 286 287 return (int)v; 288 } 289