xref: /openbsd-src/lib/libc/time/strptime.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: strptime.c,v 1.30 2019/05/12 12:49:52 schwarze Exp $ */
2 /*	$NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $	*/
3 /*-
4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <ctype.h>
32 #include <locale.h>
33 #include <stdint.h>
34 #include <string.h>
35 #include <time.h>
36 
37 #include "localedef.h"
38 #include "private.h"
39 #include "tzfile.h"
40 
/* Shorthand for reading member x of the object _CurrentTimeLocale points to. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)
42 
/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * _ALT_E and _ALT_O are the flag bits recorded for the "%E" and "%O"
 * modifiers.  _LEGAL_ALT(x) makes the enclosing function return a null
 * pointer constant when `alt_format' (a variable that must be in scope at
 * the point of use) has a bit set that is not part of x.
 *
 * The body is wrapped in do { } while (0) so that "_LEGAL_ALT(x);" is a
 * single statement and stays correct inside an unbraced if/else.
 */
#define _ALT_E			0x01
#define _ALT_O			0x02
#define	_LEGAL_ALT(x)		do { if (alt_format & ~(x)) return (0); } while (0)
50 
/*
 * Bit flags, one per struct tm member, recording which members have been
 * set so far so that the remaining ones can be derived from them.
 */
#define FIELD_TM_MON	0x01
#define FIELD_TM_MDAY	0x02
#define FIELD_TM_WDAY	0x04
#define FIELD_TM_YDAY	0x08
#define FIELD_TM_YEAR	0x10
59 
/* Zone designators that _strptime() stores in tm_zone for universal time. */
static char gmt[] = "GMT";
static char utc[] = "UTC";

/*
 * RFC-822/RFC-2822 North American zone names, standard time first and
 * daylight time second.  Both tables are searched over their first four
 * entries only.
 */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};
static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};
69 
/*
 * Number of days in each month, January first.  The two rows differ only
 * in February (28 vs. 29 days); _strptime() selects the row with
 * isleap(year).
 */
static const int mon_lengths[2][MONSPERYEAR] = {
        { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
        { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
};
74 
/* Forward declarations of the file-local helpers defined below. */
/* Bounded decimal parsers: 64-bit and int result respectively. */
static	int _conv_num64(const unsigned char **, int64_t *, int64_t, int64_t);
static	int _conv_num(const unsigned char **, int *, int, int);
/* Leap-year count used for the day-of-week computation. */
static	int leaps_thru_end_of(const int y);
/* Parser proper; the last argument asks for the parse state to be reset. */
static	char *_strptime(const char *, const char *, struct tm *, int);
/* Case-insensitive prefix lookup in one or two tables of names. */
static	const u_char *_find_string(const u_char *, int *, const char * const *,
	    const char * const *, int);
81 
82 
/*
 * Public entry point: parse buf according to fmt into *tm.
 *
 * Delegates to _strptime() with a non-zero `initialize' argument, so each
 * top-level call starts from freshly reset parse state.  The result of
 * _strptime() is returned unchanged.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	char *rest;

	rest = _strptime(buf, fmt, tm, 1);
	return (rest);
}
DEF_WEAK(strptime);
89 
90 static char *
91 _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
92 {
93 	unsigned char c;
94 	const unsigned char *bp, *ep;
95 	size_t len;
96 	int alt_format, i, offs;
97 	int neg = 0;
98 	static int century, relyear, fields;
99 
100 	if (initialize) {
101 		century = TM_YEAR_BASE;
102 		relyear = -1;
103 		fields = 0;
104 	}
105 
106 	bp = (const unsigned char *)buf;
107 	while ((c = *fmt) != '\0') {
108 		/* Clear `alternate' modifier prior to new conversion. */
109 		alt_format = 0;
110 
111 		/* Eat up white-space. */
112 		if (isspace(c)) {
113 			while (isspace(*bp))
114 				bp++;
115 
116 			fmt++;
117 			continue;
118 		}
119 
120 		if ((c = *fmt++) != '%')
121 			goto literal;
122 
123 
124 again:		switch (c = *fmt++) {
125 		case '%':	/* "%%" is converted to "%". */
126 literal:
127 		if (c != *bp++)
128 			return (NULL);
129 
130 		break;
131 
132 		/*
133 		 * "Alternative" modifiers. Just set the appropriate flag
134 		 * and start over again.
135 		 */
136 		case 'E':	/* "%E?" alternative conversion modifier. */
137 			_LEGAL_ALT(0);
138 			alt_format |= _ALT_E;
139 			goto again;
140 
141 		case 'O':	/* "%O?" alternative conversion modifier. */
142 			_LEGAL_ALT(0);
143 			alt_format |= _ALT_O;
144 			goto again;
145 
146 		/*
147 		 * "Complex" conversion rules, implemented through recursion.
148 		 */
149 		case 'c':	/* Date and time, using the locale's format. */
150 			_LEGAL_ALT(_ALT_E);
151 			if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
152 				return (NULL);
153 			break;
154 
155 		case 'D':	/* The date as "%m/%d/%y". */
156 			_LEGAL_ALT(0);
157 			if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
158 				return (NULL);
159 			break;
160 
161 		case 'F':	/* The date as "%Y-%m-%d". */
162 			_LEGAL_ALT(0);
163 			if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
164 				return (NULL);
165 			continue;
166 
167 		case 'R':	/* The time as "%H:%M". */
168 			_LEGAL_ALT(0);
169 			if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
170 				return (NULL);
171 			break;
172 
173 		case 'r':	/* The time as "%I:%M:%S %p". */
174 			_LEGAL_ALT(0);
175 			if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
176 				return (NULL);
177 			break;
178 
179 		case 'T':	/* The time as "%H:%M:%S". */
180 			_LEGAL_ALT(0);
181 			if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
182 				return (NULL);
183 			break;
184 
185 		case 'X':	/* The time, using the locale's format. */
186 			_LEGAL_ALT(_ALT_E);
187 			if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
188 				return (NULL);
189 			break;
190 
191 		case 'x':	/* The date, using the locale's format. */
192 			_LEGAL_ALT(_ALT_E);
193 			if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
194 				return (NULL);
195 			break;
196 
197 		/*
198 		 * "Elementary" conversion rules.
199 		 */
200 		case 'A':	/* The day of week, using the locale's form. */
201 		case 'a':
202 			_LEGAL_ALT(0);
203 			for (i = 0; i < 7; i++) {
204 				/* Full name. */
205 				len = strlen(_ctloc(day[i]));
206 				if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
207 					break;
208 
209 				/* Abbreviated name. */
210 				len = strlen(_ctloc(abday[i]));
211 				if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
212 					break;
213 			}
214 
215 			/* Nothing matched. */
216 			if (i == 7)
217 				return (NULL);
218 
219 			tm->tm_wday = i;
220 			bp += len;
221 			fields |= FIELD_TM_WDAY;
222 			break;
223 
224 		case 'B':	/* The month, using the locale's form. */
225 		case 'b':
226 		case 'h':
227 			_LEGAL_ALT(0);
228 			for (i = 0; i < 12; i++) {
229 				/* Full name. */
230 				len = strlen(_ctloc(mon[i]));
231 				if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
232 					break;
233 
234 				/* Abbreviated name. */
235 				len = strlen(_ctloc(abmon[i]));
236 				if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
237 					break;
238 			}
239 
240 			/* Nothing matched. */
241 			if (i == 12)
242 				return (NULL);
243 
244 			tm->tm_mon = i;
245 			bp += len;
246 			fields |= FIELD_TM_MON;
247 			break;
248 
249 		case 'C':	/* The century number. */
250 			_LEGAL_ALT(_ALT_E);
251 			if (!(_conv_num(&bp, &i, 0, 99)))
252 				return (NULL);
253 
254 			century = i * 100;
255 			break;
256 
257 		case 'e':	/* The day of month. */
258 			if (isspace(*bp))
259 				bp++;
260 			/* FALLTHROUGH */
261 		case 'd':
262 			_LEGAL_ALT(_ALT_O);
263 			if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
264 				return (NULL);
265 			fields |= FIELD_TM_MDAY;
266 			break;
267 
268 		case 'k':	/* The hour (24-hour clock representation). */
269 			_LEGAL_ALT(0);
270 			/* FALLTHROUGH */
271 		case 'H':
272 			_LEGAL_ALT(_ALT_O);
273 			if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
274 				return (NULL);
275 			break;
276 
277 		case 'l':	/* The hour (12-hour clock representation). */
278 			_LEGAL_ALT(0);
279 			/* FALLTHROUGH */
280 		case 'I':
281 			_LEGAL_ALT(_ALT_O);
282 			if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
283 				return (NULL);
284 			break;
285 
286 		case 'j':	/* The day of year. */
287 			_LEGAL_ALT(0);
288 			if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
289 				return (NULL);
290 			tm->tm_yday--;
291 			fields |= FIELD_TM_YDAY;
292 			break;
293 
294 		case 'M':	/* The minute. */
295 			_LEGAL_ALT(_ALT_O);
296 			if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
297 				return (NULL);
298 			break;
299 
300 		case 'm':	/* The month. */
301 			_LEGAL_ALT(_ALT_O);
302 			if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
303 				return (NULL);
304 			tm->tm_mon--;
305 			fields |= FIELD_TM_MON;
306 			break;
307 
308 		case 'p':	/* The locale's equivalent of AM/PM. */
309 			_LEGAL_ALT(0);
310 			/* AM? */
311 			len = strlen(_ctloc(am_pm[0]));
312 			if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
313 				if (tm->tm_hour > 12)	/* i.e., 13:00 AM ?! */
314 					return (NULL);
315 				else if (tm->tm_hour == 12)
316 					tm->tm_hour = 0;
317 
318 				bp += len;
319 				break;
320 			}
321 			/* PM? */
322 			len = strlen(_ctloc(am_pm[1]));
323 			if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
324 				if (tm->tm_hour > 12)	/* i.e., 13:00 PM ?! */
325 					return (NULL);
326 				else if (tm->tm_hour < 12)
327 					tm->tm_hour += 12;
328 
329 				bp += len;
330 				break;
331 			}
332 
333 			/* Nothing matched. */
334 			return (NULL);
335 
336 		case 'S':	/* The seconds. */
337 			_LEGAL_ALT(_ALT_O);
338 			if (!(_conv_num(&bp, &tm->tm_sec, 0, 60)))
339 				return (NULL);
340 			break;
341 		case 's':	/* Seconds since epoch */
342 			{
343 				int64_t i64;
344 				if (!(_conv_num64(&bp, &i64, 0, INT64_MAX)))
345 					return (NULL);
346 				if (!gmtime_r(&i64, tm))
347 					return (NULL);
348 				fields = 0xffff;	 /* everything */
349 			}
350 			break;
351 		case 'U':	/* The week of year, beginning on sunday. */
352 		case 'W':	/* The week of year, beginning on monday. */
353 			_LEGAL_ALT(_ALT_O);
354 			/*
355 			 * XXX This is bogus, as we can not assume any valid
356 			 * information present in the tm structure at this
357 			 * point to calculate a real value, so just check the
358 			 * range for now.
359 			 */
360 			 if (!(_conv_num(&bp, &i, 0, 53)))
361 				return (NULL);
362 			 break;
363 
364 		case 'w':	/* The day of week, beginning on sunday. */
365 			_LEGAL_ALT(_ALT_O);
366 			if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
367 				return (NULL);
368 			fields |= FIELD_TM_WDAY;
369 			break;
370 
371 		case 'u':	/* The day of week, monday = 1. */
372 			_LEGAL_ALT(_ALT_O);
373 			if (!(_conv_num(&bp, &i, 1, 7)))
374 				return (NULL);
375 			tm->tm_wday = i % 7;
376 			fields |= FIELD_TM_WDAY;
377 			continue;
378 
379 		case 'g':	/* The year corresponding to the ISO week
380 				 * number but without the century.
381 				 */
382 			if (!(_conv_num(&bp, &i, 0, 99)))
383 				return (NULL);
384 			continue;
385 
386 		case 'G':	/* The year corresponding to the ISO week
387 				 * number with century.
388 				 */
389 			do
390 				bp++;
391 			while (isdigit(*bp));
392 			continue;
393 
394 		case 'V':	/* The ISO 8601:1988 week number as decimal */
395 			if (!(_conv_num(&bp, &i, 0, 53)))
396 				return (NULL);
397 			continue;
398 
399 		case 'Y':	/* The year. */
400 			_LEGAL_ALT(_ALT_E);
401 			if (!(_conv_num(&bp, &i, 0, 9999)))
402 				return (NULL);
403 
404 			relyear = -1;
405 			tm->tm_year = i - TM_YEAR_BASE;
406 			fields |= FIELD_TM_YEAR;
407 			break;
408 
409 		case 'y':	/* The year within the century (2 digits). */
410 			_LEGAL_ALT(_ALT_E | _ALT_O);
411 			if (!(_conv_num(&bp, &relyear, 0, 99)))
412 				return (NULL);
413 			break;
414 
415 		case 'Z':
416 			tzset();
417 			if (strncmp((const char *)bp, gmt, 3) == 0) {
418 				tm->tm_isdst = 0;
419 				tm->tm_gmtoff = 0;
420 				tm->tm_zone = gmt;
421 				bp += 3;
422 			} else if (strncmp((const char *)bp, utc, 3) == 0) {
423 				tm->tm_isdst = 0;
424 				tm->tm_gmtoff = 0;
425 				tm->tm_zone = utc;
426 				bp += 3;
427 			} else {
428 				ep = _find_string(bp, &i,
429 						 (const char * const *)tzname,
430 						  NULL, 2);
431 				if (ep == NULL)
432 					return (NULL);
433 
434 				tm->tm_isdst = i;
435 				tm->tm_gmtoff = -(timezone);
436 				tm->tm_zone = tzname[i];
437 				bp = ep;
438 			}
439 			continue;
440 
441 		case 'z':
442 			/*
443 			 * We recognize all ISO 8601 formats:
444 			 * Z	= Zulu time/UTC
445 			 * [+-]hhmm
446 			 * [+-]hh:mm
447 			 * [+-]hh
448 			 * We recognize all RFC-822/RFC-2822 formats:
449 			 * UT|GMT
450 			 *          North American : UTC offsets
451 			 * E[DS]T = Eastern : -4 | -5
452 			 * C[DS]T = Central : -5 | -6
453 			 * M[DS]T = Mountain: -6 | -7
454 			 * P[DS]T = Pacific : -7 | -8
455 			 */
456 			while (isspace(*bp))
457 				bp++;
458 
459 			switch (*bp++) {
460 			case 'G':
461 				if (*bp++ != 'M')
462 					return NULL;
463 				/*FALLTHROUGH*/
464 			case 'U':
465 				if (*bp++ != 'T')
466 					return NULL;
467 				/*FALLTHROUGH*/
468 			case 'Z':
469 				tm->tm_isdst = 0;
470 				tm->tm_gmtoff = 0;
471 				tm->tm_zone = utc;
472 				continue;
473 			case '+':
474 				neg = 0;
475 				break;
476 			case '-':
477 				neg = 1;
478 				break;
479 			default:
480 				--bp;
481 				ep = _find_string(bp, &i, nast, NULL, 4);
482 				if (ep != NULL) {
483 					tm->tm_gmtoff = (-5 - i) * SECSPERHOUR;
484 					tm->tm_zone = (char *)nast[i];
485 					bp = ep;
486 					continue;
487 				}
488 				ep = _find_string(bp, &i, nadt, NULL, 4);
489 				if (ep != NULL) {
490 					tm->tm_isdst = 1;
491 					tm->tm_gmtoff = (-4 - i) * SECSPERHOUR;
492 					tm->tm_zone = (char *)nadt[i];
493 					bp = ep;
494 					continue;
495 				}
496 				return NULL;
497 			}
498 			if (!isdigit(bp[0]) || !isdigit(bp[1]))
499 				return NULL;
500 			offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
501 			bp += 2;
502 			if (*bp == ':')
503 				bp++;
504 			if (isdigit(*bp)) {
505 				offs += (*bp++ - '0') * 10 * SECSPERMIN;
506 				if (!isdigit(*bp))
507 					return NULL;
508 				offs += (*bp++ - '0') * SECSPERMIN;
509 			}
510 			if (neg)
511 				offs = -offs;
512 			tm->tm_isdst = 0;	/* XXX */
513 			tm->tm_gmtoff = offs;
514 			tm->tm_zone = NULL;	/* XXX */
515 			continue;
516 
517 		/*
518 		 * Miscellaneous conversions.
519 		 */
520 		case 'n':	/* Any kind of white-space. */
521 		case 't':
522 			_LEGAL_ALT(0);
523 			while (isspace(*bp))
524 				bp++;
525 			break;
526 
527 
528 		default:	/* Unknown/unsupported conversion. */
529 			return (NULL);
530 		}
531 
532 
533 	}
534 
535 	/*
536 	 * We need to evaluate the two digit year spec (%y)
537 	 * last as we can get a century spec (%C) at any time.
538 	 */
539 	if (relyear != -1) {
540 		if (century == TM_YEAR_BASE) {
541 			if (relyear <= 68)
542 				tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
543 			else
544 				tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
545 		} else {
546 			tm->tm_year = relyear + century - TM_YEAR_BASE;
547 		}
548 		fields |= FIELD_TM_YEAR;
549 	}
550 
551 	/* Compute some missing values when possible. */
552 	if (fields & FIELD_TM_YEAR) {
553 		const int year = tm->tm_year + TM_YEAR_BASE;
554 		const int *mon_lens = mon_lengths[isleap(year)];
555 		if (!(fields & FIELD_TM_YDAY) &&
556 		    (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) {
557 			tm->tm_yday = tm->tm_mday - 1;
558 			for (i = 0; i < tm->tm_mon; i++)
559 				tm->tm_yday += mon_lens[i];
560 			fields |= FIELD_TM_YDAY;
561 		}
562 		if (fields & FIELD_TM_YDAY) {
563 			int days = tm->tm_yday;
564 			if (!(fields & FIELD_TM_WDAY)) {
565 				tm->tm_wday = EPOCH_WDAY +
566 				    ((year - EPOCH_YEAR) % DAYSPERWEEK) *
567 				    (DAYSPERNYEAR % DAYSPERWEEK) +
568 				    leaps_thru_end_of(year - 1) -
569 				    leaps_thru_end_of(EPOCH_YEAR - 1) +
570 				    tm->tm_yday;
571 				tm->tm_wday %= DAYSPERWEEK;
572 				if (tm->tm_wday < 0)
573 					tm->tm_wday += DAYSPERWEEK;
574 			}
575 			if (!(fields & FIELD_TM_MON)) {
576 				tm->tm_mon = 0;
577 				while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon])
578 					days -= mon_lens[tm->tm_mon++];
579 			}
580 			if (!(fields & FIELD_TM_MDAY))
581 				tm->tm_mday = days + 1;
582 		}
583 	}
584 
585 	return ((char *)bp);
586 }
587 
588 
589 static int
590 _conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
591 {
592 	int result = 0;
593 	int rulim = ulim;
594 
595 	if (**buf < '0' || **buf > '9')
596 		return (0);
597 
598 	/* we use rulim to break out of the loop when we run out of digits */
599 	do {
600 		result *= 10;
601 		result += *(*buf)++ - '0';
602 		rulim /= 10;
603 	} while ((result * 10 <= ulim) && rulim && **buf >= '0' && **buf <= '9');
604 
605 	if (result < llim || result > ulim)
606 		return (0);
607 
608 	*dest = result;
609 	return (1);
610 }
611 
/*
 * Parse an unsigned decimal number at *buf into a 64-bit result; the
 * 64-bit counterpart of _conv_num().
 *
 * Digits are consumed while appending another one could still give a value
 * not above ulim, and for at most as many digits as ulim itself has.
 * *buf is advanced past the digits that were accepted.
 *
 * Returns 1 and stores the value in *dest when it lies within
 * [llim, ulim]; returns 0 and leaves *dest alone when the input does not
 * start with a digit, the value is out of range, or it does not fit in an
 * int64_t.
 */
static int
_conv_num64(const unsigned char **buf, int64_t *dest, int64_t llim, int64_t ulim)
{
	/* Accumulate in the result type; an int overflows past INT_MAX. */
	int64_t result = 0;
	int64_t rulim = ulim;
	int digit;

	if (**buf < '0' || **buf > '9')
		return (0);

	/* we use rulim to break out of the loop when we run out of digits */
	do {
		digit = **buf - '0';
		/* result * 10 + digit would not fit into an int64_t. */
		if (result > (INT64_MAX - digit) / 10)
			return (0);
		result = result * 10 + digit;
		(*buf)++;
		rulim /= 10;
		/*
		 * "result <= ulim / 10" is "result * 10 <= ulim" without
		 * the multiplication, which could overflow for large ulim.
		 */
	} while (result <= ulim / 10 && rulim && **buf >= '0' && **buf <= '9');

	if (result < llim || result > ulim)
		return (0);

	*dest = result;
	return (1);
}
634 
/*
 * Look for one of the names in n1, and after that in n2, as a
 * case-insensitive prefix of bp.  Each non-NULL table is searched over its
 * first c entries, in order.  n2 is only consulted when n1 is not NULL.
 *
 * On the first match the entry's index is stored in *tgt and a pointer just
 * past the matched text is returned.  When nothing matches, NULL is
 * returned and *tgt is left untouched.
 */
static const u_char *
_find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	const char * const *names;
	unsigned int len;
	int t, idx;

	tables[0] = n1;
	tables[1] = n2;

	/* check full name - then abbreviated ones */
	for (t = 0; t < 2; t++) {
		names = tables[t];
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			len = strlen(names[idx]);
			if (strncasecmp(names[idx], (const char *)bp, len) != 0)
				continue;

			*tgt = idx;
			return bp + len;
		}
	}

	/* Nothing matched */
	return NULL;
}
656 
/*
 * Number of leap years, by the Gregorian rule (every 4th year, except
 * every 100th, but including every 400th), counted through the end of
 * year y.  A negative y is mapped onto the non-negative year -(y + 1) and
 * the count comes out negative.
 */
static int
leaps_thru_end_of(const int y)
{
	if (y < 0)
		return -(leaps_thru_end_of(-(y + 1)) + 1);

	return (y / 4) - (y / 100) + (y / 400);
}
663