xref: /netbsd-src/lib/libc/time/strptime.c (revision f89f6560d453f5e37386cc7938c072d2f528b9fa)
1 /*	$NetBSD: strptime.c,v 1.39 2015/04/06 14:38:22 ginsbach Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
8  * Heavily optimised by David Laight
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.39 2015/04/06 14:38:22 ginsbach Exp $");
35 #endif
36 
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <ctype.h>
40 #include <locale.h>
41 #include <string.h>
42 #include <time.h>
43 #include <tzfile.h>
44 #include "private.h"
45 #include "setlocale_local.h"
46 
47 #ifdef __weak_alias
48 __weak_alias(strptime,_strptime)
49 __weak_alias(strptime_l, _strptime_l)
50 #endif
51 
/*
 * Fetch the LC_TIME data of a locale object: the LC_TIME slot of the
 * locale's part_impl[] array, viewed as a _TimeLocale.
 */
#define _TIME_LOCALE(loc) \
    ((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME]))

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * ALT_E and ALT_O are the flag bits recorded for the "%E" and "%O"
 * modifiers.  LEGAL_ALT(x) makes the enclosing function return NULL when
 * the `alt_format' variable in scope has any bit set that is not in `x'.
 * It is wrapped in do/while(0) so that `LEGAL_ALT(x);' is a single
 * statement and stays valid inside an unbraced if/else.
 */
#define ALT_E			0x01
#define ALT_O			0x02
#define	LEGAL_ALT(x)		\
	do { if (alt_format & ~(x)) return NULL; } while (/*CONSTCOND*/0)
62 
/*
 * Zone names recognised directly by the parser.  `gmt' and `utc' are
 * plain (non-const) char arrays because they are assigned to the tm
 * zone-name field as-is.
 */
static char gmt[] = "GMT";
static char utc[] = "UTC";

/*
 * RFC-822/RFC-2822 North American zone abbreviations, standard time and
 * daylight time.  Entries 0-3 run Eastern, Central, Mountain, Pacific;
 * the fifth entry is an empty placeholder.
 */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};
static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};
72 
73 static const u_char *conv_num(const unsigned char *, int *, uint, uint);
74 static const u_char *find_string(const u_char *, int *, const char * const *,
75 	const char * const *, int);
76 
77 char *
78 strptime(const char *buf, const char *fmt, struct tm *tm)
79 {
80 	return strptime_l(buf, fmt, tm, _current_locale());
81 }
82 
83 char *
84 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc)
85 {
86 	unsigned char c;
87 	const unsigned char *bp, *ep;
88 	int alt_format, i, split_year = 0, neg = 0, offs;
89 	const char *new_fmt;
90 
91 	bp = (const u_char *)buf;
92 
93 	while (bp != NULL && (c = *fmt++) != '\0') {
94 		/* Clear `alternate' modifier prior to new conversion. */
95 		alt_format = 0;
96 		i = 0;
97 
98 		/* Eat up white-space. */
99 		if (isspace(c)) {
100 			while (isspace(*bp))
101 				bp++;
102 			continue;
103 		}
104 
105 		if (c != '%')
106 			goto literal;
107 
108 
109 again:		switch (c = *fmt++) {
110 		case '%':	/* "%%" is converted to "%". */
111 literal:
112 			if (c != *bp++)
113 				return NULL;
114 			LEGAL_ALT(0);
115 			continue;
116 
117 		/*
118 		 * "Alternative" modifiers. Just set the appropriate flag
119 		 * and start over again.
120 		 */
121 		case 'E':	/* "%E?" alternative conversion modifier. */
122 			LEGAL_ALT(0);
123 			alt_format |= ALT_E;
124 			goto again;
125 
126 		case 'O':	/* "%O?" alternative conversion modifier. */
127 			LEGAL_ALT(0);
128 			alt_format |= ALT_O;
129 			goto again;
130 
131 		/*
132 		 * "Complex" conversion rules, implemented through recursion.
133 		 */
134 		case 'c':	/* Date and time, using the locale's format. */
135 			new_fmt = _TIME_LOCALE(loc)->d_t_fmt;
136 			goto recurse;
137 
138 		case 'D':	/* The date as "%m/%d/%y". */
139 			new_fmt = "%m/%d/%y";
140 			LEGAL_ALT(0);
141 			goto recurse;
142 
143 		case 'F':	/* The date as "%Y-%m-%d". */
144 			new_fmt = "%Y-%m-%d";
145 			LEGAL_ALT(0);
146 			goto recurse;
147 
148 		case 'R':	/* The time as "%H:%M". */
149 			new_fmt = "%H:%M";
150 			LEGAL_ALT(0);
151 			goto recurse;
152 
153 		case 'r':	/* The time in 12-hour clock representation. */
154 			new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm;
155 			LEGAL_ALT(0);
156 			goto recurse;
157 
158 		case 'T':	/* The time as "%H:%M:%S". */
159 			new_fmt = "%H:%M:%S";
160 			LEGAL_ALT(0);
161 			goto recurse;
162 
163 		case 'X':	/* The time, using the locale's format. */
164 			new_fmt = _TIME_LOCALE(loc)->t_fmt;
165 			goto recurse;
166 
167 		case 'x':	/* The date, using the locale's format. */
168 			new_fmt = _TIME_LOCALE(loc)->d_fmt;
169 		    recurse:
170 			bp = (const u_char *)strptime((const char *)bp,
171 							    new_fmt, tm);
172 			LEGAL_ALT(ALT_E);
173 			continue;
174 
175 		/*
176 		 * "Elementary" conversion rules.
177 		 */
178 		case 'A':	/* The day of week, using the locale's form. */
179 		case 'a':
180 			bp = find_string(bp, &tm->tm_wday,
181 			    _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7);
182 			LEGAL_ALT(0);
183 			continue;
184 
185 		case 'B':	/* The month, using the locale's form. */
186 		case 'b':
187 		case 'h':
188 			bp = find_string(bp, &tm->tm_mon,
189 			    _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon,
190 			    12);
191 			LEGAL_ALT(0);
192 			continue;
193 
194 		case 'C':	/* The century number. */
195 			i = 20;
196 			bp = conv_num(bp, &i, 0, 99);
197 
198 			i = i * 100 - TM_YEAR_BASE;
199 			if (split_year)
200 				i += tm->tm_year % 100;
201 			split_year = 1;
202 			tm->tm_year = i;
203 			LEGAL_ALT(ALT_E);
204 			continue;
205 
206 		case 'd':	/* The day of month. */
207 		case 'e':
208 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
209 			LEGAL_ALT(ALT_O);
210 			continue;
211 
212 		case 'k':	/* The hour (24-hour clock representation). */
213 			LEGAL_ALT(0);
214 			/* FALLTHROUGH */
215 		case 'H':
216 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
217 			LEGAL_ALT(ALT_O);
218 			continue;
219 
220 		case 'l':	/* The hour (12-hour clock representation). */
221 			LEGAL_ALT(0);
222 			/* FALLTHROUGH */
223 		case 'I':
224 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
225 			if (tm->tm_hour == 12)
226 				tm->tm_hour = 0;
227 			LEGAL_ALT(ALT_O);
228 			continue;
229 
230 		case 'j':	/* The day of year. */
231 			i = 1;
232 			bp = conv_num(bp, &i, 1, 366);
233 			tm->tm_yday = i - 1;
234 			LEGAL_ALT(0);
235 			continue;
236 
237 		case 'M':	/* The minute. */
238 			bp = conv_num(bp, &tm->tm_min, 0, 59);
239 			LEGAL_ALT(ALT_O);
240 			continue;
241 
242 		case 'm':	/* The month. */
243 			i = 1;
244 			bp = conv_num(bp, &i, 1, 12);
245 			tm->tm_mon = i - 1;
246 			LEGAL_ALT(ALT_O);
247 			continue;
248 
249 		case 'p':	/* The locale's equivalent of AM/PM. */
250 			bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm,
251 			    NULL, 2);
252 			if (tm->tm_hour > 11)
253 				return NULL;
254 			tm->tm_hour += i * 12;
255 			LEGAL_ALT(0);
256 			continue;
257 
258 		case 'S':	/* The seconds. */
259 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
260 			LEGAL_ALT(ALT_O);
261 			continue;
262 
263 #ifndef TIME_MAX
264 #define TIME_MAX	INT64_MAX
265 #endif
266 		case 's':	/* seconds since the epoch */
267 			{
268 				time_t sse = 0;
269 				uint64_t rulim = TIME_MAX;
270 
271 				if (*bp < '0' || *bp > '9') {
272 					bp = NULL;
273 					continue;
274 				}
275 
276 				do {
277 					sse *= 10;
278 					sse += *bp++ - '0';
279 					rulim /= 10;
280 				} while ((sse * 10 <= TIME_MAX) &&
281 					 rulim && *bp >= '0' && *bp <= '9');
282 
283 				if (sse < 0 || (uint64_t)sse > TIME_MAX) {
284 					bp = NULL;
285 					continue;
286 				}
287 
288 				if (localtime_r(&sse, tm) == NULL)
289 					bp = NULL;
290 			}
291 			continue;
292 
293 		case 'U':	/* The week of year, beginning on sunday. */
294 		case 'W':	/* The week of year, beginning on monday. */
295 			/*
296 			 * XXX This is bogus, as we can not assume any valid
297 			 * information present in the tm structure at this
298 			 * point to calculate a real value, so just check the
299 			 * range for now.
300 			 */
301 			 bp = conv_num(bp, &i, 0, 53);
302 			 LEGAL_ALT(ALT_O);
303 			 continue;
304 
305 		case 'w':	/* The day of week, beginning on sunday. */
306 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
307 			LEGAL_ALT(ALT_O);
308 			continue;
309 
310 		case 'u':	/* The day of week, monday = 1. */
311 			bp = conv_num(bp, &i, 1, 7);
312 			tm->tm_wday = i % 7;
313 			LEGAL_ALT(ALT_O);
314 			continue;
315 
316 		case 'g':	/* The year corresponding to the ISO week
317 				 * number but without the century.
318 				 */
319 			bp = conv_num(bp, &i, 0, 99);
320 			continue;
321 
322 		case 'G':	/* The year corresponding to the ISO week
323 				 * number with century.
324 				 */
325 			do
326 				bp++;
327 			while (isdigit(*bp));
328 			continue;
329 
330 		case 'V':	/* The ISO 8601:1988 week number as decimal */
331 			bp = conv_num(bp, &i, 0, 53);
332 			continue;
333 
334 		case 'Y':	/* The year. */
335 			i = TM_YEAR_BASE;	/* just for data sanity... */
336 			bp = conv_num(bp, &i, 0, 9999);
337 			tm->tm_year = i - TM_YEAR_BASE;
338 			LEGAL_ALT(ALT_E);
339 			continue;
340 
341 		case 'y':	/* The year within 100 years of the epoch. */
342 			/* LEGAL_ALT(ALT_E | ALT_O); */
343 			bp = conv_num(bp, &i, 0, 99);
344 
345 			if (split_year)
346 				/* preserve century */
347 				i += (tm->tm_year / 100) * 100;
348 			else {
349 				split_year = 1;
350 				if (i <= 68)
351 					i = i + 2000 - TM_YEAR_BASE;
352 				else
353 					i = i + 1900 - TM_YEAR_BASE;
354 			}
355 			tm->tm_year = i;
356 			continue;
357 
358 		case 'Z':
359 			tzset();
360 			if (strncmp((const char *)bp, gmt, 3) == 0 ||
361 			    strncmp((const char *)bp, utc, 3) == 0) {
362 				tm->tm_isdst = 0;
363 #ifdef TM_GMTOFF
364 				tm->TM_GMTOFF = 0;
365 #endif
366 #ifdef TM_ZONE
367 				tm->TM_ZONE = gmt;
368 #endif
369 				bp += 3;
370 			} else {
371 				ep = find_string(bp, &i,
372 					       	 (const char * const *)tzname,
373 					       	  NULL, 2);
374 				if (ep != NULL) {
375 					tm->tm_isdst = i;
376 #ifdef TM_GMTOFF
377 					tm->TM_GMTOFF = -(timezone);
378 #endif
379 #ifdef TM_ZONE
380 					tm->TM_ZONE = tzname[i];
381 #endif
382 				}
383 				bp = ep;
384 			}
385 			continue;
386 
387 		case 'z':
388 			/*
389 			 * We recognize all ISO 8601 formats:
390 			 * Z	= Zulu time/UTC
391 			 * [+-]hhmm
392 			 * [+-]hh:mm
393 			 * [+-]hh
394 			 * We recognize all RFC-822/RFC-2822 formats:
395 			 * UT|GMT
396 			 *          North American : UTC offsets
397 			 * E[DS]T = Eastern : -4 | -5
398 			 * C[DS]T = Central : -5 | -6
399 			 * M[DS]T = Mountain: -6 | -7
400 			 * P[DS]T = Pacific : -7 | -8
401 			 *          Military
402 			 * [A-IL-M] = -1 ... -9 (J not used)
403 			 * [N-Y]  = +1 ... +12
404 			 */
405 			while (isspace(*bp))
406 				bp++;
407 
408 			switch (*bp++) {
409 			case 'G':
410 				if (*bp++ != 'M')
411 					return NULL;
412 				/*FALLTHROUGH*/
413 			case 'U':
414 				if (*bp++ != 'T')
415 					return NULL;
416 				/*FALLTHROUGH*/
417 			case 'Z':
418 				tm->tm_isdst = 0;
419 #ifdef TM_GMTOFF
420 				tm->TM_GMTOFF = 0;
421 #endif
422 #ifdef TM_ZONE
423 				tm->TM_ZONE = utc;
424 #endif
425 				continue;
426 			case '+':
427 				neg = 0;
428 				break;
429 			case '-':
430 				neg = 1;
431 				break;
432 			default:
433 				--bp;
434 				ep = find_string(bp, &i, nast, NULL, 4);
435 				if (ep != NULL) {
436 #ifdef TM_GMTOFF
437 					tm->TM_GMTOFF = -5 - i;
438 #endif
439 #ifdef TM_ZONE
440 					tm->TM_ZONE = __UNCONST(nast[i]);
441 #endif
442 					bp = ep;
443 					continue;
444 				}
445 				ep = find_string(bp, &i, nadt, NULL, 4);
446 				if (ep != NULL) {
447 					tm->tm_isdst = 1;
448 #ifdef TM_GMTOFF
449 					tm->TM_GMTOFF = -4 - i;
450 #endif
451 #ifdef TM_ZONE
452 					tm->TM_ZONE = __UNCONST(nadt[i]);
453 #endif
454 					bp = ep;
455 					continue;
456 				}
457 
458 				if ((*bp >= 'A' && *bp <= 'I') ||
459 				    (*bp >= 'L' && *bp <= 'Y')) {
460 #ifdef TM_GMTOFF
461 					/* Argh! No 'J'! */
462 					if (*bp >= 'A' && *bp <= 'I')
463 						tm->TM_GMTOFF =
464 						    ('A' - 1) - (int)*bp;
465 					else if (*bp >= 'L' && *bp <= 'M')
466 						tm->TM_GMTOFF = 'A' - (int)*bp;
467 					else if (*bp >= 'N' && *bp <= 'Y')
468 						tm->TM_GMTOFF = (int)*bp - 'M';
469 #endif
470 #ifdef TM_ZONE
471 					tm->TM_ZONE = NULL; /* XXX */
472 #endif
473 					bp++;
474 					continue;
475 				}
476 				return NULL;
477 			}
478 			offs = 0;
479 			for (i = 0; i < 4; ) {
480 				if (isdigit(*bp)) {
481 					offs = offs * 10 + (*bp++ - '0');
482 					i++;
483 					continue;
484 				}
485 				if (i == 2 && *bp == ':') {
486 					bp++;
487 					continue;
488 				}
489 				break;
490 			}
491 			switch (i) {
492 			case 2:
493 				offs *= 100;
494 				break;
495 			case 4:
496 				i = offs % 100;
497 				if (i >= 60)
498 					return NULL;
499 				/* Convert minutes into decimal */
500 				offs = (offs / 100) * 100 + (i * 50) / 30;
501 				break;
502 			default:
503 				return NULL;
504 			}
505 			if (neg)
506 				offs = -offs;
507 			tm->tm_isdst = 0;	/* XXX */
508 #ifdef TM_GMTOFF
509 			tm->TM_GMTOFF = offs;
510 #endif
511 #ifdef TM_ZONE
512 			tm->TM_ZONE = NULL;	/* XXX */
513 #endif
514 			continue;
515 
516 		/*
517 		 * Miscellaneous conversions.
518 		 */
519 		case 'n':	/* Any kind of white-space. */
520 		case 't':
521 			while (isspace(*bp))
522 				bp++;
523 			LEGAL_ALT(0);
524 			continue;
525 
526 
527 		default:	/* Unknown/unsupported conversion. */
528 			return NULL;
529 		}
530 	}
531 
532 	return __UNCONST(bp);
533 }
534 
535 
/*
 * conv_num --
 *	Parse an unsigned decimal number at `buf'.  At most as many digits
 *	are consumed as `ulim' itself has, and scanning also stops as soon
 *	as one more digit would necessarily exceed `ulim'.
 *
 *	On success the value is stored in `*dest' and a pointer to the
 *	first unconsumed character is returned.  NULL is returned, with
 *	`*dest' untouched, when `buf' does not start with a digit or the
 *	value lies outside [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cursor = buf;
	uint value = 0;
	uint digits_left = ulim;	/* one digit allowed per digit of ulim */

	if (*cursor < '0' || *cursor > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (*cursor - '0');
		cursor++;
		digits_left /= 10;

		if (value * 10 > ulim)
			break;		/* another digit cannot fit */
		if (digits_left == 0)
			break;		/* digit budget used up */
		if (*cursor < '0' || *cursor > '9')
			break;		/* end of the number */
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = value;
	return cursor;
}
562 
/*
 * find_string --
 *	Case-insensitively match the start of `bp' against the `c' names in
 *	table `n1' and then, if none matched and `n2' is not NULL, against
 *	the `c' names in `n2' (full names first, abbreviations second).
 *
 *	On a match the index of the name within its table is stored in
 *	`*tgt' and a pointer just past the matched text is returned.
 *	Returns NULL, leaving `*tgt' untouched, when nothing matches or
 *	when `n1' is NULL.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	int which, idx;

	tables[0] = n1;
	tables[1] = n2;

	/* A NULL table ends the search, so a NULL n1 skips n2 as well. */
	for (which = 0; which < 2 && tables[which] != NULL; which++) {
		const char * const *names = tables[which];

		for (idx = 0; idx < c; idx++) {
			size_t name_len = strlen(names[idx]);

			if (strncasecmp(names[idx], (const char *)bp,
			    name_len) != 0)
				continue;

			*tgt = idx;
			return bp + name_len;
		}
	}

	/* Nothing matched */
	return NULL;
}
584