xref: /netbsd-src/lib/libc/time/strptime.c (revision da9817918ec7e88db2912a2882967c7570a83f47)
1 /*	$NetBSD: strptime.c,v 1.33 2009/05/24 02:25:43 ginsbach Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
8  * Heavily optimised by David Laight
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.33 2009/05/24 02:25:43 ginsbach Exp $");
35 #endif
36 
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <ctype.h>
40 #include <locale.h>
41 #include <string.h>
42 #include <time.h>
43 #include <tzfile.h>
44 #include "private.h"
45 
#ifdef __weak_alias
__weak_alias(strptime,_strptime)
#endif

/* Shorthand for a member of the current time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 */
#define ALT_E			0x01	/* a "%E" modifier was seen */
#define ALT_O			0x02	/* a "%O" modifier was seen */
/*
 * Make the enclosing function return NULL when `alt_format' has a modifier
 * bit set that is not part of the mask `x'.  Wrapped in do/while so that the
 * macro behaves as one statement, e.g. in an unbraced if/else.
 */
#define	LEGAL_ALT(x)							\
	do {								\
		if (alt_format & ~(x))					\
			return NULL;					\
	} while (/*CONSTCOND*/0)

/* Zone names handed back through tm->TM_ZONE (hence not const). */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };
/* RFC-822/RFC-2822 */
/* North American standard-time zone names; index 0 is Eastern. */
static const char * const nast[5] = {
       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
};
/* North American daylight-time zone names, in the same order as nast[]. */
static const char * const nadt[5] = {
       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
};

static const u_char *conv_num(const unsigned char *, int *, uint, uint);
static const u_char *find_string(const u_char *, int *, const char * const *,
	const char * const *, int);
73 
74 
75 char *
76 strptime(const char *buf, const char *fmt, struct tm *tm)
77 {
78 	unsigned char c;
79 	const unsigned char *bp, *ep;
80 	int alt_format, i, split_year = 0, neg = 0, offs;
81 	const char *new_fmt;
82 
83 	bp = (const u_char *)buf;
84 
85 	while (bp != NULL && (c = *fmt++) != '\0') {
86 		/* Clear `alternate' modifier prior to new conversion. */
87 		alt_format = 0;
88 		i = 0;
89 
90 		/* Eat up white-space. */
91 		if (isspace(c)) {
92 			while (isspace(*bp))
93 				bp++;
94 			continue;
95 		}
96 
97 		if (c != '%')
98 			goto literal;
99 
100 
101 again:		switch (c = *fmt++) {
102 		case '%':	/* "%%" is converted to "%". */
103 literal:
104 			if (c != *bp++)
105 				return NULL;
106 			LEGAL_ALT(0);
107 			continue;
108 
109 		/*
110 		 * "Alternative" modifiers. Just set the appropriate flag
111 		 * and start over again.
112 		 */
113 		case 'E':	/* "%E?" alternative conversion modifier. */
114 			LEGAL_ALT(0);
115 			alt_format |= ALT_E;
116 			goto again;
117 
118 		case 'O':	/* "%O?" alternative conversion modifier. */
119 			LEGAL_ALT(0);
120 			alt_format |= ALT_O;
121 			goto again;
122 
123 		/*
124 		 * "Complex" conversion rules, implemented through recursion.
125 		 */
126 		case 'c':	/* Date and time, using the locale's format. */
127 			new_fmt = _ctloc(d_t_fmt);
128 			goto recurse;
129 
130 		case 'D':	/* The date as "%m/%d/%y". */
131 			new_fmt = "%m/%d/%y";
132 			LEGAL_ALT(0);
133 			goto recurse;
134 
135 		case 'F':	/* The date as "%Y-%m-%d". */
136 			new_fmt = "%Y-%m-%d";
137 			LEGAL_ALT(0);
138 			goto recurse;
139 
140 		case 'R':	/* The time as "%H:%M". */
141 			new_fmt = "%H:%M";
142 			LEGAL_ALT(0);
143 			goto recurse;
144 
145 		case 'r':	/* The time in 12-hour clock representation. */
146 			new_fmt =_ctloc(t_fmt_ampm);
147 			LEGAL_ALT(0);
148 			goto recurse;
149 
150 		case 'T':	/* The time as "%H:%M:%S". */
151 			new_fmt = "%H:%M:%S";
152 			LEGAL_ALT(0);
153 			goto recurse;
154 
155 		case 'X':	/* The time, using the locale's format. */
156 			new_fmt =_ctloc(t_fmt);
157 			goto recurse;
158 
159 		case 'x':	/* The date, using the locale's format. */
160 			new_fmt =_ctloc(d_fmt);
161 		    recurse:
162 			bp = (const u_char *)strptime((const char *)bp,
163 							    new_fmt, tm);
164 			LEGAL_ALT(ALT_E);
165 			continue;
166 
167 		/*
168 		 * "Elementary" conversion rules.
169 		 */
170 		case 'A':	/* The day of week, using the locale's form. */
171 		case 'a':
172 			bp = find_string(bp, &tm->tm_wday, _ctloc(day),
173 					_ctloc(abday), 7);
174 			LEGAL_ALT(0);
175 			continue;
176 
177 		case 'B':	/* The month, using the locale's form. */
178 		case 'b':
179 		case 'h':
180 			bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
181 					_ctloc(abmon), 12);
182 			LEGAL_ALT(0);
183 			continue;
184 
185 		case 'C':	/* The century number. */
186 			i = 20;
187 			bp = conv_num(bp, &i, 0, 99);
188 
189 			i = i * 100 - TM_YEAR_BASE;
190 			if (split_year)
191 				i += tm->tm_year % 100;
192 			split_year = 1;
193 			tm->tm_year = i;
194 			LEGAL_ALT(ALT_E);
195 			continue;
196 
197 		case 'd':	/* The day of month. */
198 		case 'e':
199 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
200 			LEGAL_ALT(ALT_O);
201 			continue;
202 
203 		case 'k':	/* The hour (24-hour clock representation). */
204 			LEGAL_ALT(0);
205 			/* FALLTHROUGH */
206 		case 'H':
207 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
208 			LEGAL_ALT(ALT_O);
209 			continue;
210 
211 		case 'l':	/* The hour (12-hour clock representation). */
212 			LEGAL_ALT(0);
213 			/* FALLTHROUGH */
214 		case 'I':
215 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
216 			if (tm->tm_hour == 12)
217 				tm->tm_hour = 0;
218 			LEGAL_ALT(ALT_O);
219 			continue;
220 
221 		case 'j':	/* The day of year. */
222 			i = 1;
223 			bp = conv_num(bp, &i, 1, 366);
224 			tm->tm_yday = i - 1;
225 			LEGAL_ALT(0);
226 			continue;
227 
228 		case 'M':	/* The minute. */
229 			bp = conv_num(bp, &tm->tm_min, 0, 59);
230 			LEGAL_ALT(ALT_O);
231 			continue;
232 
233 		case 'm':	/* The month. */
234 			i = 1;
235 			bp = conv_num(bp, &i, 1, 12);
236 			tm->tm_mon = i - 1;
237 			LEGAL_ALT(ALT_O);
238 			continue;
239 
240 		case 'p':	/* The locale's equivalent of AM/PM. */
241 			bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
242 			if (tm->tm_hour > 11)
243 				return NULL;
244 			tm->tm_hour += i * 12;
245 			LEGAL_ALT(0);
246 			continue;
247 
248 		case 'S':	/* The seconds. */
249 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
250 			LEGAL_ALT(ALT_O);
251 			continue;
252 
253 #ifndef TIME_MAX
254 #define TIME_MAX	INT64_MAX
255 #endif
256 		case 's':	/* seconds since the epoch */
257 			{
258 				time_t sse = 0;
259 				uint64_t rulim = TIME_MAX;
260 
261 				if (*bp < '0' || *bp > '9') {
262 					bp = NULL;
263 					continue;
264 				}
265 
266 				do {
267 					sse *= 10;
268 					sse += *bp++ - '0';
269 					rulim /= 10;
270 				} while ((sse * 10 <= TIME_MAX) &&
271 					 rulim && *bp >= '0' && *bp <= '9');
272 
273 				if (sse < 0 || (uint64_t)sse > TIME_MAX) {
274 					bp = NULL;
275 					continue;
276 				}
277 
278 				if (localtime_r(&sse, tm) == NULL)
279 					bp = NULL;
280 			}
281 			continue;
282 
283 		case 'U':	/* The week of year, beginning on sunday. */
284 		case 'W':	/* The week of year, beginning on monday. */
285 			/*
286 			 * XXX This is bogus, as we can not assume any valid
287 			 * information present in the tm structure at this
288 			 * point to calculate a real value, so just check the
289 			 * range for now.
290 			 */
291 			 bp = conv_num(bp, &i, 0, 53);
292 			 LEGAL_ALT(ALT_O);
293 			 continue;
294 
295 		case 'w':	/* The day of week, beginning on sunday. */
296 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
297 			LEGAL_ALT(ALT_O);
298 			continue;
299 
300 		case 'u':	/* The day of week, monday = 1. */
301 			bp = conv_num(bp, &i, 1, 7);
302 			tm->tm_wday = i % 7;
303 			LEGAL_ALT(ALT_O);
304 			continue;
305 
306 		case 'g':	/* The year corresponding to the ISO week
307 				 * number but without the century.
308 				 */
309 			bp = conv_num(bp, &i, 0, 99);
310 			continue;
311 
312 		case 'G':	/* The year corresponding to the ISO week
313 				 * number with century.
314 				 */
315 			do
316 				bp++;
317 			while (isdigit(*bp));
318 			continue;
319 
320 		case 'V':	/* The ISO 8601:1988 week number as decimal */
321 			bp = conv_num(bp, &i, 0, 53);
322 			continue;
323 
324 		case 'Y':	/* The year. */
325 			i = TM_YEAR_BASE;	/* just for data sanity... */
326 			bp = conv_num(bp, &i, 0, 9999);
327 			tm->tm_year = i - TM_YEAR_BASE;
328 			LEGAL_ALT(ALT_E);
329 			continue;
330 
331 		case 'y':	/* The year within 100 years of the epoch. */
332 			/* LEGAL_ALT(ALT_E | ALT_O); */
333 			bp = conv_num(bp, &i, 0, 99);
334 
335 			if (split_year)
336 				/* preserve century */
337 				i += (tm->tm_year / 100) * 100;
338 			else {
339 				split_year = 1;
340 				if (i <= 68)
341 					i = i + 2000 - TM_YEAR_BASE;
342 				else
343 					i = i + 1900 - TM_YEAR_BASE;
344 			}
345 			tm->tm_year = i;
346 			continue;
347 
348 		case 'Z':
349 			tzset();
350 			if (strncmp((const char *)bp, gmt, 3) == 0) {
351 				tm->tm_isdst = 0;
352 #ifdef TM_GMTOFF
353 				tm->TM_GMTOFF = 0;
354 #endif
355 #ifdef TM_ZONE
356 				tm->TM_ZONE = gmt;
357 #endif
358 				bp += 3;
359 			} else {
360 				ep = find_string(bp, &i,
361 					       	 (const char * const *)tzname,
362 					       	  NULL, 2);
363 				if (ep != NULL) {
364 					tm->tm_isdst = i;
365 #ifdef TM_GMTOFF
366 					tm->TM_GMTOFF = -(timezone);
367 #endif
368 #ifdef TM_ZONE
369 					tm->TM_ZONE = tzname[i];
370 #endif
371 				}
372 				bp = ep;
373 			}
374 			continue;
375 
376 		case 'z':
377 			/*
378 			 * We recognize all ISO 8601 formats:
379 			 * Z	= Zulu time/UTC
380 			 * [+-]hhmm
381 			 * [+-]hh:mm
382 			 * [+-]hh
383 			 * We recognize all RFC-822/RFC-2822 formats:
384 			 * UT|GMT
385 			 *          North American : UTC offsets
386 			 * E[DS]T = Eastern : -4 | -5
387 			 * C[DS]T = Central : -5 | -6
388 			 * M[DS]T = Mountain: -6 | -7
389 			 * P[DS]T = Pacific : -7 | -8
390 			 *          Military
391 			 * [A-IL-M] = -1 ... -9 (J not used)
392 			 * [N-Y]  = +1 ... +12
393 			 */
394 			while (isspace(*bp))
395 				bp++;
396 
397 			switch (*bp++) {
398 			case 'G':
399 				if (*bp++ != 'M')
400 					return NULL;
401 				/*FALLTHROUGH*/
402 			case 'U':
403 				if (*bp++ != 'T')
404 					return NULL;
405 				/*FALLTHROUGH*/
406 			case 'Z':
407 				tm->tm_isdst = 0;
408 #ifdef TM_GMTOFF
409 				tm->TM_GMTOFF = 0;
410 #endif
411 #ifdef TM_ZONE
412 				tm->TM_ZONE = utc;
413 #endif
414 				continue;
415 			case '+':
416 				neg = 0;
417 				break;
418 			case '-':
419 				neg = 1;
420 				break;
421 			default:
422 				--bp;
423 				ep = find_string(bp, &i, nast, NULL, 4);
424 				if (ep != NULL) {
425 #ifdef TM_GMTOFF
426 					tm->TM_GMTOFF = -5 - i;
427 #endif
428 #ifdef TM_ZONE
429 					tm->TM_ZONE = __UNCONST(nast[i]);
430 #endif
431 					bp = ep;
432 					continue;
433 				}
434 				ep = find_string(bp, &i, nadt, NULL, 4);
435 				if (ep != NULL) {
436 					tm->tm_isdst = 1;
437 #ifdef TM_GMTOFF
438 					tm->TM_GMTOFF = -4 - i;
439 #endif
440 #ifdef TM_ZONE
441 					tm->TM_ZONE = __UNCONST(nadt[i]);
442 #endif
443 					bp = ep;
444 					continue;
445 				}
446 
447 				if ((*bp >= 'A' && *bp <= 'I') ||
448 				    (*bp >= 'L' && *bp <= 'Y')) {
449 #ifdef TM_GMTOFF
450 					/* Argh! No 'J'! */
451 					if (*bp >= 'A' && *bp <= 'I')
452 						tm->TM_GMTOFF =
453 						    ('A' - 1) - (int)*bp;
454 					else if (*bp >= 'L' && *bp <= 'M')
455 						tm->TM_GMTOFF = 'A' - (int)*bp;
456 					else if (*bp >= 'N' && *bp <= 'Y')
457 						tm->TM_GMTOFF = (int)*bp - 'M';
458 #endif
459 #ifdef TM_ZONE
460 					tm->TM_ZONE = NULL; /* XXX */
461 #endif
462 					bp++;
463 					continue;
464 				}
465 				return NULL;
466 			}
467 			offs = 0;
468 			for (i = 0; i < 4; ) {
469 				if (isdigit(*bp)) {
470 					offs = offs * 10 + (*bp++ - '0');
471 					i++;
472 					continue;
473 				}
474 				if (i == 2 && *bp == ':') {
475 					bp++;
476 					continue;
477 				}
478 				break;
479 			}
480 			switch (i) {
481 			case 2:
482 				offs *= 100;
483 				break;
484 			case 4:
485 				i = offs % 100;
486 				if (i >= 60)
487 					return NULL;
488 				/* Convert minutes into decimal */
489 				offs = (offs / 100) * 100 + (i * 50) / 30;
490 				break;
491 			default:
492 				return NULL;
493 			}
494 			if (neg)
495 				offs = -offs;
496 			tm->tm_isdst = 0;	/* XXX */
497 #ifdef TM_GMTOFF
498 			tm->TM_GMTOFF = offs;
499 #endif
500 #ifdef TM_ZONE
501 			tm->TM_ZONE = NULL;	/* XXX */
502 #endif
503 			continue;
504 
505 		/*
506 		 * Miscellaneous conversions.
507 		 */
508 		case 'n':	/* Any kind of white-space. */
509 		case 't':
510 			while (isspace(*bp))
511 				bp++;
512 			LEGAL_ALT(0);
513 			continue;
514 
515 
516 		default:	/* Unknown/unsupported conversion. */
517 			return NULL;
518 		}
519 	}
520 
521 	return __UNCONST(bp);
522 }
523 
524 
/*
 * Convert the unsigned decimal number at the start of `buf'.
 *
 * `ulim' bounds both the value and the number of digits taken: one digit
 * is consumed for every decimal digit of `ulim', and conversion stops early
 * as soon as a further digit would be bound to exceed `ulim'.
 *
 * On success the value is stored in `*dest' and a pointer to the first
 * unconsumed byte is returned.  NULL is returned, leaving `*dest' alone,
 * when `buf' does not start with a digit or the value lies outside
 * [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cursor = buf;
	uint value = 0;
	uint budget = ulim;	/* loses one decimal digit per digit read */

	if (*cursor < '0' || *cursor > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (*cursor++ - '0');
		budget /= 10;

		/* Another digit could only push the value past ulim. */
		if (value * 10 > ulim)
			break;
		/* As many digits as ulim has were already consumed. */
		if (budget == 0)
			break;
		/* The number ends at the first non-digit. */
		if (*cursor < '0' || *cursor > '9')
			break;
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = value;
	return cursor;
}
551 
/*
 * Look for one of the `c' names of table `n1', and then of table `n2',
 * at the start of `bp', ignoring case.  The first name that matches wins.
 *
 * On a match the index of the name within its table is stored in `*tgt'
 * and a pointer just past the matched text is returned.  Otherwise NULL is
 * returned and `*tgt' is left alone.  `n2' may be NULL when there is only
 * one table; nothing at all is searched when `n1' is NULL.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	const char * const *names;
	size_t namelen;
	int pass, idx;

	/* check full name - then abbreviated ones */
	tables[0] = n1;
	tables[1] = n2;

	for (pass = 0; pass < 2; pass++) {
		names = tables[pass];
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			namelen = strlen(names[idx]);
			if (strncasecmp(names[idx], (const char *)bp,
			    namelen) != 0)
				continue;
			*tgt = idx;
			return bp + namelen;
		}
	}

	/* Nothing matched */
	return NULL;
}
573