xref: /netbsd-src/lib/libc/time/strptime.c (revision b5677b36047b601b9addaaa494a58ceae82c2a6c)
1 /*	$NetBSD: strptime.c,v 1.31 2008/11/04 21:08:33 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
8  * Heavily optimised by David Laight
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.31 2008/11/04 21:08:33 christos Exp $");
35 #endif
36 
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <ctype.h>
40 #include <locale.h>
41 #include <string.h>
42 #include <time.h>
43 #include <tzfile.h>
44 #include "private.h"
45 
#ifdef __weak_alias
__weak_alias(strptime,_strptime)
#endif

/* Shorthand for a member of the current time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * ALT_E and ALT_O are the flag bits recorded for the "%E" and "%O"
 * modifiers.  LEGAL_ALT(x) must be used inside a pointer-returning
 * function that has an integer `alt_format' in scope: it makes that
 * function return NULL when `alt_format' carries any flag not listed
 * in `x'.  The body is wrapped in do/while(0) so that `LEGAL_ALT(x);'
 * is exactly one statement and stays safe under an unbraced if/else.
 */
#define ALT_E			0x01
#define ALT_O			0x02
#define	LEGAL_ALT(x)							\
	do {								\
		if (alt_format & ~(x))					\
			return NULL;					\
	} while (/*CONSTCOND*/0)

/* Zone names handed out through TM_ZONE by the %Z and %z conversions. */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };

/* Parse a bounded decimal number; defined below. */
static const u_char *conv_num(const unsigned char *, int *, uint, uint);
/* Case-insensitive prefix match against one or two name tables; defined below. */
static const u_char *find_string(const u_char *, int *, const char * const *,
	const char * const *, int);
66 
67 
68 char *
69 strptime(const char *buf, const char *fmt, struct tm *tm)
70 {
71 	unsigned char c;
72 	const unsigned char *bp;
73 	int alt_format, i, split_year = 0, neg, offs;
74 	const char *new_fmt;
75 
76 	bp = (const u_char *)buf;
77 
78 	while (bp != NULL && (c = *fmt++) != '\0') {
79 		/* Clear `alternate' modifier prior to new conversion. */
80 		alt_format = 0;
81 		i = 0;
82 
83 		/* Eat up white-space. */
84 		if (isspace(c)) {
85 			while (isspace(*bp))
86 				bp++;
87 			continue;
88 		}
89 
90 		if (c != '%')
91 			goto literal;
92 
93 
94 again:		switch (c = *fmt++) {
95 		case '%':	/* "%%" is converted to "%". */
96 literal:
97 			if (c != *bp++)
98 				return NULL;
99 			LEGAL_ALT(0);
100 			continue;
101 
102 		/*
103 		 * "Alternative" modifiers. Just set the appropriate flag
104 		 * and start over again.
105 		 */
106 		case 'E':	/* "%E?" alternative conversion modifier. */
107 			LEGAL_ALT(0);
108 			alt_format |= ALT_E;
109 			goto again;
110 
111 		case 'O':	/* "%O?" alternative conversion modifier. */
112 			LEGAL_ALT(0);
113 			alt_format |= ALT_O;
114 			goto again;
115 
116 		/*
117 		 * "Complex" conversion rules, implemented through recursion.
118 		 */
119 		case 'c':	/* Date and time, using the locale's format. */
120 			new_fmt = _ctloc(d_t_fmt);
121 			goto recurse;
122 
123 		case 'D':	/* The date as "%m/%d/%y". */
124 			new_fmt = "%m/%d/%y";
125 			LEGAL_ALT(0);
126 			goto recurse;
127 
128 		case 'F':	/* The date as "%Y-%m-%d". */
129 			new_fmt = "%Y-%m-%d";
130 			LEGAL_ALT(0);
131 			goto recurse;
132 
133 		case 'R':	/* The time as "%H:%M". */
134 			new_fmt = "%H:%M";
135 			LEGAL_ALT(0);
136 			goto recurse;
137 
138 		case 'r':	/* The time in 12-hour clock representation. */
139 			new_fmt =_ctloc(t_fmt_ampm);
140 			LEGAL_ALT(0);
141 			goto recurse;
142 
143 		case 'T':	/* The time as "%H:%M:%S". */
144 			new_fmt = "%H:%M:%S";
145 			LEGAL_ALT(0);
146 			goto recurse;
147 
148 		case 'X':	/* The time, using the locale's format. */
149 			new_fmt =_ctloc(t_fmt);
150 			goto recurse;
151 
152 		case 'x':	/* The date, using the locale's format. */
153 			new_fmt =_ctloc(d_fmt);
154 		    recurse:
155 			bp = (const u_char *)strptime((const char *)bp,
156 							    new_fmt, tm);
157 			LEGAL_ALT(ALT_E);
158 			continue;
159 
160 		/*
161 		 * "Elementary" conversion rules.
162 		 */
163 		case 'A':	/* The day of week, using the locale's form. */
164 		case 'a':
165 			bp = find_string(bp, &tm->tm_wday, _ctloc(day),
166 					_ctloc(abday), 7);
167 			LEGAL_ALT(0);
168 			continue;
169 
170 		case 'B':	/* The month, using the locale's form. */
171 		case 'b':
172 		case 'h':
173 			bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
174 					_ctloc(abmon), 12);
175 			LEGAL_ALT(0);
176 			continue;
177 
178 		case 'C':	/* The century number. */
179 			i = 20;
180 			bp = conv_num(bp, &i, 0, 99);
181 
182 			i = i * 100 - TM_YEAR_BASE;
183 			if (split_year)
184 				i += tm->tm_year % 100;
185 			split_year = 1;
186 			tm->tm_year = i;
187 			LEGAL_ALT(ALT_E);
188 			continue;
189 
190 		case 'd':	/* The day of month. */
191 		case 'e':
192 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
193 			LEGAL_ALT(ALT_O);
194 			continue;
195 
196 		case 'k':	/* The hour (24-hour clock representation). */
197 			LEGAL_ALT(0);
198 			/* FALLTHROUGH */
199 		case 'H':
200 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
201 			LEGAL_ALT(ALT_O);
202 			continue;
203 
204 		case 'l':	/* The hour (12-hour clock representation). */
205 			LEGAL_ALT(0);
206 			/* FALLTHROUGH */
207 		case 'I':
208 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
209 			if (tm->tm_hour == 12)
210 				tm->tm_hour = 0;
211 			LEGAL_ALT(ALT_O);
212 			continue;
213 
214 		case 'j':	/* The day of year. */
215 			i = 1;
216 			bp = conv_num(bp, &i, 1, 366);
217 			tm->tm_yday = i - 1;
218 			LEGAL_ALT(0);
219 			continue;
220 
221 		case 'M':	/* The minute. */
222 			bp = conv_num(bp, &tm->tm_min, 0, 59);
223 			LEGAL_ALT(ALT_O);
224 			continue;
225 
226 		case 'm':	/* The month. */
227 			i = 1;
228 			bp = conv_num(bp, &i, 1, 12);
229 			tm->tm_mon = i - 1;
230 			LEGAL_ALT(ALT_O);
231 			continue;
232 
233 		case 'p':	/* The locale's equivalent of AM/PM. */
234 			bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
235 			if (tm->tm_hour > 11)
236 				return NULL;
237 			tm->tm_hour += i * 12;
238 			LEGAL_ALT(0);
239 			continue;
240 
241 		case 'S':	/* The seconds. */
242 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
243 			LEGAL_ALT(ALT_O);
244 			continue;
245 
246 		case 'U':	/* The week of year, beginning on sunday. */
247 		case 'W':	/* The week of year, beginning on monday. */
248 			/*
249 			 * XXX This is bogus, as we can not assume any valid
250 			 * information present in the tm structure at this
251 			 * point to calculate a real value, so just check the
252 			 * range for now.
253 			 */
254 			 bp = conv_num(bp, &i, 0, 53);
255 			 LEGAL_ALT(ALT_O);
256 			 continue;
257 
258 		case 'w':	/* The day of week, beginning on sunday. */
259 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
260 			LEGAL_ALT(ALT_O);
261 			continue;
262 
263 		case 'u':	/* The day of week, monday = 1. */
264 			bp = conv_num(bp, &i, 1, 7);
265 			tm->tm_wday = i % 7;
266 			LEGAL_ALT(ALT_O);
267 			continue;
268 
269 		case 'g':	/* The year corresponding to the ISO week
270 				 * number but without the century.
271 				 */
272 			bp = conv_num(bp, &i, 0, 99);
273 			continue;
274 
275 		case 'G':	/* The year corresponding to the ISO week
276 				 * number with century.
277 				 */
278 			do
279 				bp++;
280 			while (isdigit(*bp));
281 			continue;
282 
283 		case 'V':	/* The ISO 8601:1988 week number as decimal */
284 			bp = conv_num(bp, &i, 0, 53);
285 			continue;
286 
287 		case 'Y':	/* The year. */
288 			i = TM_YEAR_BASE;	/* just for data sanity... */
289 			bp = conv_num(bp, &i, 0, 9999);
290 			tm->tm_year = i - TM_YEAR_BASE;
291 			LEGAL_ALT(ALT_E);
292 			continue;
293 
294 		case 'y':	/* The year within 100 years of the epoch. */
295 			/* LEGAL_ALT(ALT_E | ALT_O); */
296 			bp = conv_num(bp, &i, 0, 99);
297 
298 			if (split_year)
299 				/* preserve century */
300 				i += (tm->tm_year / 100) * 100;
301 			else {
302 				split_year = 1;
303 				if (i <= 68)
304 					i = i + 2000 - TM_YEAR_BASE;
305 				else
306 					i = i + 1900 - TM_YEAR_BASE;
307 			}
308 			tm->tm_year = i;
309 			continue;
310 
311 		case 'Z':
312 			tzset();
313 			if (strncmp((const char *)bp, gmt, 3) == 0) {
314 				tm->tm_isdst = 0;
315 #ifdef TM_GMTOFF
316 				tm->TM_GMTOFF = 0;
317 #endif
318 #ifdef TM_ZONE
319 				tm->TM_ZONE = gmt;
320 #endif
321 				bp += 3;
322 			} else {
323 				const unsigned char *ep;
324 
325 				ep = find_string(bp, &i,
326 					       	 (const char * const *)tzname,
327 					       	  NULL, 2);
328 				if (ep != NULL) {
329 					tm->tm_isdst = i;
330 #ifdef TM_GMTOFF
331 					tm->TM_GMTOFF = -(timezone);
332 #endif
333 #ifdef TM_ZONE
334 					tm->TM_ZONE = tzname[i];
335 #endif
336 				}
337 				bp = ep;
338 			}
339 			continue;
340 
341 		case 'z':
342 			/*
343 			 * We recognize all ISO 8601 formats:
344 			 * Z	= Zulu time/UTC
345 			 * [+-]hhmm
346 			 * [+-]hh:mm
347 			 * [+-]hh
348 			 */
349 			while (isspace(*bp))
350 				bp++;
351 
352 			switch (*bp++) {
353 			case 'Z':
354 				tm->tm_isdst = 0;
355 #ifdef TM_GMTOFF
356 				tm->TM_GMTOFF = 0;
357 #endif
358 #ifdef TM_ZONE
359 				tm->TM_ZONE = utc;
360 #endif
361 				continue;
362 			case '+':
363 				neg = 0;
364 				break;
365 			case '-':
366 				neg = 1;
367 				break;
368 			default:
369 				return NULL;
370 			}
371 			offs = 0;
372 			for (i = 0; i < 4; ) {
373 				if (isdigit(*bp)) {
374 					offs = offs * 10 + (*bp++ - '0');
375 					i++;
376 					continue;
377 				}
378 				if (i == 2 && *bp == ':') {
379 					bp++;
380 					continue;
381 				}
382 				break;
383 			}
384 			switch (i) {
385 			case 2:
386 				offs *= 100;
387 				break;
388 			case 4:
389 				i = offs % 100;
390 				if (i >= 60)
391 					return NULL;
392 				/* Convert minutes into decimal */
393 				offs = (offs / 100) * 100 + (i * 50) / 30;
394 				break;
395 			default:
396 				return NULL;
397 			}
398 			if (neg)
399 				offs = -offs;
400 			tm->tm_isdst = 0;	/* XXX */
401 #ifdef TM_GMTOFF
402 			tm->TM_GMTOFF = offs;
403 #endif
404 #ifdef TM_ZONE
405 			tm->TM_ZONE = NULL;	/* XXX */
406 #endif
407 			continue;
408 
409 		/*
410 		 * Miscellaneous conversions.
411 		 */
412 		case 'n':	/* Any kind of white-space. */
413 		case 't':
414 			while (isspace(*bp))
415 				bp++;
416 			LEGAL_ALT(0);
417 			continue;
418 
419 
420 		default:	/* Unknown/unsupported conversion. */
421 			return NULL;
422 		}
423 	}
424 
425 	return __UNCONST(bp);
426 }
427 
428 
/*
 * conv_num --
 *	Read an unsigned decimal number from the start of `buf'.
 *
 *	`ulim' bounds the value and also the number of digits taken: one
 *	digit is allowed per decimal digit of `ulim', and scanning stops
 *	early once appending another digit would necessarily exceed `ulim'.
 *
 *	On success the value is stored in `*dest' and a pointer to the
 *	first unconsumed character is returned.  If `buf' does not start
 *	with a digit, or the value falls outside [llim, ulim], NULL is
 *	returned and `*dest' is left untouched.
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cursor = buf;
	uint value = 0;
	uint width_left = ulim;	/* loses one decimal digit per digit read */

	if (!(*cursor >= '0' && *cursor <= '9'))
		return NULL;

	for (;;) {
		value = value * 10 + (*cursor - '0');
		width_left /= 10;
		cursor++;

		/* Stop when no further digit could fit, or none follows. */
		if (value * 10 > ulim || width_left == 0)
			break;
		if (*cursor < '0' || *cursor > '9')
			break;
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = value;
	return cursor;
}
455 
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against a table of names.
 *
 *	The `c' entries of `n1' are tried in order first; if none matches
 *	and `n2' is not NULL, the `c' entries of `n2' are tried next.  The
 *	first entry that is a prefix of the input wins.
 *
 *	On success the index of the matching entry is stored in `*tgt' and
 *	a pointer just past the matched text is returned.  If nothing
 *	matches, NULL is returned and `*tgt' is left untouched.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	int i;
	size_t len;	/* strlen() yields size_t; do not narrow it */

	/* check full name - then abbreviated ones */
	for (; n1 != NULL; n1 = n2, n2 = NULL) {
		for (i = 0; i < c; i++, n1++) {
			len = strlen(*n1);
			if (strncasecmp(*n1, (const char *)bp, len) == 0) {
				*tgt = i;
				return bp + len;
			}
		}
	}

	/* Nothing matched */
	return NULL;
}
477