xref: /netbsd-src/lib/libc/time/strptime.c (revision cac8e449158efc7261bebc8657cbb0125a2cfdde)
1 /*	$NetBSD: strptime.c,v 1.28 2008/04/28 20:23:01 martin Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
8  * Heavily optimised by David Laight
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.28 2008/04/28 20:23:01 martin Exp $");
35 #endif
36 
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <ctype.h>
40 #include <locale.h>
41 #include <string.h>
42 #include <time.h>
43 #include <tzfile.h>
44 
45 #ifdef __weak_alias
46 __weak_alias(strptime,_strptime)
47 #endif
48 
/* Shorthand for member `x' of the structure _CurrentTimeLocale points to. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * ALT_E and ALT_O are the bits kept in strptime()'s local `alt_format'
 * for the "%E" and "%O" modifiers respectively.
 */
#define ALT_E			0x01
#define ALT_O			0x02

/*
 * Make the enclosing function return NULL when `alt_format' has a bit set
 * that is not part of the mask `x'.  A variable named `alt_format' must be
 * in scope at the point of use.  The do/while (0) wrapper makes the
 * expansion together with its trailing semicolon exactly one statement,
 * so the macro is also safe in an unbraced if/else.
 */
#define	LEGAL_ALT(x)		do { if (alt_format & ~(x)) return NULL; } while (0)

/* Zone name that the %Z conversion compares against the input directly. */
static const char gmt[4] = { "GMT" };

/* Helpers defined after strptime(). */
static const u_char *conv_num(const unsigned char *, int *, uint, uint);
static const u_char *find_string(const u_char *, int *, const char * const *,
	const char * const *, int);
64 
65 
66 char *
67 strptime(const char *buf, const char *fmt, struct tm *tm)
68 {
69 	unsigned char c;
70 	const unsigned char *bp;
71 	int alt_format, i, split_year = 0;
72 	const char *new_fmt;
73 
74 	bp = (const u_char *)buf;
75 
76 	while (bp != NULL && (c = *fmt++) != '\0') {
77 		/* Clear `alternate' modifier prior to new conversion. */
78 		alt_format = 0;
79 		i = 0;
80 
81 		/* Eat up white-space. */
82 		if (isspace(c)) {
83 			while (isspace(*bp))
84 				bp++;
85 			continue;
86 		}
87 
88 		if (c != '%')
89 			goto literal;
90 
91 
92 again:		switch (c = *fmt++) {
93 		case '%':	/* "%%" is converted to "%". */
94 literal:
95 			if (c != *bp++)
96 				return NULL;
97 			LEGAL_ALT(0);
98 			continue;
99 
100 		/*
101 		 * "Alternative" modifiers. Just set the appropriate flag
102 		 * and start over again.
103 		 */
104 		case 'E':	/* "%E?" alternative conversion modifier. */
105 			LEGAL_ALT(0);
106 			alt_format |= ALT_E;
107 			goto again;
108 
109 		case 'O':	/* "%O?" alternative conversion modifier. */
110 			LEGAL_ALT(0);
111 			alt_format |= ALT_O;
112 			goto again;
113 
114 		/*
115 		 * "Complex" conversion rules, implemented through recursion.
116 		 */
117 		case 'c':	/* Date and time, using the locale's format. */
118 			new_fmt = _ctloc(d_t_fmt);
119 			goto recurse;
120 
121 		case 'D':	/* The date as "%m/%d/%y". */
122 			new_fmt = "%m/%d/%y";
123 			LEGAL_ALT(0);
124 			goto recurse;
125 
126 		case 'F':	/* The date as "%Y-%m-%d". */
127 			new_fmt = "%Y-%m-%d";
128 			LEGAL_ALT(0);
129 			goto recurse;
130 
131 		case 'R':	/* The time as "%H:%M". */
132 			new_fmt = "%H:%M";
133 			LEGAL_ALT(0);
134 			goto recurse;
135 
136 		case 'r':	/* The time in 12-hour clock representation. */
137 			new_fmt =_ctloc(t_fmt_ampm);
138 			LEGAL_ALT(0);
139 			goto recurse;
140 
141 		case 'T':	/* The time as "%H:%M:%S". */
142 			new_fmt = "%H:%M:%S";
143 			LEGAL_ALT(0);
144 			goto recurse;
145 
146 		case 'X':	/* The time, using the locale's format. */
147 			new_fmt =_ctloc(t_fmt);
148 			goto recurse;
149 
150 		case 'x':	/* The date, using the locale's format. */
151 			new_fmt =_ctloc(d_fmt);
152 		    recurse:
153 			bp = (const u_char *)strptime((const char *)bp,
154 							    new_fmt, tm);
155 			LEGAL_ALT(ALT_E);
156 			continue;
157 
158 		/*
159 		 * "Elementary" conversion rules.
160 		 */
161 		case 'A':	/* The day of week, using the locale's form. */
162 		case 'a':
163 			bp = find_string(bp, &tm->tm_wday, _ctloc(day),
164 					_ctloc(abday), 7);
165 			LEGAL_ALT(0);
166 			continue;
167 
168 		case 'B':	/* The month, using the locale's form. */
169 		case 'b':
170 		case 'h':
171 			bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
172 					_ctloc(abmon), 12);
173 			LEGAL_ALT(0);
174 			continue;
175 
176 		case 'C':	/* The century number. */
177 			i = 20;
178 			bp = conv_num(bp, &i, 0, 99);
179 
180 			i = i * 100 - TM_YEAR_BASE;
181 			if (split_year)
182 				i += tm->tm_year % 100;
183 			split_year = 1;
184 			tm->tm_year = i;
185 			LEGAL_ALT(ALT_E);
186 			continue;
187 
188 		case 'd':	/* The day of month. */
189 		case 'e':
190 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
191 			LEGAL_ALT(ALT_O);
192 			continue;
193 
194 		case 'k':	/* The hour (24-hour clock representation). */
195 			LEGAL_ALT(0);
196 			/* FALLTHROUGH */
197 		case 'H':
198 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
199 			LEGAL_ALT(ALT_O);
200 			continue;
201 
202 		case 'l':	/* The hour (12-hour clock representation). */
203 			LEGAL_ALT(0);
204 			/* FALLTHROUGH */
205 		case 'I':
206 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
207 			if (tm->tm_hour == 12)
208 				tm->tm_hour = 0;
209 			LEGAL_ALT(ALT_O);
210 			continue;
211 
212 		case 'j':	/* The day of year. */
213 			i = 1;
214 			bp = conv_num(bp, &i, 1, 366);
215 			tm->tm_yday = i - 1;
216 			LEGAL_ALT(0);
217 			continue;
218 
219 		case 'M':	/* The minute. */
220 			bp = conv_num(bp, &tm->tm_min, 0, 59);
221 			LEGAL_ALT(ALT_O);
222 			continue;
223 
224 		case 'm':	/* The month. */
225 			i = 1;
226 			bp = conv_num(bp, &i, 1, 12);
227 			tm->tm_mon = i - 1;
228 			LEGAL_ALT(ALT_O);
229 			continue;
230 
231 		case 'p':	/* The locale's equivalent of AM/PM. */
232 			bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
233 			if (tm->tm_hour > 11)
234 				return NULL;
235 			tm->tm_hour += i * 12;
236 			LEGAL_ALT(0);
237 			continue;
238 
239 		case 'S':	/* The seconds. */
240 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
241 			LEGAL_ALT(ALT_O);
242 			continue;
243 
244 		case 'U':	/* The week of year, beginning on sunday. */
245 		case 'W':	/* The week of year, beginning on monday. */
246 			/*
247 			 * XXX This is bogus, as we can not assume any valid
248 			 * information present in the tm structure at this
249 			 * point to calculate a real value, so just check the
250 			 * range for now.
251 			 */
252 			 bp = conv_num(bp, &i, 0, 53);
253 			 LEGAL_ALT(ALT_O);
254 			 continue;
255 
256 		case 'w':	/* The day of week, beginning on sunday. */
257 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
258 			LEGAL_ALT(ALT_O);
259 			continue;
260 
261 		case 'Y':	/* The year. */
262 			i = TM_YEAR_BASE;	/* just for data sanity... */
263 			bp = conv_num(bp, &i, 0, 9999);
264 			tm->tm_year = i - TM_YEAR_BASE;
265 			LEGAL_ALT(ALT_E);
266 			continue;
267 
268 		case 'y':	/* The year within 100 years of the epoch. */
269 			/* LEGAL_ALT(ALT_E | ALT_O); */
270 			bp = conv_num(bp, &i, 0, 99);
271 
272 			if (split_year)
273 				/* preserve century */
274 				i += (tm->tm_year / 100) * 100;
275 			else {
276 				split_year = 1;
277 				if (i <= 68)
278 					i = i + 2000 - TM_YEAR_BASE;
279 				else
280 					i = i + 1900 - TM_YEAR_BASE;
281 			}
282 			tm->tm_year = i;
283 			continue;
284 
285 		case 'Z':
286 			tzset();
287 			if (strncmp((const char *)bp, gmt, 3) == 0) {
288 				tm->tm_isdst = 0;
289 #ifdef TM_GMTOFF
290 				tm->TM_GMTOFF = 0;
291 #endif
292 #ifdef TM_ZONE
293 				tm->TM_ZONE = gmt;
294 #endif
295 				bp += 3;
296 			} else {
297 				const unsigned char *ep;
298 
299 				ep = find_string(bp, &i,
300 					       	 (const char * const *)tzname,
301 					       	  NULL, 2);
302 				if (ep != NULL) {
303 					tm->tm_isdst = i;
304 #ifdef TM_GMTOFF
305 					tm->TM_GMTOFF = -(timezone);
306 #endif
307 #ifdef TM_ZONE
308 					tm->TM_ZONE = tzname[i];
309 #endif
310 				}
311 				bp = ep;
312 			}
313 			continue;
314 
315 		/*
316 		 * Miscellaneous conversions.
317 		 */
318 		case 'n':	/* Any kind of white-space. */
319 		case 't':
320 			while (isspace(*bp))
321 				bp++;
322 			LEGAL_ALT(0);
323 			continue;
324 
325 
326 		default:	/* Unknown/unsupported conversion. */
327 			return NULL;
328 		}
329 	}
330 
331 	return __UNCONST(bp);
332 }
333 
334 
/*
 * conv_num --
 *	Convert the decimal number at the start of `buf'.
 *
 *	Digits are consumed only while one more digit could still yield a
 *	value that does not exceed `ulim', and never more digits than
 *	`ulim' itself has.  On success the value is stored in `*dest' and
 *	a pointer to the first unconsumed byte is returned.  NULL is
 *	returned, with `*dest' left untouched, when `buf' does not start
 *	with a digit or the value is outside [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *p = buf;
	uint value = 0;
	/* Divided by ten per digit; reaching zero ends the number. */
	uint budget = ulim;

	if (*p < '0' || *p > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (uint)(*p - '0');
		budget /= 10;
		p++;

		if (value * 10 > ulim)
			break;		/* another digit would exceed ulim */
		if (budget == 0)
			break;		/* as many digits as ulim has */
		if (*p < '0' || *p > '9')
			break;		/* no further digit in the input */
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = value;
	return p;
}
361 
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against the `c' strings of
 *	table `n1' and then, unless `n2' is NULL, against the `c' strings
 *	of table `n2'.  Within a table the entries are tried in order and
 *	the first one that is a prefix of the input wins.
 *
 *	On a match the index of the entry is stored in `*tgt' and a pointer
 *	just past the matched text is returned.  NULL is returned, with
 *	`*tgt' left untouched, when nothing matches.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *names;
	size_t len;	/* strlen() yields size_t; do not narrow it */
	int i;

	/* check full name - then abbreviated ones */
	for (names = n1; names != NULL; names = n2, n2 = NULL) {
		for (i = 0; i < c; i++) {
			len = strlen(names[i]);
			if (strncasecmp(names[i], (const char *)bp, len) == 0) {
				*tgt = i;
				return bp + len;
			}
		}
	}

	/* Nothing matched */
	return NULL;
}
383