xref: /netbsd-src/lib/libc/time/strptime.c (revision 8ac07aec990b9d2e483062509d0a9fa5b4f57cf2)
1 /*	$NetBSD: strptime.c,v 1.27 2008/04/25 20:51:10 ginsbach Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
8  * Heavily optimised by David Laight
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 #if defined(LIBC_SCCS) && !defined(lint)
41 __RCSID("$NetBSD: strptime.c,v 1.27 2008/04/25 20:51:10 ginsbach Exp $");
42 #endif
43 
44 #include "namespace.h"
45 #include <sys/localedef.h>
46 #include <ctype.h>
47 #include <locale.h>
48 #include <string.h>
49 #include <time.h>
50 #include <tzfile.h>
51 
/*
 * Where the toolchain provides __weak_alias, bind the names strptime and
 * _strptime together.  NOTE(review): presumably "namespace.h" renames the
 * definition below to _strptime -- confirm against that header.
 */
52 #ifdef __weak_alias
53 __weak_alias(strptime,_strptime)
54 #endif
55 
/* Shorthand for member `x' of the object _CurrentTimeLocale points to. */
56 #define	_ctloc(x)		(_CurrentTimeLocale->x)
57 
58 /*
59  * We do not implement alternate representations. However, we always
60  * check whether a given modifier is allowed for a certain conversion.
61  */
/* Bit flags recorded in `alt_format' when "%E" or "%O" has been seen. */
62 #define ALT_E			0x01
63 #define ALT_O			0x02
/*
 * LEGAL_ALT(x): `x' is the set of modifier flags the current conversion
 * tolerates.  If `alt_format' has any other flag set, the macro executes
 * `return NULL' in the function that uses it, so it is only usable where a
 * variable named `alt_format' is in scope and NULL is a valid return value.
 */
64 #define	LEGAL_ALT(x)		{ if (alt_format & ~(x)) return NULL; }
65 
/* The zone name "GMT"; "%Z" compares its first three bytes to the input. */
66 static const char gmt[4] = { "GMT" };
67 
/* Helpers defined after strptime(): bounded number and name-table parsing. */
68 static const u_char *conv_num(const unsigned char *, int *, uint, uint);
69 static const u_char *find_string(const u_char *, int *, const char * const *,
70 	const char * const *, int);
71 
72 
/*
 * strptime --
 *	Convert the text in `buf' into broken-down time fields of `tm',
 *	driven by the conversion specification string `fmt'.
 *
 *	White-space in `fmt' skips any run of white-space in `buf'; every
 *	other character outside a conversion must match `buf' exactly.
 *	The "%E" and "%O" modifiers are only validated through LEGAL_ALT();
 *	they never change how a field is parsed.
 *
 *	Returns a pointer to the first character of `buf' that was not
 *	consumed, or NULL when the input does not match, a value is out of
 *	range, a modifier is not allowed, or `fmt' holds an unknown
 *	conversion.  Fields of `tm' stored before a failure stay modified.
 */
73 char *
74 strptime(const char *buf, const char *fmt, struct tm *tm)
75 {
76 	unsigned char c;
77 	const unsigned char *bp;
	/*
	 * split_year becomes non-zero once "%C" or "%y" has been converted,
	 * so that the other one of the pair can be merged into tm_year.
	 */
78 	int alt_format, i, split_year = 0;
79 	const char *new_fmt;
80 
81 	bp = (const u_char *)buf;
82 
	/* A helper returning NULL into bp ends the loop on the next test. */
83 	while (bp != NULL && (c = *fmt++) != '\0') {
84 		/* Clear `alternate' modifier prior to new conversion. */
85 		alt_format = 0;
		/* Scratch value for conversions not stored straight into tm. */
86 		i = 0;
87 
88 		/* Eat up white-space. */
89 		if (isspace(c)) {
90 			while (isspace(*bp))
91 				bp++;
92 			continue;
93 		}
94 
		/* Ordinary characters jump into the switch at `literal'. */
95 		if (c != '%')
96 			goto literal;
97 
98 
		/* `again' is re-entered after an "E" or "O" modifier. */
99 again:		switch (c = *fmt++) {
100 		case '%':	/* "%%" is converted to "%". */
101 literal:
102 			if (c != *bp++)
103 				return NULL;
104 			LEGAL_ALT(0);
105 			continue;
106 
107 		/*
108 		 * "Alternative" modifiers. Just set the appropriate flag
109 		 * and start over again.
110 		 */
111 		case 'E':	/* "%E?" alternative conversion modifier. */
112 			LEGAL_ALT(0);
113 			alt_format |= ALT_E;
114 			goto again;
115 
116 		case 'O':	/* "%O?" alternative conversion modifier. */
117 			LEGAL_ALT(0);
118 			alt_format |= ALT_O;
119 			goto again;
120 
121 		/*
122 		 * "Complex" conversion rules, implemented through recursion.
123 		 */
124 		case 'c':	/* Date and time, using the locale's format. */
125 			new_fmt = _ctloc(d_t_fmt);
126 			goto recurse;
127 
128 		case 'D':	/* The date as "%m/%d/%y". */
129 			new_fmt = "%m/%d/%y";
130 			LEGAL_ALT(0);
131 			goto recurse;
132 
133 		case 'F':	/* The date as "%Y-%m-%d". */
134 			new_fmt = "%Y-%m-%d";
135 			LEGAL_ALT(0);
136 			goto recurse;
137 
138 		case 'R':	/* The time as "%H:%M". */
139 			new_fmt = "%H:%M";
140 			LEGAL_ALT(0);
141 			goto recurse;
142 
143 		case 'r':	/* The time in 12-hour clock representation. */
144 			new_fmt =_ctloc(t_fmt_ampm);
145 			LEGAL_ALT(0);
146 			goto recurse;
147 
148 		case 'T':	/* The time as "%H:%M:%S". */
149 			new_fmt = "%H:%M:%S";
150 			LEGAL_ALT(0);
151 			goto recurse;
152 
153 		case 'X':	/* The time, using the locale's format. */
154 			new_fmt =_ctloc(t_fmt);
155 			goto recurse;
156 
157 		case 'x':	/* The date, using the locale's format. */
158 			new_fmt =_ctloc(d_fmt);
			/*
			 * Shared tail of every composite conversion: parse
			 * new_fmt with a nested call.  The nested call has
			 * its own split_year.  Only "%E" passes the check
			 * below; cases that already ran LEGAL_ALT(0) accept
			 * no modifier at all.
			 */
159 		    recurse:
160 			bp = (const u_char *)strptime((const char *)bp,
161 							    new_fmt, tm);
162 			LEGAL_ALT(ALT_E);
163 			continue;
164 
165 		/*
166 		 * "Elementary" conversion rules.
167 		 */
168 		case 'A':	/* The day of week, using the locale's form. */
169 		case 'a':
			/* Full names are tried before abbreviated ones. */
170 			bp = find_string(bp, &tm->tm_wday, _ctloc(day),
171 					_ctloc(abday), 7);
172 			LEGAL_ALT(0);
173 			continue;
174 
175 		case 'B':	/* The month, using the locale's form. */
176 		case 'b':
177 		case 'h':
178 			bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
179 					_ctloc(abmon), 12);
180 			LEGAL_ALT(0);
181 			continue;
182 
183 		case 'C':	/* The century number. */
			/* i keeps this default if conv_num() fails. */
184 			i = 20;
185 			bp = conv_num(bp, &i, 0, 99);
186 
187 			i = i * 100 - TM_YEAR_BASE;
			/* Keep the two-digit year stored by an earlier "%y". */
188 			if (split_year)
189 				i += tm->tm_year % 100;
190 			split_year = 1;
191 			tm->tm_year = i;
192 			LEGAL_ALT(ALT_E);
193 			continue;
194 
195 		case 'd':	/* The day of month. */
196 		case 'e':
197 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
198 			LEGAL_ALT(ALT_O);
199 			continue;
200 
201 		case 'k':	/* The hour (24-hour clock representation). */
202 			LEGAL_ALT(0);
203 			/* FALLTHROUGH */
204 		case 'H':
205 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
206 			LEGAL_ALT(ALT_O);
207 			continue;
208 
209 		case 'l':	/* The hour (12-hour clock representation). */
210 			LEGAL_ALT(0);
211 			/* FALLTHROUGH */
212 		case 'I':
213 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
			/* Store 12 as 0 so that "%p" can add 12 on top of it. */
214 			if (tm->tm_hour == 12)
215 				tm->tm_hour = 0;
216 			LEGAL_ALT(ALT_O);
217 			continue;
218 
219 		case 'j':	/* The day of year. */
220 			i = 1;
221 			bp = conv_num(bp, &i, 1, 366);
			/* Input is 1-based, tm_yday is stored 0-based. */
222 			tm->tm_yday = i - 1;
223 			LEGAL_ALT(0);
224 			continue;
225 
226 		case 'M':	/* The minute. */
227 			bp = conv_num(bp, &tm->tm_min, 0, 59);
228 			LEGAL_ALT(ALT_O);
229 			continue;
230 
231 		case 'm':	/* The month. */
232 			i = 1;
233 			bp = conv_num(bp, &i, 1, 12);
			/* Input is 1-based, tm_mon is stored 0-based. */
234 			tm->tm_mon = i - 1;
235 			LEGAL_ALT(ALT_O);
236 			continue;
237 
238 		case 'p':	/* The locale's equivalent of AM/PM. */
			/*
			 * i receives the index of the matching am_pm entry
			 * (presumably 0 = AM, 1 = PM -- depends on the locale
			 * table) and stays 0 if nothing matches.  tm_hour is
			 * read as already stored in tm, so an hour conversion
			 * is expected to have run before this one.
			 */
239 			bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
240 			if (tm->tm_hour > 11)
241 				return NULL;
242 			tm->tm_hour += i * 12;
243 			LEGAL_ALT(0);
244 			continue;
245 
246 		case 'S':	/* The seconds. */
			/* Accepts 0-61, presumably to allow for leap seconds. */
247 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
248 			LEGAL_ALT(ALT_O);
249 			continue;
250 
251 		case 'U':	/* The week of year, beginning on sunday. */
252 		case 'W':	/* The week of year, beginning on monday. */
253 			/*
254 			 * XXX This is bogus, as we can not assume any valid
255 			 * information present in the tm structure at this
256 			 * point to calculate a real value, so just check the
257 			 * range for now.
258 			 */
			/* The parsed week number is discarded. */
259 			 bp = conv_num(bp, &i, 0, 53);
260 			 LEGAL_ALT(ALT_O);
261 			 continue;
262 
263 		case 'w':	/* The day of week, beginning on sunday. */
264 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
265 			LEGAL_ALT(ALT_O);
266 			continue;
267 
268 		case 'Y':	/* The year. */
269 			i = TM_YEAR_BASE;	/* just for data sanity... */
270 			bp = conv_num(bp, &i, 0, 9999);
271 			tm->tm_year = i - TM_YEAR_BASE;
272 			LEGAL_ALT(ALT_E);
273 			continue;
274 
275 		case 'y':	/* The year within 100 years of the epoch. */
276 			/* LEGAL_ALT(ALT_E | ALT_O); */
277 			bp = conv_num(bp, &i, 0, 99);
278 
			/*
			 * With no earlier "%C", 0-68 are taken as 2000-2068
			 * and 69-99 as 1969-1999.
			 */
279 			if (split_year)
280 				/* preserve century */
281 				i += (tm->tm_year / 100) * 100;
282 			else {
283 				split_year = 1;
284 				if (i <= 68)
285 					i = i + 2000 - TM_YEAR_BASE;
286 				else
287 					i = i + 1900 - TM_YEAR_BASE;
288 			}
289 			tm->tm_year = i;
290 			continue;
291 
		/*
		 * Time zone name: either the case-sensitive prefix "GMT" or
		 * one of the two tzname[] entries, which find_string()
		 * compares without regard to case.  The index of the matching
		 * tzname[] entry is stored in tm_isdst.
		 */
292 		case 'Z':
293 			tzset();
294 			if (strncmp((const char *)bp, gmt, 3) == 0) {
295 				tm->tm_isdst = 0;
296 #ifdef TM_GMTOFF
297 				tm->TM_GMTOFF = 0;
298 #endif
299 #ifdef TM_ZONE
300 				tm->TM_ZONE = gmt;
301 #endif
302 				bp += 3;
303 			} else {
304 				const unsigned char *ep;
305 
306 				ep = find_string(bp, &i,
307 					       	 (const char * const *)tzname,
308 					       	  NULL, 2);
309 				if (ep != NULL) {
310 					tm->tm_isdst = i;
311 #ifdef TM_GMTOFF
312 					tm->TM_GMTOFF = -(timezone);
313 #endif
314 #ifdef TM_ZONE
315 					tm->TM_ZONE = tzname[i];
316 #endif
317 				}
				/* A NULL ep ends parsing at the loop test. */
318 				bp = ep;
319 			}
320 			continue;
321 
322 		/*
323 		 * Miscellaneous conversions.
324 		 */
325 		case 'n':	/* Any kind of white-space. */
326 		case 't':
327 			while (isspace(*bp))
328 				bp++;
329 			LEGAL_ALT(0);
330 			continue;
331 
332 
333 		default:	/* Unknown/unsupported conversion. */
334 			return NULL;
335 		}
336 	}
337 
	/* bp is NULL here when a helper reported a failed conversion. */
338 	return __UNCONST(bp);
339 }
340 
341 
/*
 * conv_num --
 *	Parse an unsigned decimal number from the start of `buf'.
 *
 *	buf	input text; its first character must be a decimal digit
 *	dest	receives the value, and is written only on success
 *	llim	smallest acceptable value
 *	ulim	largest acceptable value; it also bounds how many digits
 *		are consumed (never more than `ulim' itself has)
 *
 *	Scanning also stops as soon as appending one more digit could only
 *	exceed `ulim', so with ulim == 59 the input "99" yields 9 and leaves
 *	the second '9' unread.
 *
 *	Returns a pointer just past the digits consumed, or NULL when `buf'
 *	does not start with a digit or the value lies outside [llim, ulim].
 */
static const unsigned char *
conv_num(const unsigned char *buf, int *dest, unsigned int llim,
    unsigned int ulim)
{
	unsigned int result = 0;
	/* Divided by ten per digit read; reaching zero ends the scan. */
	unsigned int rulim = ulim;
	unsigned char ch = *buf;

	if (ch < '0' || ch > '9')
		return NULL;

	do {
		result = result * 10 + (unsigned int)(ch - '0');
		rulim /= 10;
		ch = *++buf;
	} while (result * 10 <= ulim && rulim != 0 && ch >= '0' && ch <= '9');

	if (result < llim || result > ulim)
		return NULL;

	*dest = (int)result;
	return buf;
}
368 
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against a table of names.
 *
 *	bp	input text
 *	tgt	receives the index of the matching name, and is written
 *		only on success
 *	n1	first table of `c' strings (e.g. full names)
 *	n2	second table of `c' strings tried when nothing in `n1'
 *		matches (e.g. abbreviations); may be NULL
 *	c	number of entries in each table
 *
 *	Every entry of `n1' is tried before any entry of `n2', and within a
 *	table the lowest matching index wins.
 *
 *	Returns a pointer just past the matched name, or NULL if no entry
 *	of either table is a case-insensitive prefix of `bp'.
 */
static const unsigned char *
find_string(const unsigned char *bp, int *tgt, const char * const *n1,
    const char * const *n2, int c)
{
	int i;
	size_t len;

	/* First pass walks n1; the second (if any) walks n2, then stops. */
	for (; n1 != NULL; n1 = n2, n2 = NULL) {
		for (i = 0; i < c; i++, n1++) {
			len = strlen(*n1);
			if (strncasecmp(*n1, (const char *)bp, len) == 0) {
				*tgt = i;
				return bp + len;
			}
		}
	}

	/* Nothing matched */
	return NULL;
}
390