1 /* $NetBSD: strptime.c,v 1.31 2008/11/04 21:08:33 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code was contributed to The NetBSD Foundation by Klaus Klein. 8 * Heavily optimised by David Laight 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #if defined(LIBC_SCCS) && !defined(lint) 34 __RCSID("$NetBSD: strptime.c,v 1.31 2008/11/04 21:08:33 christos Exp $"); 35 #endif 36 37 #include "namespace.h" 38 #include <sys/localedef.h> 39 #include <ctype.h> 40 #include <locale.h> 41 #include <string.h> 42 #include <time.h> 43 #include <tzfile.h> 44 #include "private.h" 45 46 #ifdef __weak_alias 47 __weak_alias(strptime,_strptime) 48 #endif 49 50 #define _ctloc(x) (_CurrentTimeLocale->x) 51 52 /* 53 * We do not implement alternate representations. However, we always 54 * check whether a given modifier is allowed for a certain conversion. 55 */ 56 #define ALT_E 0x01 57 #define ALT_O 0x02 58 #define LEGAL_ALT(x) { if (alt_format & ~(x)) return NULL; } 59 60 static char gmt[] = { "GMT" }; 61 static char utc[] = { "UTC" }; 62 63 static const u_char *conv_num(const unsigned char *, int *, uint, uint); 64 static const u_char *find_string(const u_char *, int *, const char * const *, 65 const char * const *, int); 66 67 68 char * 69 strptime(const char *buf, const char *fmt, struct tm *tm) 70 { 71 unsigned char c; 72 const unsigned char *bp; 73 int alt_format, i, split_year = 0, neg, offs; 74 const char *new_fmt; 75 76 bp = (const u_char *)buf; 77 78 while (bp != NULL && (c = *fmt++) != '\0') { 79 /* Clear `alternate' modifier prior to new conversion. */ 80 alt_format = 0; 81 i = 0; 82 83 /* Eat up white-space. */ 84 if (isspace(c)) { 85 while (isspace(*bp)) 86 bp++; 87 continue; 88 } 89 90 if (c != '%') 91 goto literal; 92 93 94 again: switch (c = *fmt++) { 95 case '%': /* "%%" is converted to "%". */ 96 literal: 97 if (c != *bp++) 98 return NULL; 99 LEGAL_ALT(0); 100 continue; 101 102 /* 103 * "Alternative" modifiers. Just set the appropriate flag 104 * and start over again. 105 */ 106 case 'E': /* "%E?" alternative conversion modifier. */ 107 LEGAL_ALT(0); 108 alt_format |= ALT_E; 109 goto again; 110 111 case 'O': /* "%O?" alternative conversion modifier. */ 112 LEGAL_ALT(0); 113 alt_format |= ALT_O; 114 goto again; 115 116 /* 117 * "Complex" conversion rules, implemented through recursion. 118 */ 119 case 'c': /* Date and time, using the locale's format. */ 120 new_fmt = _ctloc(d_t_fmt); 121 goto recurse; 122 123 case 'D': /* The date as "%m/%d/%y". */ 124 new_fmt = "%m/%d/%y"; 125 LEGAL_ALT(0); 126 goto recurse; 127 128 case 'F': /* The date as "%Y-%m-%d". */ 129 new_fmt = "%Y-%m-%d"; 130 LEGAL_ALT(0); 131 goto recurse; 132 133 case 'R': /* The time as "%H:%M". */ 134 new_fmt = "%H:%M"; 135 LEGAL_ALT(0); 136 goto recurse; 137 138 case 'r': /* The time in 12-hour clock representation. */ 139 new_fmt =_ctloc(t_fmt_ampm); 140 LEGAL_ALT(0); 141 goto recurse; 142 143 case 'T': /* The time as "%H:%M:%S". */ 144 new_fmt = "%H:%M:%S"; 145 LEGAL_ALT(0); 146 goto recurse; 147 148 case 'X': /* The time, using the locale's format. */ 149 new_fmt =_ctloc(t_fmt); 150 goto recurse; 151 152 case 'x': /* The date, using the locale's format. */ 153 new_fmt =_ctloc(d_fmt); 154 recurse: 155 bp = (const u_char *)strptime((const char *)bp, 156 new_fmt, tm); 157 LEGAL_ALT(ALT_E); 158 continue; 159 160 /* 161 * "Elementary" conversion rules. 162 */ 163 case 'A': /* The day of week, using the locale's form. */ 164 case 'a': 165 bp = find_string(bp, &tm->tm_wday, _ctloc(day), 166 _ctloc(abday), 7); 167 LEGAL_ALT(0); 168 continue; 169 170 case 'B': /* The month, using the locale's form. */ 171 case 'b': 172 case 'h': 173 bp = find_string(bp, &tm->tm_mon, _ctloc(mon), 174 _ctloc(abmon), 12); 175 LEGAL_ALT(0); 176 continue; 177 178 case 'C': /* The century number. */ 179 i = 20; 180 bp = conv_num(bp, &i, 0, 99); 181 182 i = i * 100 - TM_YEAR_BASE; 183 if (split_year) 184 i += tm->tm_year % 100; 185 split_year = 1; 186 tm->tm_year = i; 187 LEGAL_ALT(ALT_E); 188 continue; 189 190 case 'd': /* The day of month. */ 191 case 'e': 192 bp = conv_num(bp, &tm->tm_mday, 1, 31); 193 LEGAL_ALT(ALT_O); 194 continue; 195 196 case 'k': /* The hour (24-hour clock representation). */ 197 LEGAL_ALT(0); 198 /* FALLTHROUGH */ 199 case 'H': 200 bp = conv_num(bp, &tm->tm_hour, 0, 23); 201 LEGAL_ALT(ALT_O); 202 continue; 203 204 case 'l': /* The hour (12-hour clock representation). */ 205 LEGAL_ALT(0); 206 /* FALLTHROUGH */ 207 case 'I': 208 bp = conv_num(bp, &tm->tm_hour, 1, 12); 209 if (tm->tm_hour == 12) 210 tm->tm_hour = 0; 211 LEGAL_ALT(ALT_O); 212 continue; 213 214 case 'j': /* The day of year. */ 215 i = 1; 216 bp = conv_num(bp, &i, 1, 366); 217 tm->tm_yday = i - 1; 218 LEGAL_ALT(0); 219 continue; 220 221 case 'M': /* The minute. */ 222 bp = conv_num(bp, &tm->tm_min, 0, 59); 223 LEGAL_ALT(ALT_O); 224 continue; 225 226 case 'm': /* The month. */ 227 i = 1; 228 bp = conv_num(bp, &i, 1, 12); 229 tm->tm_mon = i - 1; 230 LEGAL_ALT(ALT_O); 231 continue; 232 233 case 'p': /* The locale's equivalent of AM/PM. */ 234 bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2); 235 if (tm->tm_hour > 11) 236 return NULL; 237 tm->tm_hour += i * 12; 238 LEGAL_ALT(0); 239 continue; 240 241 case 'S': /* The seconds. */ 242 bp = conv_num(bp, &tm->tm_sec, 0, 61); 243 LEGAL_ALT(ALT_O); 244 continue; 245 246 case 'U': /* The week of year, beginning on sunday. */ 247 case 'W': /* The week of year, beginning on monday. */ 248 /* 249 * XXX This is bogus, as we can not assume any valid 250 * information present in the tm structure at this 251 * point to calculate a real value, so just check the 252 * range for now. 253 */ 254 bp = conv_num(bp, &i, 0, 53); 255 LEGAL_ALT(ALT_O); 256 continue; 257 258 case 'w': /* The day of week, beginning on sunday. */ 259 bp = conv_num(bp, &tm->tm_wday, 0, 6); 260 LEGAL_ALT(ALT_O); 261 continue; 262 263 case 'u': /* The day of week, monday = 1. */ 264 bp = conv_num(bp, &i, 1, 7); 265 tm->tm_wday = i % 7; 266 LEGAL_ALT(ALT_O); 267 continue; 268 269 case 'g': /* The year corresponding to the ISO week 270 * number but without the century. 271 */ 272 bp = conv_num(bp, &i, 0, 99); 273 continue; 274 275 case 'G': /* The year corresponding to the ISO week 276 * number with century. 277 */ 278 do 279 bp++; 280 while (isdigit(*bp)); 281 continue; 282 283 case 'V': /* The ISO 8601:1988 week number as decimal */ 284 bp = conv_num(bp, &i, 0, 53); 285 continue; 286 287 case 'Y': /* The year. */ 288 i = TM_YEAR_BASE; /* just for data sanity... */ 289 bp = conv_num(bp, &i, 0, 9999); 290 tm->tm_year = i - TM_YEAR_BASE; 291 LEGAL_ALT(ALT_E); 292 continue; 293 294 case 'y': /* The year within 100 years of the epoch. */ 295 /* LEGAL_ALT(ALT_E | ALT_O); */ 296 bp = conv_num(bp, &i, 0, 99); 297 298 if (split_year) 299 /* preserve century */ 300 i += (tm->tm_year / 100) * 100; 301 else { 302 split_year = 1; 303 if (i <= 68) 304 i = i + 2000 - TM_YEAR_BASE; 305 else 306 i = i + 1900 - TM_YEAR_BASE; 307 } 308 tm->tm_year = i; 309 continue; 310 311 case 'Z': 312 tzset(); 313 if (strncmp((const char *)bp, gmt, 3) == 0) { 314 tm->tm_isdst = 0; 315 #ifdef TM_GMTOFF 316 tm->TM_GMTOFF = 0; 317 #endif 318 #ifdef TM_ZONE 319 tm->TM_ZONE = gmt; 320 #endif 321 bp += 3; 322 } else { 323 const unsigned char *ep; 324 325 ep = find_string(bp, &i, 326 (const char * const *)tzname, 327 NULL, 2); 328 if (ep != NULL) { 329 tm->tm_isdst = i; 330 #ifdef TM_GMTOFF 331 tm->TM_GMTOFF = -(timezone); 332 #endif 333 #ifdef TM_ZONE 334 tm->TM_ZONE = tzname[i]; 335 #endif 336 } 337 bp = ep; 338 } 339 continue; 340 341 case 'z': 342 /* 343 * We recognize all ISO 8601 formats: 344 * Z = Zulu time/UTC 345 * [+-]hhmm 346 * [+-]hh:mm 347 * [+-]hh 348 */ 349 while (isspace(*bp)) 350 bp++; 351 352 switch (*bp++) { 353 case 'Z': 354 tm->tm_isdst = 0; 355 #ifdef TM_GMTOFF 356 tm->TM_GMTOFF = 0; 357 #endif 358 #ifdef TM_ZONE 359 tm->TM_ZONE = utc; 360 #endif 361 continue; 362 case '+': 363 neg = 0; 364 break; 365 case '-': 366 neg = 1; 367 break; 368 default: 369 return NULL; 370 } 371 offs = 0; 372 for (i = 0; i < 4; ) { 373 if (isdigit(*bp)) { 374 offs = offs * 10 + (*bp++ - '0'); 375 i++; 376 continue; 377 } 378 if (i == 2 && *bp == ':') { 379 bp++; 380 continue; 381 } 382 break; 383 } 384 switch (i) { 385 case 2: 386 offs *= 100; 387 break; 388 case 4: 389 i = offs % 100; 390 if (i >= 60) 391 return NULL; 392 /* Convert minutes into decimal */ 393 offs = (offs / 100) * 100 + (i * 50) / 30; 394 break; 395 default: 396 return NULL; 397 } 398 if (neg) 399 offs = -offs; 400 tm->tm_isdst = 0; /* XXX */ 401 #ifdef TM_GMTOFF 402 tm->TM_GMTOFF = offs; 403 #endif 404 #ifdef TM_ZONE 405 tm->TM_ZONE = NULL; /* XXX */ 406 #endif 407 continue; 408 409 /* 410 * Miscellaneous conversions. 411 */ 412 case 'n': /* Any kind of white-space. */ 413 case 't': 414 while (isspace(*bp)) 415 bp++; 416 LEGAL_ALT(0); 417 continue; 418 419 420 default: /* Unknown/unsupported conversion. */ 421 return NULL; 422 } 423 } 424 425 return __UNCONST(bp); 426 } 427 428 429 static const u_char * 430 conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim) 431 { 432 uint result = 0; 433 unsigned char ch; 434 435 /* The limit also determines the number of valid digits. */ 436 uint rulim = ulim; 437 438 ch = *buf; 439 if (ch < '0' || ch > '9') 440 return NULL; 441 442 do { 443 result *= 10; 444 result += ch - '0'; 445 rulim /= 10; 446 ch = *++buf; 447 } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9'); 448 449 if (result < llim || result > ulim) 450 return NULL; 451 452 *dest = result; 453 return buf; 454 } 455 456 static const u_char * 457 find_string(const u_char *bp, int *tgt, const char * const *n1, 458 const char * const *n2, int c) 459 { 460 int i; 461 unsigned int len; 462 463 /* check full name - then abbreviated ones */ 464 for (; n1 != NULL; n1 = n2, n2 = NULL) { 465 for (i = 0; i < c; i++, n1++) { 466 len = strlen(*n1); 467 if (strncasecmp(*n1, (const char *)bp, len) == 0) { 468 *tgt = i; 469 return bp + len; 470 } 471 } 472 } 473 474 /* Nothing matched */ 475 return NULL; 476 } 477