1 /* $NetBSD: strptime.c,v 1.33 2009/05/24 02:25:43 ginsbach Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code was contributed to The NetBSD Foundation by Klaus Klein. 8 * Heavily optimised by David Laight 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #if defined(LIBC_SCCS) && !defined(lint) 34 __RCSID("$NetBSD: strptime.c,v 1.33 2009/05/24 02:25:43 ginsbach Exp $"); 35 #endif 36 37 #include "namespace.h" 38 #include <sys/localedef.h> 39 #include <ctype.h> 40 #include <locale.h> 41 #include <string.h> 42 #include <time.h> 43 #include <tzfile.h> 44 #include "private.h" 45 46 #ifdef __weak_alias 47 __weak_alias(strptime,_strptime) 48 #endif 49 50 #define _ctloc(x) (_CurrentTimeLocale->x) 51 52 /* 53 * We do not implement alternate representations. However, we always 54 * check whether a given modifier is allowed for a certain conversion. 55 */ 56 #define ALT_E 0x01 57 #define ALT_O 0x02 58 #define LEGAL_ALT(x) { if (alt_format & ~(x)) return NULL; } 59 60 static char gmt[] = { "GMT" }; 61 static char utc[] = { "UTC" }; 62 /* RFC-822/RFC-2822 */ 63 static const char * const nast[5] = { 64 "EST", "CST", "MST", "PST", "\0\0\0" 65 }; 66 static const char * const nadt[5] = { 67 "EDT", "CDT", "MDT", "PDT", "\0\0\0" 68 }; 69 70 static const u_char *conv_num(const unsigned char *, int *, uint, uint); 71 static const u_char *find_string(const u_char *, int *, const char * const *, 72 const char * const *, int); 73 74 75 char * 76 strptime(const char *buf, const char *fmt, struct tm *tm) 77 { 78 unsigned char c; 79 const unsigned char *bp, *ep; 80 int alt_format, i, split_year = 0, neg = 0, offs; 81 const char *new_fmt; 82 83 bp = (const u_char *)buf; 84 85 while (bp != NULL && (c = *fmt++) != '\0') { 86 /* Clear `alternate' modifier prior to new conversion. */ 87 alt_format = 0; 88 i = 0; 89 90 /* Eat up white-space. */ 91 if (isspace(c)) { 92 while (isspace(*bp)) 93 bp++; 94 continue; 95 } 96 97 if (c != '%') 98 goto literal; 99 100 101 again: switch (c = *fmt++) { 102 case '%': /* "%%" is converted to "%". */ 103 literal: 104 if (c != *bp++) 105 return NULL; 106 LEGAL_ALT(0); 107 continue; 108 109 /* 110 * "Alternative" modifiers. Just set the appropriate flag 111 * and start over again. 112 */ 113 case 'E': /* "%E?" alternative conversion modifier. */ 114 LEGAL_ALT(0); 115 alt_format |= ALT_E; 116 goto again; 117 118 case 'O': /* "%O?" alternative conversion modifier. */ 119 LEGAL_ALT(0); 120 alt_format |= ALT_O; 121 goto again; 122 123 /* 124 * "Complex" conversion rules, implemented through recursion. 125 */ 126 case 'c': /* Date and time, using the locale's format. */ 127 new_fmt = _ctloc(d_t_fmt); 128 goto recurse; 129 130 case 'D': /* The date as "%m/%d/%y". */ 131 new_fmt = "%m/%d/%y"; 132 LEGAL_ALT(0); 133 goto recurse; 134 135 case 'F': /* The date as "%Y-%m-%d". */ 136 new_fmt = "%Y-%m-%d"; 137 LEGAL_ALT(0); 138 goto recurse; 139 140 case 'R': /* The time as "%H:%M". */ 141 new_fmt = "%H:%M"; 142 LEGAL_ALT(0); 143 goto recurse; 144 145 case 'r': /* The time in 12-hour clock representation. */ 146 new_fmt =_ctloc(t_fmt_ampm); 147 LEGAL_ALT(0); 148 goto recurse; 149 150 case 'T': /* The time as "%H:%M:%S". */ 151 new_fmt = "%H:%M:%S"; 152 LEGAL_ALT(0); 153 goto recurse; 154 155 case 'X': /* The time, using the locale's format. */ 156 new_fmt =_ctloc(t_fmt); 157 goto recurse; 158 159 case 'x': /* The date, using the locale's format. */ 160 new_fmt =_ctloc(d_fmt); 161 recurse: 162 bp = (const u_char *)strptime((const char *)bp, 163 new_fmt, tm); 164 LEGAL_ALT(ALT_E); 165 continue; 166 167 /* 168 * "Elementary" conversion rules. 169 */ 170 case 'A': /* The day of week, using the locale's form. */ 171 case 'a': 172 bp = find_string(bp, &tm->tm_wday, _ctloc(day), 173 _ctloc(abday), 7); 174 LEGAL_ALT(0); 175 continue; 176 177 case 'B': /* The month, using the locale's form. */ 178 case 'b': 179 case 'h': 180 bp = find_string(bp, &tm->tm_mon, _ctloc(mon), 181 _ctloc(abmon), 12); 182 LEGAL_ALT(0); 183 continue; 184 185 case 'C': /* The century number. */ 186 i = 20; 187 bp = conv_num(bp, &i, 0, 99); 188 189 i = i * 100 - TM_YEAR_BASE; 190 if (split_year) 191 i += tm->tm_year % 100; 192 split_year = 1; 193 tm->tm_year = i; 194 LEGAL_ALT(ALT_E); 195 continue; 196 197 case 'd': /* The day of month. */ 198 case 'e': 199 bp = conv_num(bp, &tm->tm_mday, 1, 31); 200 LEGAL_ALT(ALT_O); 201 continue; 202 203 case 'k': /* The hour (24-hour clock representation). */ 204 LEGAL_ALT(0); 205 /* FALLTHROUGH */ 206 case 'H': 207 bp = conv_num(bp, &tm->tm_hour, 0, 23); 208 LEGAL_ALT(ALT_O); 209 continue; 210 211 case 'l': /* The hour (12-hour clock representation). */ 212 LEGAL_ALT(0); 213 /* FALLTHROUGH */ 214 case 'I': 215 bp = conv_num(bp, &tm->tm_hour, 1, 12); 216 if (tm->tm_hour == 12) 217 tm->tm_hour = 0; 218 LEGAL_ALT(ALT_O); 219 continue; 220 221 case 'j': /* The day of year. */ 222 i = 1; 223 bp = conv_num(bp, &i, 1, 366); 224 tm->tm_yday = i - 1; 225 LEGAL_ALT(0); 226 continue; 227 228 case 'M': /* The minute. */ 229 bp = conv_num(bp, &tm->tm_min, 0, 59); 230 LEGAL_ALT(ALT_O); 231 continue; 232 233 case 'm': /* The month. */ 234 i = 1; 235 bp = conv_num(bp, &i, 1, 12); 236 tm->tm_mon = i - 1; 237 LEGAL_ALT(ALT_O); 238 continue; 239 240 case 'p': /* The locale's equivalent of AM/PM. */ 241 bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2); 242 if (tm->tm_hour > 11) 243 return NULL; 244 tm->tm_hour += i * 12; 245 LEGAL_ALT(0); 246 continue; 247 248 case 'S': /* The seconds. */ 249 bp = conv_num(bp, &tm->tm_sec, 0, 61); 250 LEGAL_ALT(ALT_O); 251 continue; 252 253 #ifndef TIME_MAX 254 #define TIME_MAX INT64_MAX 255 #endif 256 case 's': /* seconds since the epoch */ 257 { 258 time_t sse = 0; 259 uint64_t rulim = TIME_MAX; 260 261 if (*bp < '0' || *bp > '9') { 262 bp = NULL; 263 continue; 264 } 265 266 do { 267 sse *= 10; 268 sse += *bp++ - '0'; 269 rulim /= 10; 270 } while ((sse * 10 <= TIME_MAX) && 271 rulim && *bp >= '0' && *bp <= '9'); 272 273 if (sse < 0 || (uint64_t)sse > TIME_MAX) { 274 bp = NULL; 275 continue; 276 } 277 278 if (localtime_r(&sse, tm) == NULL) 279 bp = NULL; 280 } 281 continue; 282 283 case 'U': /* The week of year, beginning on sunday. */ 284 case 'W': /* The week of year, beginning on monday. */ 285 /* 286 * XXX This is bogus, as we can not assume any valid 287 * information present in the tm structure at this 288 * point to calculate a real value, so just check the 289 * range for now. 290 */ 291 bp = conv_num(bp, &i, 0, 53); 292 LEGAL_ALT(ALT_O); 293 continue; 294 295 case 'w': /* The day of week, beginning on sunday. */ 296 bp = conv_num(bp, &tm->tm_wday, 0, 6); 297 LEGAL_ALT(ALT_O); 298 continue; 299 300 case 'u': /* The day of week, monday = 1. */ 301 bp = conv_num(bp, &i, 1, 7); 302 tm->tm_wday = i % 7; 303 LEGAL_ALT(ALT_O); 304 continue; 305 306 case 'g': /* The year corresponding to the ISO week 307 * number but without the century. 308 */ 309 bp = conv_num(bp, &i, 0, 99); 310 continue; 311 312 case 'G': /* The year corresponding to the ISO week 313 * number with century. 314 */ 315 do 316 bp++; 317 while (isdigit(*bp)); 318 continue; 319 320 case 'V': /* The ISO 8601:1988 week number as decimal */ 321 bp = conv_num(bp, &i, 0, 53); 322 continue; 323 324 case 'Y': /* The year. */ 325 i = TM_YEAR_BASE; /* just for data sanity... */ 326 bp = conv_num(bp, &i, 0, 9999); 327 tm->tm_year = i - TM_YEAR_BASE; 328 LEGAL_ALT(ALT_E); 329 continue; 330 331 case 'y': /* The year within 100 years of the epoch. */ 332 /* LEGAL_ALT(ALT_E | ALT_O); */ 333 bp = conv_num(bp, &i, 0, 99); 334 335 if (split_year) 336 /* preserve century */ 337 i += (tm->tm_year / 100) * 100; 338 else { 339 split_year = 1; 340 if (i <= 68) 341 i = i + 2000 - TM_YEAR_BASE; 342 else 343 i = i + 1900 - TM_YEAR_BASE; 344 } 345 tm->tm_year = i; 346 continue; 347 348 case 'Z': 349 tzset(); 350 if (strncmp((const char *)bp, gmt, 3) == 0) { 351 tm->tm_isdst = 0; 352 #ifdef TM_GMTOFF 353 tm->TM_GMTOFF = 0; 354 #endif 355 #ifdef TM_ZONE 356 tm->TM_ZONE = gmt; 357 #endif 358 bp += 3; 359 } else { 360 ep = find_string(bp, &i, 361 (const char * const *)tzname, 362 NULL, 2); 363 if (ep != NULL) { 364 tm->tm_isdst = i; 365 #ifdef TM_GMTOFF 366 tm->TM_GMTOFF = -(timezone); 367 #endif 368 #ifdef TM_ZONE 369 tm->TM_ZONE = tzname[i]; 370 #endif 371 } 372 bp = ep; 373 } 374 continue; 375 376 case 'z': 377 /* 378 * We recognize all ISO 8601 formats: 379 * Z = Zulu time/UTC 380 * [+-]hhmm 381 * [+-]hh:mm 382 * [+-]hh 383 * We recognize all RFC-822/RFC-2822 formats: 384 * UT|GMT 385 * North American : UTC offsets 386 * E[DS]T = Eastern : -4 | -5 387 * C[DS]T = Central : -5 | -6 388 * M[DS]T = Mountain: -6 | -7 389 * P[DS]T = Pacific : -7 | -8 390 * Military 391 * [A-IL-M] = -1 ... -9 (J not used) 392 * [N-Y] = +1 ... +12 393 */ 394 while (isspace(*bp)) 395 bp++; 396 397 switch (*bp++) { 398 case 'G': 399 if (*bp++ != 'M') 400 return NULL; 401 /*FALLTHROUGH*/ 402 case 'U': 403 if (*bp++ != 'T') 404 return NULL; 405 /*FALLTHROUGH*/ 406 case 'Z': 407 tm->tm_isdst = 0; 408 #ifdef TM_GMTOFF 409 tm->TM_GMTOFF = 0; 410 #endif 411 #ifdef TM_ZONE 412 tm->TM_ZONE = utc; 413 #endif 414 continue; 415 case '+': 416 neg = 0; 417 break; 418 case '-': 419 neg = 1; 420 break; 421 default: 422 --bp; 423 ep = find_string(bp, &i, nast, NULL, 4); 424 if (ep != NULL) { 425 #ifdef TM_GMTOFF 426 tm->TM_GMTOFF = -5 - i; 427 #endif 428 #ifdef TM_ZONE 429 tm->TM_ZONE = __UNCONST(nast[i]); 430 #endif 431 bp = ep; 432 continue; 433 } 434 ep = find_string(bp, &i, nadt, NULL, 4); 435 if (ep != NULL) { 436 tm->tm_isdst = 1; 437 #ifdef TM_GMTOFF 438 tm->TM_GMTOFF = -4 - i; 439 #endif 440 #ifdef TM_ZONE 441 tm->TM_ZONE = __UNCONST(nadt[i]); 442 #endif 443 bp = ep; 444 continue; 445 } 446 447 if ((*bp >= 'A' && *bp <= 'I') || 448 (*bp >= 'L' && *bp <= 'Y')) { 449 #ifdef TM_GMTOFF 450 /* Argh! No 'J'! */ 451 if (*bp >= 'A' && *bp <= 'I') 452 tm->TM_GMTOFF = 453 ('A' - 1) - (int)*bp; 454 else if (*bp >= 'L' && *bp <= 'M') 455 tm->TM_GMTOFF = 'A' - (int)*bp; 456 else if (*bp >= 'N' && *bp <= 'Y') 457 tm->TM_GMTOFF = (int)*bp - 'M'; 458 #endif 459 #ifdef TM_ZONE 460 tm->TM_ZONE = NULL; /* XXX */ 461 #endif 462 bp++; 463 continue; 464 } 465 return NULL; 466 } 467 offs = 0; 468 for (i = 0; i < 4; ) { 469 if (isdigit(*bp)) { 470 offs = offs * 10 + (*bp++ - '0'); 471 i++; 472 continue; 473 } 474 if (i == 2 && *bp == ':') { 475 bp++; 476 continue; 477 } 478 break; 479 } 480 switch (i) { 481 case 2: 482 offs *= 100; 483 break; 484 case 4: 485 i = offs % 100; 486 if (i >= 60) 487 return NULL; 488 /* Convert minutes into decimal */ 489 offs = (offs / 100) * 100 + (i * 50) / 30; 490 break; 491 default: 492 return NULL; 493 } 494 if (neg) 495 offs = -offs; 496 tm->tm_isdst = 0; /* XXX */ 497 #ifdef TM_GMTOFF 498 tm->TM_GMTOFF = offs; 499 #endif 500 #ifdef TM_ZONE 501 tm->TM_ZONE = NULL; /* XXX */ 502 #endif 503 continue; 504 505 /* 506 * Miscellaneous conversions. 507 */ 508 case 'n': /* Any kind of white-space. */ 509 case 't': 510 while (isspace(*bp)) 511 bp++; 512 LEGAL_ALT(0); 513 continue; 514 515 516 default: /* Unknown/unsupported conversion. */ 517 return NULL; 518 } 519 } 520 521 return __UNCONST(bp); 522 } 523 524 525 static const u_char * 526 conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim) 527 { 528 uint result = 0; 529 unsigned char ch; 530 531 /* The limit also determines the number of valid digits. */ 532 uint rulim = ulim; 533 534 ch = *buf; 535 if (ch < '0' || ch > '9') 536 return NULL; 537 538 do { 539 result *= 10; 540 result += ch - '0'; 541 rulim /= 10; 542 ch = *++buf; 543 } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9'); 544 545 if (result < llim || result > ulim) 546 return NULL; 547 548 *dest = result; 549 return buf; 550 } 551 552 static const u_char * 553 find_string(const u_char *bp, int *tgt, const char * const *n1, 554 const char * const *n2, int c) 555 { 556 int i; 557 unsigned int len; 558 559 /* check full name - then abbreviated ones */ 560 for (; n1 != NULL; n1 = n2, n2 = NULL) { 561 for (i = 0; i < c; i++, n1++) { 562 len = strlen(*n1); 563 if (strncasecmp(*n1, (const char *)bp, len) == 0) { 564 *tgt = i; 565 return bp + len; 566 } 567 } 568 } 569 570 /* Nothing matched */ 571 return NULL; 572 } 573