1 /* $NetBSD: strptime.c,v 1.49 2015/10/09 17:21:45 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code was contributed to The NetBSD Foundation by Klaus Klein. 8 * Heavily optimised by David Laight 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #if defined(LIBC_SCCS) && !defined(lint) 34 __RCSID("$NetBSD: strptime.c,v 1.49 2015/10/09 17:21:45 christos Exp $"); 35 #endif 36 37 #include "namespace.h" 38 #include <sys/localedef.h> 39 #include <sys/types.h> 40 #include <ctype.h> 41 #include <locale.h> 42 #include <string.h> 43 #include <time.h> 44 #include <tzfile.h> 45 #include "private.h" 46 #include "setlocale_local.h" 47 48 #ifdef __weak_alias 49 __weak_alias(strptime,_strptime) 50 __weak_alias(strptime_l, _strptime_l) 51 #endif 52 53 static const u_char *conv_num(const unsigned char *, int *, uint, uint); 54 static const u_char *find_string(const u_char *, int *, const char * const *, 55 const char * const *, int); 56 57 #define _TIME_LOCALE(loc) \ 58 ((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME])) 59 60 /* 61 * We do not implement alternate representations. However, we always 62 * check whether a given modifier is allowed for a certain conversion. 63 */ 64 #define ALT_E 0x01 65 #define ALT_O 0x02 66 #define LEGAL_ALT(x) { if (alt_format & ~(x)) return NULL; } 67 68 #define S_YEAR (1 << 0) 69 #define S_MON (1 << 1) 70 #define S_YDAY (1 << 2) 71 #define S_MDAY (1 << 3) 72 #define S_WDAY (1 << 4) 73 #define S_HOUR (1 << 5) 74 75 #define HAVE_MDAY(s) (s & S_MDAY) 76 #define HAVE_MON(s) (s & S_MON) 77 #define HAVE_WDAY(s) (s & S_WDAY) 78 #define HAVE_YDAY(s) (s & S_YDAY) 79 #define HAVE_YEAR(s) (s & S_YEAR) 80 #define HAVE_HOUR(s) (s & S_HOUR) 81 82 static char gmt[] = { "GMT" }; 83 static char utc[] = { "UTC" }; 84 /* RFC-822/RFC-2822 */ 85 static const char * const nast[5] = { 86 "EST", "CST", "MST", "PST", "\0\0\0" 87 }; 88 static const char * const nadt[5] = { 89 "EDT", "CDT", "MDT", "PDT", "\0\0\0" 90 }; 91 92 /* 93 * Table to determine the ordinal date for the start of a month. 94 * Ref: http://en.wikipedia.org/wiki/ISO_week_date 95 */ 96 static const int start_of_month[2][13] = { 97 /* non-leap year */ 98 { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, 99 /* leap year */ 100 { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } 101 }; 102 103 /* 104 * Calculate the week day of the first day of a year. Valid for 105 * the Gregorian calendar, which began Sept 14, 1752 in the UK 106 * and its colonies. Ref: 107 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week 108 */ 109 110 static int 111 first_wday_of(int yr) 112 { 113 return ((2 * (3 - (yr / 100) % 4)) + (yr % 100) + ((yr % 100) / 4) + 114 (isleap(yr) ? 6 : 0) + 1) % 7; 115 } 116 117 char * 118 strptime(const char *buf, const char *fmt, struct tm *tm) 119 { 120 return strptime_l(buf, fmt, tm, _current_locale()); 121 } 122 123 char * 124 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc) 125 { 126 unsigned char c; 127 const unsigned char *bp, *ep; 128 int alt_format, i, split_year = 0, neg = 0, state = 0, 129 day_offset = -1, week_offset = 0, offs; 130 const char *new_fmt; 131 132 bp = (const u_char *)buf; 133 134 while (bp != NULL && (c = *fmt++) != '\0') { 135 /* Clear `alternate' modifier prior to new conversion. */ 136 alt_format = 0; 137 i = 0; 138 139 /* Eat up white-space. */ 140 if (isspace(c)) { 141 while (isspace(*bp)) 142 bp++; 143 continue; 144 } 145 146 if (c != '%') 147 goto literal; 148 149 150 again: switch (c = *fmt++) { 151 case '%': /* "%%" is converted to "%". */ 152 literal: 153 if (c != *bp++) 154 return NULL; 155 LEGAL_ALT(0); 156 continue; 157 158 /* 159 * "Alternative" modifiers. Just set the appropriate flag 160 * and start over again. 161 */ 162 case 'E': /* "%E?" alternative conversion modifier. */ 163 LEGAL_ALT(0); 164 alt_format |= ALT_E; 165 goto again; 166 167 case 'O': /* "%O?" alternative conversion modifier. */ 168 LEGAL_ALT(0); 169 alt_format |= ALT_O; 170 goto again; 171 172 /* 173 * "Complex" conversion rules, implemented through recursion. 174 */ 175 case 'c': /* Date and time, using the locale's format. */ 176 new_fmt = _TIME_LOCALE(loc)->d_t_fmt; 177 state |= S_WDAY | S_MON | S_MDAY | S_YEAR; 178 goto recurse; 179 180 case 'D': /* The date as "%m/%d/%y". */ 181 new_fmt = "%m/%d/%y"; 182 LEGAL_ALT(0); 183 state |= S_MON | S_MDAY | S_YEAR; 184 goto recurse; 185 186 case 'F': /* The date as "%Y-%m-%d". */ 187 new_fmt = "%Y-%m-%d"; 188 LEGAL_ALT(0); 189 state |= S_MON | S_MDAY | S_YEAR; 190 goto recurse; 191 192 case 'R': /* The time as "%H:%M". */ 193 new_fmt = "%H:%M"; 194 LEGAL_ALT(0); 195 goto recurse; 196 197 case 'r': /* The time in 12-hour clock representation. */ 198 new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm; 199 LEGAL_ALT(0); 200 goto recurse; 201 202 case 'T': /* The time as "%H:%M:%S". */ 203 new_fmt = "%H:%M:%S"; 204 LEGAL_ALT(0); 205 goto recurse; 206 207 case 'X': /* The time, using the locale's format. */ 208 new_fmt = _TIME_LOCALE(loc)->t_fmt; 209 goto recurse; 210 211 case 'x': /* The date, using the locale's format. */ 212 new_fmt = _TIME_LOCALE(loc)->d_fmt; 213 state |= S_MON | S_MDAY | S_YEAR; 214 recurse: 215 bp = (const u_char *)strptime((const char *)bp, 216 new_fmt, tm); 217 LEGAL_ALT(ALT_E); 218 continue; 219 220 /* 221 * "Elementary" conversion rules. 222 */ 223 case 'A': /* The day of week, using the locale's form. */ 224 case 'a': 225 bp = find_string(bp, &tm->tm_wday, 226 _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7); 227 LEGAL_ALT(0); 228 state |= S_WDAY; 229 continue; 230 231 case 'B': /* The month, using the locale's form. */ 232 case 'b': 233 case 'h': 234 bp = find_string(bp, &tm->tm_mon, 235 _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon, 236 12); 237 LEGAL_ALT(0); 238 state |= S_MON; 239 continue; 240 241 case 'C': /* The century number. */ 242 i = 20; 243 bp = conv_num(bp, &i, 0, 99); 244 245 i = i * 100 - TM_YEAR_BASE; 246 if (split_year) 247 i += tm->tm_year % 100; 248 split_year = 1; 249 tm->tm_year = i; 250 LEGAL_ALT(ALT_E); 251 state |= S_YEAR; 252 continue; 253 254 case 'd': /* The day of month. */ 255 case 'e': 256 bp = conv_num(bp, &tm->tm_mday, 1, 31); 257 LEGAL_ALT(ALT_O); 258 state |= S_MDAY; 259 continue; 260 261 case 'k': /* The hour (24-hour clock representation). */ 262 LEGAL_ALT(0); 263 /* FALLTHROUGH */ 264 case 'H': 265 bp = conv_num(bp, &tm->tm_hour, 0, 23); 266 LEGAL_ALT(ALT_O); 267 state |= S_HOUR; 268 continue; 269 270 case 'l': /* The hour (12-hour clock representation). */ 271 LEGAL_ALT(0); 272 /* FALLTHROUGH */ 273 case 'I': 274 bp = conv_num(bp, &tm->tm_hour, 1, 12); 275 if (tm->tm_hour == 12) 276 tm->tm_hour = 0; 277 LEGAL_ALT(ALT_O); 278 state |= S_HOUR; 279 continue; 280 281 case 'j': /* The day of year. */ 282 i = 1; 283 bp = conv_num(bp, &i, 1, 366); 284 tm->tm_yday = i - 1; 285 LEGAL_ALT(0); 286 state |= S_YDAY; 287 continue; 288 289 case 'M': /* The minute. */ 290 bp = conv_num(bp, &tm->tm_min, 0, 59); 291 LEGAL_ALT(ALT_O); 292 continue; 293 294 case 'm': /* The month. */ 295 i = 1; 296 bp = conv_num(bp, &i, 1, 12); 297 tm->tm_mon = i - 1; 298 LEGAL_ALT(ALT_O); 299 state |= S_MON; 300 continue; 301 302 case 'p': /* The locale's equivalent of AM/PM. */ 303 bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm, 304 NULL, 2); 305 if (HAVE_HOUR(state) && tm->tm_hour > 11) 306 return NULL; 307 tm->tm_hour += i * 12; 308 LEGAL_ALT(0); 309 continue; 310 311 case 'S': /* The seconds. */ 312 bp = conv_num(bp, &tm->tm_sec, 0, 61); 313 LEGAL_ALT(ALT_O); 314 continue; 315 316 #ifndef TIME_MAX 317 #define TIME_MAX INT64_MAX 318 #endif 319 case 's': /* seconds since the epoch */ 320 { 321 time_t sse = 0; 322 uint64_t rulim = TIME_MAX; 323 324 if (*bp < '0' || *bp > '9') { 325 bp = NULL; 326 continue; 327 } 328 329 do { 330 sse *= 10; 331 sse += *bp++ - '0'; 332 rulim /= 10; 333 } while ((sse * 10 <= TIME_MAX) && 334 rulim && *bp >= '0' && *bp <= '9'); 335 336 if (sse < 0 || (uint64_t)sse > TIME_MAX) { 337 bp = NULL; 338 continue; 339 } 340 341 if (localtime_r(&sse, tm) == NULL) 342 bp = NULL; 343 else 344 state |= S_YDAY | S_WDAY | 345 S_MON | S_MDAY | S_YEAR; 346 } 347 continue; 348 349 case 'U': /* The week of year, beginning on sunday. */ 350 case 'W': /* The week of year, beginning on monday. */ 351 /* 352 * XXX This is bogus, as we can not assume any valid 353 * information present in the tm structure at this 354 * point to calculate a real value, so just check the 355 * range for now. 356 */ 357 bp = conv_num(bp, &i, 0, 53); 358 LEGAL_ALT(ALT_O); 359 if (c == 'U') 360 day_offset = TM_SUNDAY; 361 else 362 day_offset = TM_MONDAY; 363 week_offset = i; 364 continue; 365 366 case 'w': /* The day of week, beginning on sunday. */ 367 bp = conv_num(bp, &tm->tm_wday, 0, 6); 368 LEGAL_ALT(ALT_O); 369 state |= S_WDAY; 370 continue; 371 372 case 'u': /* The day of week, monday = 1. */ 373 bp = conv_num(bp, &i, 1, 7); 374 tm->tm_wday = i % 7; 375 LEGAL_ALT(ALT_O); 376 state |= S_WDAY; 377 continue; 378 379 case 'g': /* The year corresponding to the ISO week 380 * number but without the century. 381 */ 382 bp = conv_num(bp, &i, 0, 99); 383 continue; 384 385 case 'G': /* The year corresponding to the ISO week 386 * number with century. 387 */ 388 do 389 bp++; 390 while (isdigit(*bp)); 391 continue; 392 393 case 'V': /* The ISO 8601:1988 week number as decimal */ 394 bp = conv_num(bp, &i, 0, 53); 395 continue; 396 397 case 'Y': /* The year. */ 398 i = TM_YEAR_BASE; /* just for data sanity... */ 399 bp = conv_num(bp, &i, 0, 9999); 400 tm->tm_year = i - TM_YEAR_BASE; 401 LEGAL_ALT(ALT_E); 402 state |= S_YEAR; 403 continue; 404 405 case 'y': /* The year within 100 years of the epoch. */ 406 /* LEGAL_ALT(ALT_E | ALT_O); */ 407 bp = conv_num(bp, &i, 0, 99); 408 409 if (split_year) 410 /* preserve century */ 411 i += (tm->tm_year / 100) * 100; 412 else { 413 split_year = 1; 414 if (i <= 68) 415 i = i + 2000 - TM_YEAR_BASE; 416 else 417 i = i + 1900 - TM_YEAR_BASE; 418 } 419 tm->tm_year = i; 420 state |= S_YEAR; 421 continue; 422 423 case 'Z': 424 tzset(); 425 if (strncmp((const char *)bp, gmt, 3) == 0 || 426 strncmp((const char *)bp, utc, 3) == 0) { 427 tm->tm_isdst = 0; 428 #ifdef TM_GMTOFF 429 tm->TM_GMTOFF = 0; 430 #endif 431 #ifdef TM_ZONE 432 tm->TM_ZONE = gmt; 433 #endif 434 bp += 3; 435 } else { 436 ep = find_string(bp, &i, 437 (const char * const *)tzname, 438 NULL, 2); 439 if (ep != NULL) { 440 tm->tm_isdst = i; 441 #ifdef TM_GMTOFF 442 tm->TM_GMTOFF = -(timezone); 443 #endif 444 #ifdef TM_ZONE 445 tm->TM_ZONE = tzname[i]; 446 #endif 447 } 448 bp = ep; 449 } 450 continue; 451 452 case 'z': 453 /* 454 * We recognize all ISO 8601 formats: 455 * Z = Zulu time/UTC 456 * [+-]hhmm 457 * [+-]hh:mm 458 * [+-]hh 459 * We recognize all RFC-822/RFC-2822 formats: 460 * UT|GMT 461 * North American : UTC offsets 462 * E[DS]T = Eastern : -4 | -5 463 * C[DS]T = Central : -5 | -6 464 * M[DS]T = Mountain: -6 | -7 465 * P[DS]T = Pacific : -7 | -8 466 * Military 467 * [A-IL-M] = -1 ... -9 (J not used) 468 * [N-Y] = +1 ... +12 469 */ 470 while (isspace(*bp)) 471 bp++; 472 473 switch (*bp++) { 474 case 'G': 475 if (*bp++ != 'M') 476 return NULL; 477 /*FALLTHROUGH*/ 478 case 'U': 479 if (*bp++ != 'T') 480 return NULL; 481 /*FALLTHROUGH*/ 482 case 'Z': 483 tm->tm_isdst = 0; 484 #ifdef TM_GMTOFF 485 tm->TM_GMTOFF = 0; 486 #endif 487 #ifdef TM_ZONE 488 tm->TM_ZONE = utc; 489 #endif 490 continue; 491 case '+': 492 neg = 0; 493 break; 494 case '-': 495 neg = 1; 496 break; 497 default: 498 --bp; 499 ep = find_string(bp, &i, nast, NULL, 4); 500 if (ep != NULL) { 501 #ifdef TM_GMTOFF 502 tm->TM_GMTOFF = -5 - i; 503 #endif 504 #ifdef TM_ZONE 505 tm->TM_ZONE = __UNCONST(nast[i]); 506 #endif 507 bp = ep; 508 continue; 509 } 510 ep = find_string(bp, &i, nadt, NULL, 4); 511 if (ep != NULL) { 512 tm->tm_isdst = 1; 513 #ifdef TM_GMTOFF 514 tm->TM_GMTOFF = -4 - i; 515 #endif 516 #ifdef TM_ZONE 517 tm->TM_ZONE = __UNCONST(nadt[i]); 518 #endif 519 bp = ep; 520 continue; 521 } 522 523 if ((*bp >= 'A' && *bp <= 'I') || 524 (*bp >= 'L' && *bp <= 'Y')) { 525 #ifdef TM_GMTOFF 526 /* Argh! No 'J'! */ 527 if (*bp >= 'A' && *bp <= 'I') 528 tm->TM_GMTOFF = 529 ('A' - 1) - (int)*bp; 530 else if (*bp >= 'L' && *bp <= 'M') 531 tm->TM_GMTOFF = 'A' - (int)*bp; 532 else if (*bp >= 'N' && *bp <= 'Y') 533 tm->TM_GMTOFF = (int)*bp - 'M'; 534 #endif 535 #ifdef TM_ZONE 536 tm->TM_ZONE = utc; /* XXX */ 537 #endif 538 bp++; 539 continue; 540 } 541 return NULL; 542 } 543 offs = 0; 544 for (i = 0; i < 4; ) { 545 if (isdigit(*bp)) { 546 offs = offs * 10 + (*bp++ - '0'); 547 i++; 548 continue; 549 } 550 if (i == 2 && *bp == ':') { 551 bp++; 552 continue; 553 } 554 break; 555 } 556 switch (i) { 557 case 2: 558 offs *= 100; 559 break; 560 case 4: 561 i = offs % 100; 562 if (i >= 60) 563 return NULL; 564 /* Convert minutes into decimal */ 565 offs = (offs / 100) * 100 + (i * 50) / 30; 566 break; 567 default: 568 return NULL; 569 } 570 if (neg) 571 offs = -offs; 572 tm->tm_isdst = 0; /* XXX */ 573 #ifdef TM_GMTOFF 574 tm->TM_GMTOFF = offs; 575 #endif 576 #ifdef TM_ZONE 577 tm->TM_ZONE = utc; /* XXX */ 578 #endif 579 continue; 580 581 /* 582 * Miscellaneous conversions. 583 */ 584 case 'n': /* Any kind of white-space. */ 585 case 't': 586 while (isspace(*bp)) 587 bp++; 588 LEGAL_ALT(0); 589 continue; 590 591 592 default: /* Unknown/unsupported conversion. */ 593 return NULL; 594 } 595 } 596 597 if (!HAVE_YDAY(state) && HAVE_YEAR(state)) { 598 if (HAVE_MON(state) && HAVE_MDAY(state)) { 599 /* calculate day of year (ordinal date) */ 600 tm->tm_yday = start_of_month[isleap_sum(tm->tm_year, 601 TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1); 602 state |= S_YDAY; 603 } else if (day_offset != -1) { 604 /* 605 * Set the date to the first Sunday (or Monday) 606 * of the specified week of the year. 607 */ 608 if (!HAVE_WDAY(state)) { 609 tm->tm_wday = day_offset; 610 state |= S_WDAY; 611 } 612 tm->tm_yday = (7 - 613 first_wday_of(tm->tm_year + TM_YEAR_BASE) + 614 day_offset) % 7 + (week_offset - 1) * 7 + 615 tm->tm_wday - day_offset; 616 state |= S_YDAY; 617 } 618 } 619 620 if (HAVE_YDAY(state) && HAVE_YEAR(state)) { 621 int isleap; 622 623 if (!HAVE_MON(state)) { 624 /* calculate month of day of year */ 625 i = 0; 626 isleap = isleap_sum(tm->tm_year, TM_YEAR_BASE); 627 while (tm->tm_yday >= start_of_month[isleap][i]) 628 i++; 629 if (i > 12) { 630 i = 1; 631 tm->tm_yday -= start_of_month[isleap][12]; 632 tm->tm_year++; 633 } 634 tm->tm_mon = i - 1; 635 state |= S_MON; 636 } 637 638 if (!HAVE_MDAY(state)) { 639 /* calculate day of month */ 640 isleap = isleap_sum(tm->tm_year, TM_YEAR_BASE); 641 tm->tm_mday = tm->tm_yday - 642 start_of_month[isleap][tm->tm_mon] + 1; 643 state |= S_MDAY; 644 } 645 646 if (!HAVE_WDAY(state)) { 647 /* calculate day of week */ 648 i = 0; 649 week_offset = first_wday_of(tm->tm_year); 650 while (i++ <= tm->tm_yday) { 651 if (week_offset++ >= 6) 652 week_offset = 0; 653 } 654 tm->tm_wday = week_offset; 655 state |= S_WDAY; 656 } 657 } 658 659 return __UNCONST(bp); 660 } 661 662 663 static const u_char * 664 conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim) 665 { 666 uint result = 0; 667 unsigned char ch; 668 669 /* The limit also determines the number of valid digits. */ 670 uint rulim = ulim; 671 672 ch = *buf; 673 if (ch < '0' || ch > '9') 674 return NULL; 675 676 do { 677 result *= 10; 678 result += ch - '0'; 679 rulim /= 10; 680 ch = *++buf; 681 } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9'); 682 683 if (result < llim || result > ulim) 684 return NULL; 685 686 *dest = result; 687 return buf; 688 } 689 690 static const u_char * 691 find_string(const u_char *bp, int *tgt, const char * const *n1, 692 const char * const *n2, int c) 693 { 694 int i; 695 size_t len; 696 697 /* check full name - then abbreviated ones */ 698 for (; n1 != NULL; n1 = n2, n2 = NULL) { 699 for (i = 0; i < c; i++, n1++) { 700 len = strlen(*n1); 701 if (strncasecmp(*n1, (const char *)bp, len) == 0) { 702 *tgt = i; 703 return bp + len; 704 } 705 } 706 } 707 708 /* Nothing matched */ 709 return NULL; 710 } 711