1 /* $OpenBSD: strptime.c,v 1.24 2019/01/22 11:09:03 cheloha Exp $ */ 2 /* $NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $ */ 3 /*- 4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code was contributed to The NetBSD Foundation by Klaus Klein. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <ctype.h> 32 #include <locale.h> 33 #include <stdint.h> 34 #include <string.h> 35 #include <time.h> 36 37 #include "localedef.h" 38 #include "private.h" 39 #include "tzfile.h" 40 41 #define _ctloc(x) (_CurrentTimeLocale->x) 42 43 /* 44 * We do not implement alternate representations. However, we always 45 * check whether a given modifier is allowed for a certain conversion. 46 */ 47 #define _ALT_E 0x01 48 #define _ALT_O 0x02 49 #define _LEGAL_ALT(x) { if (alt_format & ~(x)) return (0); } 50 51 /* 52 * We keep track of some of the fields we set in order to compute missing ones. 53 */ 54 #define FIELD_TM_MON (1 << 0) 55 #define FIELD_TM_MDAY (1 << 1) 56 #define FIELD_TM_WDAY (1 << 2) 57 #define FIELD_TM_YDAY (1 << 3) 58 #define FIELD_TM_YEAR (1 << 4) 59 60 static char gmt[] = { "GMT" }; 61 static char utc[] = { "UTC" }; 62 /* RFC-822/RFC-2822 */ 63 static const char * const nast[5] = { 64 "EST", "CST", "MST", "PST", "\0\0\0" 65 }; 66 static const char * const nadt[5] = { 67 "EDT", "CDT", "MDT", "PDT", "\0\0\0" 68 }; 69 70 static const int mon_lengths[2][MONSPERYEAR] = { 71 { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, 72 { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 } 73 }; 74 75 static int _conv_num64(const unsigned char **, int64_t *, int64_t, int64_t); 76 static int _conv_num(const unsigned char **, int *, int, int); 77 static int leaps_thru_end_of(const int y); 78 static char *_strptime(const char *, const char *, struct tm *, int); 79 static const u_char *_find_string(const u_char *, int *, const char * const *, 80 const char * const *, int); 81 82 83 char * 84 strptime(const char *buf, const char *fmt, struct tm *tm) 85 { 86 return(_strptime(buf, fmt, tm, 1)); 87 } 88 DEF_WEAK(strptime); 89 90 static char * 91 _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize) 92 { 93 unsigned char c; 94 const unsigned char *bp, *ep; 95 size_t len; 96 int alt_format, i, offs; 97 int neg = 0; 98 static int century, relyear, fields; 99 100 if (initialize) { 101 century = TM_YEAR_BASE; 102 relyear = -1; 103 fields = 0; 104 } 105 106 bp = (const unsigned char *)buf; 107 while ((c = *fmt) != '\0') { 108 /* Clear `alternate' modifier prior to new conversion. */ 109 alt_format = 0; 110 111 /* Eat up white-space. */ 112 if (isspace(c)) { 113 while (isspace(*bp)) 114 bp++; 115 116 fmt++; 117 continue; 118 } 119 120 if ((c = *fmt++) != '%') 121 goto literal; 122 123 124 again: switch (c = *fmt++) { 125 case '%': /* "%%" is converted to "%". */ 126 literal: 127 if (c != *bp++) 128 return (NULL); 129 130 break; 131 132 /* 133 * "Alternative" modifiers. Just set the appropriate flag 134 * and start over again. 135 */ 136 case 'E': /* "%E?" alternative conversion modifier. */ 137 _LEGAL_ALT(0); 138 alt_format |= _ALT_E; 139 goto again; 140 141 case 'O': /* "%O?" alternative conversion modifier. */ 142 _LEGAL_ALT(0); 143 alt_format |= _ALT_O; 144 goto again; 145 146 /* 147 * "Complex" conversion rules, implemented through recursion. 148 */ 149 case 'c': /* Date and time, using the locale's format. */ 150 _LEGAL_ALT(_ALT_E); 151 if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0))) 152 return (NULL); 153 break; 154 155 case 'D': /* The date as "%m/%d/%y". */ 156 _LEGAL_ALT(0); 157 if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0))) 158 return (NULL); 159 break; 160 161 case 'F': /* The date as "%Y-%m-%d". */ 162 _LEGAL_ALT(0); 163 if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0))) 164 return (NULL); 165 continue; 166 167 case 'R': /* The time as "%H:%M". */ 168 _LEGAL_ALT(0); 169 if (!(bp = _strptime(bp, "%H:%M", tm, 0))) 170 return (NULL); 171 break; 172 173 case 'r': /* The time as "%I:%M:%S %p". */ 174 _LEGAL_ALT(0); 175 if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0))) 176 return (NULL); 177 break; 178 179 case 'T': /* The time as "%H:%M:%S". */ 180 _LEGAL_ALT(0); 181 if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0))) 182 return (NULL); 183 break; 184 185 case 'X': /* The time, using the locale's format. */ 186 _LEGAL_ALT(_ALT_E); 187 if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0))) 188 return (NULL); 189 break; 190 191 case 'x': /* The date, using the locale's format. */ 192 _LEGAL_ALT(_ALT_E); 193 if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0))) 194 return (NULL); 195 break; 196 197 /* 198 * "Elementary" conversion rules. 199 */ 200 case 'A': /* The day of week, using the locale's form. */ 201 case 'a': 202 _LEGAL_ALT(0); 203 for (i = 0; i < 7; i++) { 204 /* Full name. */ 205 len = strlen(_ctloc(day[i])); 206 if (strncasecmp(_ctloc(day[i]), bp, len) == 0) 207 break; 208 209 /* Abbreviated name. */ 210 len = strlen(_ctloc(abday[i])); 211 if (strncasecmp(_ctloc(abday[i]), bp, len) == 0) 212 break; 213 } 214 215 /* Nothing matched. */ 216 if (i == 7) 217 return (NULL); 218 219 tm->tm_wday = i; 220 bp += len; 221 fields |= FIELD_TM_WDAY; 222 break; 223 224 case 'B': /* The month, using the locale's form. */ 225 case 'b': 226 case 'h': 227 _LEGAL_ALT(0); 228 for (i = 0; i < 12; i++) { 229 /* Full name. */ 230 len = strlen(_ctloc(mon[i])); 231 if (strncasecmp(_ctloc(mon[i]), bp, len) == 0) 232 break; 233 234 /* Abbreviated name. */ 235 len = strlen(_ctloc(abmon[i])); 236 if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0) 237 break; 238 } 239 240 /* Nothing matched. */ 241 if (i == 12) 242 return (NULL); 243 244 tm->tm_mon = i; 245 bp += len; 246 fields |= FIELD_TM_MON; 247 break; 248 249 case 'C': /* The century number. */ 250 _LEGAL_ALT(_ALT_E); 251 if (!(_conv_num(&bp, &i, 0, 99))) 252 return (NULL); 253 254 century = i * 100; 255 break; 256 257 case 'd': /* The day of month. */ 258 case 'e': 259 _LEGAL_ALT(_ALT_O); 260 if (!(_conv_num(&bp, &tm->tm_mday, 1, 31))) 261 return (NULL); 262 fields |= FIELD_TM_MDAY; 263 break; 264 265 case 'k': /* The hour (24-hour clock representation). */ 266 _LEGAL_ALT(0); 267 /* FALLTHROUGH */ 268 case 'H': 269 _LEGAL_ALT(_ALT_O); 270 if (!(_conv_num(&bp, &tm->tm_hour, 0, 23))) 271 return (NULL); 272 break; 273 274 case 'l': /* The hour (12-hour clock representation). */ 275 _LEGAL_ALT(0); 276 /* FALLTHROUGH */ 277 case 'I': 278 _LEGAL_ALT(_ALT_O); 279 if (!(_conv_num(&bp, &tm->tm_hour, 1, 12))) 280 return (NULL); 281 break; 282 283 case 'j': /* The day of year. */ 284 _LEGAL_ALT(0); 285 if (!(_conv_num(&bp, &tm->tm_yday, 1, 366))) 286 return (NULL); 287 tm->tm_yday--; 288 fields |= FIELD_TM_YDAY; 289 break; 290 291 case 'M': /* The minute. */ 292 _LEGAL_ALT(_ALT_O); 293 if (!(_conv_num(&bp, &tm->tm_min, 0, 59))) 294 return (NULL); 295 break; 296 297 case 'm': /* The month. */ 298 _LEGAL_ALT(_ALT_O); 299 if (!(_conv_num(&bp, &tm->tm_mon, 1, 12))) 300 return (NULL); 301 tm->tm_mon--; 302 fields |= FIELD_TM_MON; 303 break; 304 305 case 'p': /* The locale's equivalent of AM/PM. */ 306 _LEGAL_ALT(0); 307 /* AM? */ 308 len = strlen(_ctloc(am_pm[0])); 309 if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) { 310 if (tm->tm_hour > 12) /* i.e., 13:00 AM ?! */ 311 return (NULL); 312 else if (tm->tm_hour == 12) 313 tm->tm_hour = 0; 314 315 bp += len; 316 break; 317 } 318 /* PM? */ 319 len = strlen(_ctloc(am_pm[1])); 320 if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) { 321 if (tm->tm_hour > 12) /* i.e., 13:00 PM ?! */ 322 return (NULL); 323 else if (tm->tm_hour < 12) 324 tm->tm_hour += 12; 325 326 bp += len; 327 break; 328 } 329 330 /* Nothing matched. */ 331 return (NULL); 332 333 case 'S': /* The seconds. */ 334 _LEGAL_ALT(_ALT_O); 335 if (!(_conv_num(&bp, &tm->tm_sec, 0, 60))) 336 return (NULL); 337 break; 338 case 's': /* Seconds since epoch */ 339 { 340 int64_t i64; 341 if (!(_conv_num64(&bp, &i64, 0, INT64_MAX))) 342 return (NULL); 343 if (!gmtime_r(&i64, tm)) 344 return (NULL); 345 fields = 0xffff; /* everything */ 346 } 347 break; 348 case 'U': /* The week of year, beginning on sunday. */ 349 case 'W': /* The week of year, beginning on monday. */ 350 _LEGAL_ALT(_ALT_O); 351 /* 352 * XXX This is bogus, as we can not assume any valid 353 * information present in the tm structure at this 354 * point to calculate a real value, so just check the 355 * range for now. 356 */ 357 if (!(_conv_num(&bp, &i, 0, 53))) 358 return (NULL); 359 break; 360 361 case 'w': /* The day of week, beginning on sunday. */ 362 _LEGAL_ALT(_ALT_O); 363 if (!(_conv_num(&bp, &tm->tm_wday, 0, 6))) 364 return (NULL); 365 fields |= FIELD_TM_WDAY; 366 break; 367 368 case 'u': /* The day of week, monday = 1. */ 369 _LEGAL_ALT(_ALT_O); 370 if (!(_conv_num(&bp, &i, 1, 7))) 371 return (NULL); 372 tm->tm_wday = i % 7; 373 fields |= FIELD_TM_WDAY; 374 continue; 375 376 case 'g': /* The year corresponding to the ISO week 377 * number but without the century. 378 */ 379 if (!(_conv_num(&bp, &i, 0, 99))) 380 return (NULL); 381 continue; 382 383 case 'G': /* The year corresponding to the ISO week 384 * number with century. 385 */ 386 do 387 bp++; 388 while (isdigit(*bp)); 389 continue; 390 391 case 'V': /* The ISO 8601:1988 week number as decimal */ 392 if (!(_conv_num(&bp, &i, 0, 53))) 393 return (NULL); 394 continue; 395 396 case 'Y': /* The year. */ 397 _LEGAL_ALT(_ALT_E); 398 if (!(_conv_num(&bp, &i, 0, 9999))) 399 return (NULL); 400 401 relyear = -1; 402 tm->tm_year = i - TM_YEAR_BASE; 403 fields |= FIELD_TM_YEAR; 404 break; 405 406 case 'y': /* The year within the century (2 digits). */ 407 _LEGAL_ALT(_ALT_E | _ALT_O); 408 if (!(_conv_num(&bp, &relyear, 0, 99))) 409 return (NULL); 410 break; 411 412 case 'Z': 413 tzset(); 414 if (strncmp((const char *)bp, gmt, 3) == 0) { 415 tm->tm_isdst = 0; 416 #ifdef TM_GMTOFF 417 tm->TM_GMTOFF = 0; 418 #endif 419 #ifdef TM_ZONE 420 tm->TM_ZONE = gmt; 421 #endif 422 bp += 3; 423 } else if (strncmp((const char *)bp, utc, 3) == 0) { 424 tm->tm_isdst = 0; 425 #ifdef TM_GMTOFF 426 tm->TM_GMTOFF = 0; 427 #endif 428 #ifdef TM_ZONE 429 tm->TM_ZONE = utc; 430 #endif 431 bp += 3; 432 } else { 433 ep = _find_string(bp, &i, 434 (const char * const *)tzname, 435 NULL, 2); 436 if (ep == NULL) 437 return (NULL); 438 439 tm->tm_isdst = i; 440 #ifdef TM_GMTOFF 441 tm->TM_GMTOFF = -(timezone); 442 #endif 443 #ifdef TM_ZONE 444 tm->TM_ZONE = tzname[i]; 445 #endif 446 bp = ep; 447 } 448 continue; 449 450 case 'z': 451 /* 452 * We recognize all ISO 8601 formats: 453 * Z = Zulu time/UTC 454 * [+-]hhmm 455 * [+-]hh:mm 456 * [+-]hh 457 * We recognize all RFC-822/RFC-2822 formats: 458 * UT|GMT 459 * North American : UTC offsets 460 * E[DS]T = Eastern : -4 | -5 461 * C[DS]T = Central : -5 | -6 462 * M[DS]T = Mountain: -6 | -7 463 * P[DS]T = Pacific : -7 | -8 464 * Military 465 * [A-IL-M] = -1 ... -9 (J not used) 466 * [N-Y] = +1 ... +12 467 */ 468 while (isspace(*bp)) 469 bp++; 470 471 switch (*bp++) { 472 case 'G': 473 if (*bp++ != 'M') 474 return NULL; 475 /*FALLTHROUGH*/ 476 case 'U': 477 if (*bp++ != 'T') 478 return NULL; 479 /*FALLTHROUGH*/ 480 case 'Z': 481 tm->tm_isdst = 0; 482 #ifdef TM_GMTOFF 483 tm->TM_GMTOFF = 0; 484 #endif 485 #ifdef TM_ZONE 486 tm->TM_ZONE = utc; 487 #endif 488 continue; 489 case '+': 490 neg = 0; 491 break; 492 case '-': 493 neg = 1; 494 break; 495 default: 496 --bp; 497 ep = _find_string(bp, &i, nast, NULL, 4); 498 if (ep != NULL) { 499 #ifdef TM_GMTOFF 500 tm->TM_GMTOFF = -5 - i; 501 #endif 502 #ifdef TM_ZONE 503 tm->TM_ZONE = (char *)nast[i]; 504 #endif 505 bp = ep; 506 continue; 507 } 508 ep = _find_string(bp, &i, nadt, NULL, 4); 509 if (ep != NULL) { 510 tm->tm_isdst = 1; 511 #ifdef TM_GMTOFF 512 tm->TM_GMTOFF = -4 - i; 513 #endif 514 #ifdef TM_ZONE 515 tm->TM_ZONE = (char *)nadt[i]; 516 #endif 517 bp = ep; 518 continue; 519 } 520 521 if ((*bp >= 'A' && *bp <= 'I') || 522 (*bp >= 'L' && *bp <= 'Y')) { 523 #ifdef TM_GMTOFF 524 /* Argh! No 'J'! */ 525 if (*bp >= 'A' && *bp <= 'I') 526 tm->TM_GMTOFF = 527 ('A' - 1) - (int)*bp; 528 else if (*bp >= 'L' && *bp <= 'M') 529 tm->TM_GMTOFF = 'A' - (int)*bp; 530 else if (*bp >= 'N' && *bp <= 'Y') 531 tm->TM_GMTOFF = (int)*bp - 'M'; 532 #endif 533 #ifdef TM_ZONE 534 tm->TM_ZONE = NULL; /* XXX */ 535 #endif 536 bp++; 537 continue; 538 } 539 return NULL; 540 } 541 offs = 0; 542 for (i = 0; i < 4; ) { 543 if (isdigit(*bp)) { 544 offs = offs * 10 + (*bp++ - '0'); 545 i++; 546 continue; 547 } 548 if (i == 2 && *bp == ':') { 549 bp++; 550 continue; 551 } 552 break; 553 } 554 switch (i) { 555 case 2: 556 offs *= 100; 557 break; 558 case 4: 559 i = offs % 100; 560 if (i >= 60) 561 return NULL; 562 /* Convert minutes into decimal */ 563 offs = (offs / 100) * 100 + (i * 50) / 30; 564 break; 565 default: 566 return NULL; 567 } 568 if (neg) 569 offs = -offs; 570 tm->tm_isdst = 0; /* XXX */ 571 #ifdef TM_GMTOFF 572 tm->TM_GMTOFF = offs; 573 #endif 574 #ifdef TM_ZONE 575 tm->TM_ZONE = NULL; /* XXX */ 576 #endif 577 continue; 578 579 /* 580 * Miscellaneous conversions. 581 */ 582 case 'n': /* Any kind of white-space. */ 583 case 't': 584 _LEGAL_ALT(0); 585 while (isspace(*bp)) 586 bp++; 587 break; 588 589 590 default: /* Unknown/unsupported conversion. */ 591 return (NULL); 592 } 593 594 595 } 596 597 /* 598 * We need to evaluate the two digit year spec (%y) 599 * last as we can get a century spec (%C) at any time. 600 */ 601 if (relyear != -1) { 602 if (century == TM_YEAR_BASE) { 603 if (relyear <= 68) 604 tm->tm_year = relyear + 2000 - TM_YEAR_BASE; 605 else 606 tm->tm_year = relyear + 1900 - TM_YEAR_BASE; 607 } else { 608 tm->tm_year = relyear + century - TM_YEAR_BASE; 609 } 610 fields |= FIELD_TM_YEAR; 611 } 612 613 /* Compute some missing values when possible. */ 614 if (fields & FIELD_TM_YEAR) { 615 const int year = tm->tm_year + TM_YEAR_BASE; 616 const int *mon_lens = mon_lengths[isleap(year)]; 617 if (!(fields & FIELD_TM_YDAY) && 618 (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) { 619 tm->tm_yday = tm->tm_mday - 1; 620 for (i = 0; i < tm->tm_mon; i++) 621 tm->tm_yday += mon_lens[i]; 622 fields |= FIELD_TM_YDAY; 623 } 624 if (fields & FIELD_TM_YDAY) { 625 int days = tm->tm_yday; 626 if (!(fields & FIELD_TM_WDAY)) { 627 tm->tm_wday = EPOCH_WDAY + 628 ((year - EPOCH_YEAR) % DAYSPERWEEK) * 629 (DAYSPERNYEAR % DAYSPERWEEK) + 630 leaps_thru_end_of(year - 1) - 631 leaps_thru_end_of(EPOCH_YEAR - 1) + 632 tm->tm_yday; 633 tm->tm_wday %= DAYSPERWEEK; 634 if (tm->tm_wday < 0) 635 tm->tm_wday += DAYSPERWEEK; 636 } 637 if (!(fields & FIELD_TM_MON)) { 638 tm->tm_mon = 0; 639 while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon]) 640 days -= mon_lens[tm->tm_mon++]; 641 } 642 if (!(fields & FIELD_TM_MDAY)) 643 tm->tm_mday = days + 1; 644 } 645 } 646 647 return ((char *)bp); 648 } 649 650 651 static int 652 _conv_num(const unsigned char **buf, int *dest, int llim, int ulim) 653 { 654 int result = 0; 655 int rulim = ulim; 656 657 if (**buf < '0' || **buf > '9') 658 return (0); 659 660 /* we use rulim to break out of the loop when we run out of digits */ 661 do { 662 result *= 10; 663 result += *(*buf)++ - '0'; 664 rulim /= 10; 665 } while ((result * 10 <= ulim) && rulim && **buf >= '0' && **buf <= '9'); 666 667 if (result < llim || result > ulim) 668 return (0); 669 670 *dest = result; 671 return (1); 672 } 673 674 static int 675 _conv_num64(const unsigned char **buf, int64_t *dest, int64_t llim, int64_t ulim) 676 { 677 int result = 0; 678 int64_t rulim = ulim; 679 680 if (**buf < '0' || **buf > '9') 681 return (0); 682 683 /* we use rulim to break out of the loop when we run out of digits */ 684 do { 685 result *= 10; 686 result += *(*buf)++ - '0'; 687 rulim /= 10; 688 } while ((result * 10 <= ulim) && rulim && **buf >= '0' && **buf <= '9'); 689 690 if (result < llim || result > ulim) 691 return (0); 692 693 *dest = result; 694 return (1); 695 } 696 697 static const u_char * 698 _find_string(const u_char *bp, int *tgt, const char * const *n1, 699 const char * const *n2, int c) 700 { 701 int i; 702 unsigned int len; 703 704 /* check full name - then abbreviated ones */ 705 for (; n1 != NULL; n1 = n2, n2 = NULL) { 706 for (i = 0; i < c; i++, n1++) { 707 len = strlen(*n1); 708 if (strncasecmp(*n1, (const char *)bp, len) == 0) { 709 *tgt = i; 710 return bp + len; 711 } 712 } 713 } 714 715 /* Nothing matched */ 716 return NULL; 717 } 718 719 static int 720 leaps_thru_end_of(const int y) 721 { 722 return (y >= 0) ? (y / 4 - y / 100 + y / 400) : 723 -(leaps_thru_end_of(-(y + 1)) + 1); 724 } 725