1 /* $NetBSD: strptime.c,v 1.39 2015/04/06 14:38:22 ginsbach Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code was contributed to The NetBSD Foundation by Klaus Klein. 8 * Heavily optimised by David Laight 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #if defined(LIBC_SCCS) && !defined(lint) 34 __RCSID("$NetBSD: strptime.c,v 1.39 2015/04/06 14:38:22 ginsbach Exp $"); 35 #endif 36 37 #include "namespace.h" 38 #include <sys/localedef.h> 39 #include <ctype.h> 40 #include <locale.h> 41 #include <string.h> 42 #include <time.h> 43 #include <tzfile.h> 44 #include "private.h" 45 #include "setlocale_local.h" 46 47 #ifdef __weak_alias 48 __weak_alias(strptime,_strptime) 49 __weak_alias(strptime_l, _strptime_l) 50 #endif 51 52 #define _TIME_LOCALE(loc) \ 53 ((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME])) 54 55 /* 56 * We do not implement alternate representations. However, we always 57 * check whether a given modifier is allowed for a certain conversion. 58 */ 59 #define ALT_E 0x01 60 #define ALT_O 0x02 61 #define LEGAL_ALT(x) { if (alt_format & ~(x)) return NULL; } 62 63 static char gmt[] = { "GMT" }; 64 static char utc[] = { "UTC" }; 65 /* RFC-822/RFC-2822 */ 66 static const char * const nast[5] = { 67 "EST", "CST", "MST", "PST", "\0\0\0" 68 }; 69 static const char * const nadt[5] = { 70 "EDT", "CDT", "MDT", "PDT", "\0\0\0" 71 }; 72 73 static const u_char *conv_num(const unsigned char *, int *, uint, uint); 74 static const u_char *find_string(const u_char *, int *, const char * const *, 75 const char * const *, int); 76 77 char * 78 strptime(const char *buf, const char *fmt, struct tm *tm) 79 { 80 return strptime_l(buf, fmt, tm, _current_locale()); 81 } 82 83 char * 84 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc) 85 { 86 unsigned char c; 87 const unsigned char *bp, *ep; 88 int alt_format, i, split_year = 0, neg = 0, offs; 89 const char *new_fmt; 90 91 bp = (const u_char *)buf; 92 93 while (bp != NULL && (c = *fmt++) != '\0') { 94 /* Clear `alternate' modifier prior to new conversion. */ 95 alt_format = 0; 96 i = 0; 97 98 /* Eat up white-space. */ 99 if (isspace(c)) { 100 while (isspace(*bp)) 101 bp++; 102 continue; 103 } 104 105 if (c != '%') 106 goto literal; 107 108 109 again: switch (c = *fmt++) { 110 case '%': /* "%%" is converted to "%". */ 111 literal: 112 if (c != *bp++) 113 return NULL; 114 LEGAL_ALT(0); 115 continue; 116 117 /* 118 * "Alternative" modifiers. Just set the appropriate flag 119 * and start over again. 120 */ 121 case 'E': /* "%E?" alternative conversion modifier. */ 122 LEGAL_ALT(0); 123 alt_format |= ALT_E; 124 goto again; 125 126 case 'O': /* "%O?" alternative conversion modifier. */ 127 LEGAL_ALT(0); 128 alt_format |= ALT_O; 129 goto again; 130 131 /* 132 * "Complex" conversion rules, implemented through recursion. 133 */ 134 case 'c': /* Date and time, using the locale's format. */ 135 new_fmt = _TIME_LOCALE(loc)->d_t_fmt; 136 goto recurse; 137 138 case 'D': /* The date as "%m/%d/%y". */ 139 new_fmt = "%m/%d/%y"; 140 LEGAL_ALT(0); 141 goto recurse; 142 143 case 'F': /* The date as "%Y-%m-%d". */ 144 new_fmt = "%Y-%m-%d"; 145 LEGAL_ALT(0); 146 goto recurse; 147 148 case 'R': /* The time as "%H:%M". */ 149 new_fmt = "%H:%M"; 150 LEGAL_ALT(0); 151 goto recurse; 152 153 case 'r': /* The time in 12-hour clock representation. */ 154 new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm; 155 LEGAL_ALT(0); 156 goto recurse; 157 158 case 'T': /* The time as "%H:%M:%S". */ 159 new_fmt = "%H:%M:%S"; 160 LEGAL_ALT(0); 161 goto recurse; 162 163 case 'X': /* The time, using the locale's format. */ 164 new_fmt = _TIME_LOCALE(loc)->t_fmt; 165 goto recurse; 166 167 case 'x': /* The date, using the locale's format. */ 168 new_fmt = _TIME_LOCALE(loc)->d_fmt; 169 recurse: 170 bp = (const u_char *)strptime((const char *)bp, 171 new_fmt, tm); 172 LEGAL_ALT(ALT_E); 173 continue; 174 175 /* 176 * "Elementary" conversion rules. 177 */ 178 case 'A': /* The day of week, using the locale's form. */ 179 case 'a': 180 bp = find_string(bp, &tm->tm_wday, 181 _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7); 182 LEGAL_ALT(0); 183 continue; 184 185 case 'B': /* The month, using the locale's form. */ 186 case 'b': 187 case 'h': 188 bp = find_string(bp, &tm->tm_mon, 189 _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon, 190 12); 191 LEGAL_ALT(0); 192 continue; 193 194 case 'C': /* The century number. */ 195 i = 20; 196 bp = conv_num(bp, &i, 0, 99); 197 198 i = i * 100 - TM_YEAR_BASE; 199 if (split_year) 200 i += tm->tm_year % 100; 201 split_year = 1; 202 tm->tm_year = i; 203 LEGAL_ALT(ALT_E); 204 continue; 205 206 case 'd': /* The day of month. */ 207 case 'e': 208 bp = conv_num(bp, &tm->tm_mday, 1, 31); 209 LEGAL_ALT(ALT_O); 210 continue; 211 212 case 'k': /* The hour (24-hour clock representation). */ 213 LEGAL_ALT(0); 214 /* FALLTHROUGH */ 215 case 'H': 216 bp = conv_num(bp, &tm->tm_hour, 0, 23); 217 LEGAL_ALT(ALT_O); 218 continue; 219 220 case 'l': /* The hour (12-hour clock representation). */ 221 LEGAL_ALT(0); 222 /* FALLTHROUGH */ 223 case 'I': 224 bp = conv_num(bp, &tm->tm_hour, 1, 12); 225 if (tm->tm_hour == 12) 226 tm->tm_hour = 0; 227 LEGAL_ALT(ALT_O); 228 continue; 229 230 case 'j': /* The day of year. */ 231 i = 1; 232 bp = conv_num(bp, &i, 1, 366); 233 tm->tm_yday = i - 1; 234 LEGAL_ALT(0); 235 continue; 236 237 case 'M': /* The minute. */ 238 bp = conv_num(bp, &tm->tm_min, 0, 59); 239 LEGAL_ALT(ALT_O); 240 continue; 241 242 case 'm': /* The month. */ 243 i = 1; 244 bp = conv_num(bp, &i, 1, 12); 245 tm->tm_mon = i - 1; 246 LEGAL_ALT(ALT_O); 247 continue; 248 249 case 'p': /* The locale's equivalent of AM/PM. */ 250 bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm, 251 NULL, 2); 252 if (tm->tm_hour > 11) 253 return NULL; 254 tm->tm_hour += i * 12; 255 LEGAL_ALT(0); 256 continue; 257 258 case 'S': /* The seconds. */ 259 bp = conv_num(bp, &tm->tm_sec, 0, 61); 260 LEGAL_ALT(ALT_O); 261 continue; 262 263 #ifndef TIME_MAX 264 #define TIME_MAX INT64_MAX 265 #endif 266 case 's': /* seconds since the epoch */ 267 { 268 time_t sse = 0; 269 uint64_t rulim = TIME_MAX; 270 271 if (*bp < '0' || *bp > '9') { 272 bp = NULL; 273 continue; 274 } 275 276 do { 277 sse *= 10; 278 sse += *bp++ - '0'; 279 rulim /= 10; 280 } while ((sse * 10 <= TIME_MAX) && 281 rulim && *bp >= '0' && *bp <= '9'); 282 283 if (sse < 0 || (uint64_t)sse > TIME_MAX) { 284 bp = NULL; 285 continue; 286 } 287 288 if (localtime_r(&sse, tm) == NULL) 289 bp = NULL; 290 } 291 continue; 292 293 case 'U': /* The week of year, beginning on sunday. */ 294 case 'W': /* The week of year, beginning on monday. */ 295 /* 296 * XXX This is bogus, as we can not assume any valid 297 * information present in the tm structure at this 298 * point to calculate a real value, so just check the 299 * range for now. 300 */ 301 bp = conv_num(bp, &i, 0, 53); 302 LEGAL_ALT(ALT_O); 303 continue; 304 305 case 'w': /* The day of week, beginning on sunday. */ 306 bp = conv_num(bp, &tm->tm_wday, 0, 6); 307 LEGAL_ALT(ALT_O); 308 continue; 309 310 case 'u': /* The day of week, monday = 1. */ 311 bp = conv_num(bp, &i, 1, 7); 312 tm->tm_wday = i % 7; 313 LEGAL_ALT(ALT_O); 314 continue; 315 316 case 'g': /* The year corresponding to the ISO week 317 * number but without the century. 318 */ 319 bp = conv_num(bp, &i, 0, 99); 320 continue; 321 322 case 'G': /* The year corresponding to the ISO week 323 * number with century. 324 */ 325 do 326 bp++; 327 while (isdigit(*bp)); 328 continue; 329 330 case 'V': /* The ISO 8601:1988 week number as decimal */ 331 bp = conv_num(bp, &i, 0, 53); 332 continue; 333 334 case 'Y': /* The year. */ 335 i = TM_YEAR_BASE; /* just for data sanity... */ 336 bp = conv_num(bp, &i, 0, 9999); 337 tm->tm_year = i - TM_YEAR_BASE; 338 LEGAL_ALT(ALT_E); 339 continue; 340 341 case 'y': /* The year within 100 years of the epoch. */ 342 /* LEGAL_ALT(ALT_E | ALT_O); */ 343 bp = conv_num(bp, &i, 0, 99); 344 345 if (split_year) 346 /* preserve century */ 347 i += (tm->tm_year / 100) * 100; 348 else { 349 split_year = 1; 350 if (i <= 68) 351 i = i + 2000 - TM_YEAR_BASE; 352 else 353 i = i + 1900 - TM_YEAR_BASE; 354 } 355 tm->tm_year = i; 356 continue; 357 358 case 'Z': 359 tzset(); 360 if (strncmp((const char *)bp, gmt, 3) == 0 || 361 strncmp((const char *)bp, utc, 3) == 0) { 362 tm->tm_isdst = 0; 363 #ifdef TM_GMTOFF 364 tm->TM_GMTOFF = 0; 365 #endif 366 #ifdef TM_ZONE 367 tm->TM_ZONE = gmt; 368 #endif 369 bp += 3; 370 } else { 371 ep = find_string(bp, &i, 372 (const char * const *)tzname, 373 NULL, 2); 374 if (ep != NULL) { 375 tm->tm_isdst = i; 376 #ifdef TM_GMTOFF 377 tm->TM_GMTOFF = -(timezone); 378 #endif 379 #ifdef TM_ZONE 380 tm->TM_ZONE = tzname[i]; 381 #endif 382 } 383 bp = ep; 384 } 385 continue; 386 387 case 'z': 388 /* 389 * We recognize all ISO 8601 formats: 390 * Z = Zulu time/UTC 391 * [+-]hhmm 392 * [+-]hh:mm 393 * [+-]hh 394 * We recognize all RFC-822/RFC-2822 formats: 395 * UT|GMT 396 * North American : UTC offsets 397 * E[DS]T = Eastern : -4 | -5 398 * C[DS]T = Central : -5 | -6 399 * M[DS]T = Mountain: -6 | -7 400 * P[DS]T = Pacific : -7 | -8 401 * Military 402 * [A-IL-M] = -1 ... -9 (J not used) 403 * [N-Y] = +1 ... +12 404 */ 405 while (isspace(*bp)) 406 bp++; 407 408 switch (*bp++) { 409 case 'G': 410 if (*bp++ != 'M') 411 return NULL; 412 /*FALLTHROUGH*/ 413 case 'U': 414 if (*bp++ != 'T') 415 return NULL; 416 /*FALLTHROUGH*/ 417 case 'Z': 418 tm->tm_isdst = 0; 419 #ifdef TM_GMTOFF 420 tm->TM_GMTOFF = 0; 421 #endif 422 #ifdef TM_ZONE 423 tm->TM_ZONE = utc; 424 #endif 425 continue; 426 case '+': 427 neg = 0; 428 break; 429 case '-': 430 neg = 1; 431 break; 432 default: 433 --bp; 434 ep = find_string(bp, &i, nast, NULL, 4); 435 if (ep != NULL) { 436 #ifdef TM_GMTOFF 437 tm->TM_GMTOFF = -5 - i; 438 #endif 439 #ifdef TM_ZONE 440 tm->TM_ZONE = __UNCONST(nast[i]); 441 #endif 442 bp = ep; 443 continue; 444 } 445 ep = find_string(bp, &i, nadt, NULL, 4); 446 if (ep != NULL) { 447 tm->tm_isdst = 1; 448 #ifdef TM_GMTOFF 449 tm->TM_GMTOFF = -4 - i; 450 #endif 451 #ifdef TM_ZONE 452 tm->TM_ZONE = __UNCONST(nadt[i]); 453 #endif 454 bp = ep; 455 continue; 456 } 457 458 if ((*bp >= 'A' && *bp <= 'I') || 459 (*bp >= 'L' && *bp <= 'Y')) { 460 #ifdef TM_GMTOFF 461 /* Argh! No 'J'! */ 462 if (*bp >= 'A' && *bp <= 'I') 463 tm->TM_GMTOFF = 464 ('A' - 1) - (int)*bp; 465 else if (*bp >= 'L' && *bp <= 'M') 466 tm->TM_GMTOFF = 'A' - (int)*bp; 467 else if (*bp >= 'N' && *bp <= 'Y') 468 tm->TM_GMTOFF = (int)*bp - 'M'; 469 #endif 470 #ifdef TM_ZONE 471 tm->TM_ZONE = NULL; /* XXX */ 472 #endif 473 bp++; 474 continue; 475 } 476 return NULL; 477 } 478 offs = 0; 479 for (i = 0; i < 4; ) { 480 if (isdigit(*bp)) { 481 offs = offs * 10 + (*bp++ - '0'); 482 i++; 483 continue; 484 } 485 if (i == 2 && *bp == ':') { 486 bp++; 487 continue; 488 } 489 break; 490 } 491 switch (i) { 492 case 2: 493 offs *= 100; 494 break; 495 case 4: 496 i = offs % 100; 497 if (i >= 60) 498 return NULL; 499 /* Convert minutes into decimal */ 500 offs = (offs / 100) * 100 + (i * 50) / 30; 501 break; 502 default: 503 return NULL; 504 } 505 if (neg) 506 offs = -offs; 507 tm->tm_isdst = 0; /* XXX */ 508 #ifdef TM_GMTOFF 509 tm->TM_GMTOFF = offs; 510 #endif 511 #ifdef TM_ZONE 512 tm->TM_ZONE = NULL; /* XXX */ 513 #endif 514 continue; 515 516 /* 517 * Miscellaneous conversions. 518 */ 519 case 'n': /* Any kind of white-space. */ 520 case 't': 521 while (isspace(*bp)) 522 bp++; 523 LEGAL_ALT(0); 524 continue; 525 526 527 default: /* Unknown/unsupported conversion. */ 528 return NULL; 529 } 530 } 531 532 return __UNCONST(bp); 533 } 534 535 536 static const u_char * 537 conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim) 538 { 539 uint result = 0; 540 unsigned char ch; 541 542 /* The limit also determines the number of valid digits. */ 543 uint rulim = ulim; 544 545 ch = *buf; 546 if (ch < '0' || ch > '9') 547 return NULL; 548 549 do { 550 result *= 10; 551 result += ch - '0'; 552 rulim /= 10; 553 ch = *++buf; 554 } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9'); 555 556 if (result < llim || result > ulim) 557 return NULL; 558 559 *dest = result; 560 return buf; 561 } 562 563 static const u_char * 564 find_string(const u_char *bp, int *tgt, const char * const *n1, 565 const char * const *n2, int c) 566 { 567 int i; 568 size_t len; 569 570 /* check full name - then abbreviated ones */ 571 for (; n1 != NULL; n1 = n2, n2 = NULL) { 572 for (i = 0; i < c; i++, n1++) { 573 len = strlen(*n1); 574 if (strncasecmp(*n1, (const char *)bp, len) == 0) { 575 *tgt = i; 576 return bp + len; 577 } 578 } 579 } 580 581 /* Nothing matched */ 582 return NULL; 583 } 584