1 /* $NetBSD: strptime.c,v 1.32 2009/05/01 20:15:05 ginsbach Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code was contributed to The NetBSD Foundation by Klaus Klein. 8 * Heavily optimised by David Laight 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #if defined(LIBC_SCCS) && !defined(lint) 34 __RCSID("$NetBSD: strptime.c,v 1.32 2009/05/01 20:15:05 ginsbach Exp $"); 35 #endif 36 37 #include "namespace.h" 38 #include <sys/localedef.h> 39 #include <ctype.h> 40 #include <locale.h> 41 #include <string.h> 42 #include <time.h> 43 #include <tzfile.h> 44 #include "private.h" 45 46 #ifdef __weak_alias 47 __weak_alias(strptime,_strptime) 48 #endif 49 50 #define _ctloc(x) (_CurrentTimeLocale->x) 51 52 /* 53 * We do not implement alternate representations. However, we always 54 * check whether a given modifier is allowed for a certain conversion. 55 */ 56 #define ALT_E 0x01 57 #define ALT_O 0x02 58 #define LEGAL_ALT(x) { if (alt_format & ~(x)) return NULL; } 59 60 static char gmt[] = { "GMT" }; 61 static char utc[] = { "UTC" }; 62 /* RFC-822/RFC-2822 */ 63 static const char * const nast[5] = { 64 "EST", "CST", "MST", "PST", "\0\0\0" 65 }; 66 static const char * const nadt[5] = { 67 "EDT", "CDT", "MDT", "PDT", "\0\0\0" 68 }; 69 70 static const u_char *conv_num(const unsigned char *, int *, uint, uint); 71 static const u_char *find_string(const u_char *, int *, const char * const *, 72 const char * const *, int); 73 74 75 char * 76 strptime(const char *buf, const char *fmt, struct tm *tm) 77 { 78 unsigned char c; 79 const unsigned char *bp, *ep; 80 int alt_format, i, split_year = 0, neg = 0, offs; 81 const char *new_fmt; 82 83 bp = (const u_char *)buf; 84 85 while (bp != NULL && (c = *fmt++) != '\0') { 86 /* Clear `alternate' modifier prior to new conversion. */ 87 alt_format = 0; 88 i = 0; 89 90 /* Eat up white-space. */ 91 if (isspace(c)) { 92 while (isspace(*bp)) 93 bp++; 94 continue; 95 } 96 97 if (c != '%') 98 goto literal; 99 100 101 again: switch (c = *fmt++) { 102 case '%': /* "%%" is converted to "%". */ 103 literal: 104 if (c != *bp++) 105 return NULL; 106 LEGAL_ALT(0); 107 continue; 108 109 /* 110 * "Alternative" modifiers. Just set the appropriate flag 111 * and start over again. 112 */ 113 case 'E': /* "%E?" alternative conversion modifier. */ 114 LEGAL_ALT(0); 115 alt_format |= ALT_E; 116 goto again; 117 118 case 'O': /* "%O?" alternative conversion modifier. */ 119 LEGAL_ALT(0); 120 alt_format |= ALT_O; 121 goto again; 122 123 /* 124 * "Complex" conversion rules, implemented through recursion. 125 */ 126 case 'c': /* Date and time, using the locale's format. */ 127 new_fmt = _ctloc(d_t_fmt); 128 goto recurse; 129 130 case 'D': /* The date as "%m/%d/%y". */ 131 new_fmt = "%m/%d/%y"; 132 LEGAL_ALT(0); 133 goto recurse; 134 135 case 'F': /* The date as "%Y-%m-%d". */ 136 new_fmt = "%Y-%m-%d"; 137 LEGAL_ALT(0); 138 goto recurse; 139 140 case 'R': /* The time as "%H:%M". */ 141 new_fmt = "%H:%M"; 142 LEGAL_ALT(0); 143 goto recurse; 144 145 case 'r': /* The time in 12-hour clock representation. */ 146 new_fmt =_ctloc(t_fmt_ampm); 147 LEGAL_ALT(0); 148 goto recurse; 149 150 case 'T': /* The time as "%H:%M:%S". */ 151 new_fmt = "%H:%M:%S"; 152 LEGAL_ALT(0); 153 goto recurse; 154 155 case 'X': /* The time, using the locale's format. */ 156 new_fmt =_ctloc(t_fmt); 157 goto recurse; 158 159 case 'x': /* The date, using the locale's format. */ 160 new_fmt =_ctloc(d_fmt); 161 recurse: 162 bp = (const u_char *)strptime((const char *)bp, 163 new_fmt, tm); 164 LEGAL_ALT(ALT_E); 165 continue; 166 167 /* 168 * "Elementary" conversion rules. 169 */ 170 case 'A': /* The day of week, using the locale's form. */ 171 case 'a': 172 bp = find_string(bp, &tm->tm_wday, _ctloc(day), 173 _ctloc(abday), 7); 174 LEGAL_ALT(0); 175 continue; 176 177 case 'B': /* The month, using the locale's form. */ 178 case 'b': 179 case 'h': 180 bp = find_string(bp, &tm->tm_mon, _ctloc(mon), 181 _ctloc(abmon), 12); 182 LEGAL_ALT(0); 183 continue; 184 185 case 'C': /* The century number. */ 186 i = 20; 187 bp = conv_num(bp, &i, 0, 99); 188 189 i = i * 100 - TM_YEAR_BASE; 190 if (split_year) 191 i += tm->tm_year % 100; 192 split_year = 1; 193 tm->tm_year = i; 194 LEGAL_ALT(ALT_E); 195 continue; 196 197 case 'd': /* The day of month. */ 198 case 'e': 199 bp = conv_num(bp, &tm->tm_mday, 1, 31); 200 LEGAL_ALT(ALT_O); 201 continue; 202 203 case 'k': /* The hour (24-hour clock representation). */ 204 LEGAL_ALT(0); 205 /* FALLTHROUGH */ 206 case 'H': 207 bp = conv_num(bp, &tm->tm_hour, 0, 23); 208 LEGAL_ALT(ALT_O); 209 continue; 210 211 case 'l': /* The hour (12-hour clock representation). */ 212 LEGAL_ALT(0); 213 /* FALLTHROUGH */ 214 case 'I': 215 bp = conv_num(bp, &tm->tm_hour, 1, 12); 216 if (tm->tm_hour == 12) 217 tm->tm_hour = 0; 218 LEGAL_ALT(ALT_O); 219 continue; 220 221 case 'j': /* The day of year. */ 222 i = 1; 223 bp = conv_num(bp, &i, 1, 366); 224 tm->tm_yday = i - 1; 225 LEGAL_ALT(0); 226 continue; 227 228 case 'M': /* The minute. */ 229 bp = conv_num(bp, &tm->tm_min, 0, 59); 230 LEGAL_ALT(ALT_O); 231 continue; 232 233 case 'm': /* The month. */ 234 i = 1; 235 bp = conv_num(bp, &i, 1, 12); 236 tm->tm_mon = i - 1; 237 LEGAL_ALT(ALT_O); 238 continue; 239 240 case 'p': /* The locale's equivalent of AM/PM. */ 241 bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2); 242 if (tm->tm_hour > 11) 243 return NULL; 244 tm->tm_hour += i * 12; 245 LEGAL_ALT(0); 246 continue; 247 248 case 'S': /* The seconds. */ 249 bp = conv_num(bp, &tm->tm_sec, 0, 61); 250 LEGAL_ALT(ALT_O); 251 continue; 252 253 case 'U': /* The week of year, beginning on sunday. */ 254 case 'W': /* The week of year, beginning on monday. */ 255 /* 256 * XXX This is bogus, as we can not assume any valid 257 * information present in the tm structure at this 258 * point to calculate a real value, so just check the 259 * range for now. 260 */ 261 bp = conv_num(bp, &i, 0, 53); 262 LEGAL_ALT(ALT_O); 263 continue; 264 265 case 'w': /* The day of week, beginning on sunday. */ 266 bp = conv_num(bp, &tm->tm_wday, 0, 6); 267 LEGAL_ALT(ALT_O); 268 continue; 269 270 case 'u': /* The day of week, monday = 1. */ 271 bp = conv_num(bp, &i, 1, 7); 272 tm->tm_wday = i % 7; 273 LEGAL_ALT(ALT_O); 274 continue; 275 276 case 'g': /* The year corresponding to the ISO week 277 * number but without the century. 278 */ 279 bp = conv_num(bp, &i, 0, 99); 280 continue; 281 282 case 'G': /* The year corresponding to the ISO week 283 * number with century. 284 */ 285 do 286 bp++; 287 while (isdigit(*bp)); 288 continue; 289 290 case 'V': /* The ISO 8601:1988 week number as decimal */ 291 bp = conv_num(bp, &i, 0, 53); 292 continue; 293 294 case 'Y': /* The year. */ 295 i = TM_YEAR_BASE; /* just for data sanity... */ 296 bp = conv_num(bp, &i, 0, 9999); 297 tm->tm_year = i - TM_YEAR_BASE; 298 LEGAL_ALT(ALT_E); 299 continue; 300 301 case 'y': /* The year within 100 years of the epoch. */ 302 /* LEGAL_ALT(ALT_E | ALT_O); */ 303 bp = conv_num(bp, &i, 0, 99); 304 305 if (split_year) 306 /* preserve century */ 307 i += (tm->tm_year / 100) * 100; 308 else { 309 split_year = 1; 310 if (i <= 68) 311 i = i + 2000 - TM_YEAR_BASE; 312 else 313 i = i + 1900 - TM_YEAR_BASE; 314 } 315 tm->tm_year = i; 316 continue; 317 318 case 'Z': 319 tzset(); 320 if (strncmp((const char *)bp, gmt, 3) == 0) { 321 tm->tm_isdst = 0; 322 #ifdef TM_GMTOFF 323 tm->TM_GMTOFF = 0; 324 #endif 325 #ifdef TM_ZONE 326 tm->TM_ZONE = gmt; 327 #endif 328 bp += 3; 329 } else { 330 ep = find_string(bp, &i, 331 (const char * const *)tzname, 332 NULL, 2); 333 if (ep != NULL) { 334 tm->tm_isdst = i; 335 #ifdef TM_GMTOFF 336 tm->TM_GMTOFF = -(timezone); 337 #endif 338 #ifdef TM_ZONE 339 tm->TM_ZONE = tzname[i]; 340 #endif 341 } 342 bp = ep; 343 } 344 continue; 345 346 case 'z': 347 /* 348 * We recognize all ISO 8601 formats: 349 * Z = Zulu time/UTC 350 * [+-]hhmm 351 * [+-]hh:mm 352 * [+-]hh 353 * We recognize all RFC-822/RFC-2822 formats: 354 * UT|GMT 355 * North American : UTC offsets 356 * E[DS]T = Eastern : -4 | -5 357 * C[DS]T = Central : -5 | -6 358 * M[DS]T = Mountain: -6 | -7 359 * P[DS]T = Pacific : -7 | -8 360 * Military 361 * [A-IL-M] = -1 ... -9 (J not used) 362 * [N-Y] = +1 ... +12 363 */ 364 while (isspace(*bp)) 365 bp++; 366 367 switch (*bp++) { 368 case 'G': 369 if (*bp++ != 'M') 370 return NULL; 371 /*FALLTHROUGH*/ 372 case 'U': 373 if (*bp++ != 'T') 374 return NULL; 375 /*FALLTHROUGH*/ 376 case 'Z': 377 tm->tm_isdst = 0; 378 #ifdef TM_GMTOFF 379 tm->TM_GMTOFF = 0; 380 #endif 381 #ifdef TM_ZONE 382 tm->TM_ZONE = utc; 383 #endif 384 continue; 385 case '+': 386 neg = 0; 387 break; 388 case '-': 389 neg = 1; 390 break; 391 default: 392 --bp; 393 ep = find_string(bp, &i, nast, NULL, 4); 394 if (ep != NULL) { 395 #ifdef TM_GMTOFF 396 tm->TM_GMTOFF = -5 - i; 397 #endif 398 #ifdef TM_ZONE 399 tm->TM_ZONE = __UNCONST(nast[i]); 400 #endif 401 bp = ep; 402 continue; 403 } 404 ep = find_string(bp, &i, nadt, NULL, 4); 405 if (ep != NULL) { 406 tm->tm_isdst = 1; 407 #ifdef TM_GMTOFF 408 tm->TM_GMTOFF = -4 - i; 409 #endif 410 #ifdef TM_ZONE 411 tm->TM_ZONE = __UNCONST(nadt[i]); 412 #endif 413 bp = ep; 414 continue; 415 } 416 417 if ((*bp >= 'A' && *bp <= 'I') || 418 (*bp >= 'L' && *bp <= 'Y')) { 419 #ifdef TM_GMTOFF 420 /* Argh! No 'J'! */ 421 if (*bp >= 'A' && *bp <= 'I') 422 tm->TM_GMTOFF = 423 ('A' - 1) - (int)*bp; 424 else if (*bp >= 'L' && *bp <= 'M') 425 tm->TM_GMTOFF = 'A' - (int)*bp; 426 else if (*bp >= 'N' && *bp <= 'Y') 427 tm->TM_GMTOFF = (int)*bp - 'M'; 428 #endif 429 #ifdef TM_ZONE 430 tm->TM_ZONE = NULL; /* XXX */ 431 #endif 432 bp++; 433 continue; 434 } 435 return NULL; 436 } 437 offs = 0; 438 for (i = 0; i < 4; ) { 439 if (isdigit(*bp)) { 440 offs = offs * 10 + (*bp++ - '0'); 441 i++; 442 continue; 443 } 444 if (i == 2 && *bp == ':') { 445 bp++; 446 continue; 447 } 448 break; 449 } 450 switch (i) { 451 case 2: 452 offs *= 100; 453 break; 454 case 4: 455 i = offs % 100; 456 if (i >= 60) 457 return NULL; 458 /* Convert minutes into decimal */ 459 offs = (offs / 100) * 100 + (i * 50) / 30; 460 break; 461 default: 462 return NULL; 463 } 464 if (neg) 465 offs = -offs; 466 tm->tm_isdst = 0; /* XXX */ 467 #ifdef TM_GMTOFF 468 tm->TM_GMTOFF = offs; 469 #endif 470 #ifdef TM_ZONE 471 tm->TM_ZONE = NULL; /* XXX */ 472 #endif 473 continue; 474 475 /* 476 * Miscellaneous conversions. 477 */ 478 case 'n': /* Any kind of white-space. */ 479 case 't': 480 while (isspace(*bp)) 481 bp++; 482 LEGAL_ALT(0); 483 continue; 484 485 486 default: /* Unknown/unsupported conversion. */ 487 return NULL; 488 } 489 } 490 491 return __UNCONST(bp); 492 } 493 494 495 static const u_char * 496 conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim) 497 { 498 uint result = 0; 499 unsigned char ch; 500 501 /* The limit also determines the number of valid digits. */ 502 uint rulim = ulim; 503 504 ch = *buf; 505 if (ch < '0' || ch > '9') 506 return NULL; 507 508 do { 509 result *= 10; 510 result += ch - '0'; 511 rulim /= 10; 512 ch = *++buf; 513 } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9'); 514 515 if (result < llim || result > ulim) 516 return NULL; 517 518 *dest = result; 519 return buf; 520 } 521 522 static const u_char * 523 find_string(const u_char *bp, int *tgt, const char * const *n1, 524 const char * const *n2, int c) 525 { 526 int i; 527 unsigned int len; 528 529 /* check full name - then abbreviated ones */ 530 for (; n1 != NULL; n1 = n2, n2 = NULL) { 531 for (i = 0; i < c; i++, n1++) { 532 len = strlen(*n1); 533 if (strncasecmp(*n1, (const char *)bp, len) == 0) { 534 *tgt = i; 535 return bp + len; 536 } 537 } 538 } 539 540 /* Nothing matched */ 541 return NULL; 542 } 543