1 /* locale.c 2 * 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2005, 2006, 2007, by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * A Elbereth Gilthoniel, 13 * silivren penna m�riel 14 * o menel aglar elenath! 15 * Na-chaered palan-d�riel 16 * o galadhremmin ennorath, 17 * Fanuilos, le linnathon 18 * nef aear, si nef aearon! 19 */ 20 21 /* utility functions for handling locale-specific stuff like what 22 * character represents the decimal point. 23 */ 24 25 #include "EXTERN.h" 26 #define PERL_IN_LOCALE_C 27 #include "perl.h" 28 29 #ifdef I_LOCALE 30 # include <locale.h> 31 #endif 32 33 #ifdef I_LANGINFO 34 # include <langinfo.h> 35 #endif 36 37 #include "reentr.h" 38 39 #if defined(USE_LOCALE_NUMERIC) || defined(USE_LOCALE_COLLATE) 40 /* 41 * Standardize the locale name from a string returned by 'setlocale'. 42 * 43 * The standard return value of setlocale() is either 44 * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL 45 * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL 46 * (the space-separated values represent the various sublocales, 47 * in some unspecificed order) 48 * 49 * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n", 50 * which is harmful for further use of the string in setlocale(). 51 * 52 */ 53 STATIC char * 54 S_stdize_locale(pTHX_ char *locs) 55 { 56 const char * const s = strchr(locs, '='); 57 bool okay = TRUE; 58 59 if (s) { 60 const char * const t = strchr(s, '.'); 61 okay = FALSE; 62 if (t) { 63 const char * const u = strchr(t, '\n'); 64 if (u && (u[1] == 0)) { 65 const STRLEN len = u - s; 66 Move(s + 1, locs, len, char); 67 locs[len] = 0; 68 okay = TRUE; 69 } 70 } 71 } 72 73 if (!okay) 74 Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs); 75 76 return locs; 77 } 78 #endif 79 80 void 81 Perl_set_numeric_radix(pTHX) 82 { 83 #ifdef USE_LOCALE_NUMERIC 84 dVAR; 85 # ifdef HAS_LOCALECONV 86 const struct lconv* const lc = localeconv(); 87 88 if (lc && lc->decimal_point) { 89 if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) { 90 SvREFCNT_dec(PL_numeric_radix_sv); 91 PL_numeric_radix_sv = NULL; 92 } 93 else { 94 if (PL_numeric_radix_sv) 95 sv_setpv(PL_numeric_radix_sv, lc->decimal_point); 96 else 97 PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0); 98 } 99 } 100 else 101 PL_numeric_radix_sv = NULL; 102 # endif /* HAS_LOCALECONV */ 103 #endif /* USE_LOCALE_NUMERIC */ 104 } 105 106 /* 107 * Set up for a new numeric locale. 108 */ 109 void 110 Perl_new_numeric(pTHX_ const char *newnum) 111 { 112 #ifdef USE_LOCALE_NUMERIC 113 dVAR; 114 115 if (! newnum) { 116 Safefree(PL_numeric_name); 117 PL_numeric_name = NULL; 118 PL_numeric_standard = TRUE; 119 PL_numeric_local = TRUE; 120 return; 121 } 122 123 if (! PL_numeric_name || strNE(PL_numeric_name, newnum)) { 124 Safefree(PL_numeric_name); 125 PL_numeric_name = stdize_locale(savepv(newnum)); 126 PL_numeric_standard = ((*newnum == 'C' && newnum[1] == '\0') 127 || strEQ(newnum, "POSIX")); 128 PL_numeric_local = TRUE; 129 set_numeric_radix(); 130 } 131 132 #endif /* USE_LOCALE_NUMERIC */ 133 } 134 135 void 136 Perl_set_numeric_standard(pTHX) 137 { 138 #ifdef USE_LOCALE_NUMERIC 139 dVAR; 140 141 if (! PL_numeric_standard) { 142 setlocale(LC_NUMERIC, "C"); 143 PL_numeric_standard = TRUE; 144 PL_numeric_local = FALSE; 145 set_numeric_radix(); 146 } 147 148 #endif /* USE_LOCALE_NUMERIC */ 149 } 150 151 void 152 Perl_set_numeric_local(pTHX) 153 { 154 #ifdef USE_LOCALE_NUMERIC 155 dVAR; 156 157 if (! PL_numeric_local) { 158 setlocale(LC_NUMERIC, PL_numeric_name); 159 PL_numeric_standard = FALSE; 160 PL_numeric_local = TRUE; 161 set_numeric_radix(); 162 } 163 164 #endif /* USE_LOCALE_NUMERIC */ 165 } 166 167 /* 168 * Set up for a new ctype locale. 169 */ 170 void 171 Perl_new_ctype(pTHX_ const char *newctype) 172 { 173 #ifdef USE_LOCALE_CTYPE 174 dVAR; 175 int i; 176 177 for (i = 0; i < 256; i++) { 178 if (isUPPER_LC(i)) 179 PL_fold_locale[i] = toLOWER_LC(i); 180 else if (isLOWER_LC(i)) 181 PL_fold_locale[i] = toUPPER_LC(i); 182 else 183 PL_fold_locale[i] = i; 184 } 185 186 #endif /* USE_LOCALE_CTYPE */ 187 PERL_UNUSED_ARG(newctype); 188 PERL_UNUSED_CONTEXT; 189 } 190 191 /* 192 * Set up for a new collation locale. 193 */ 194 void 195 Perl_new_collate(pTHX_ const char *newcoll) 196 { 197 #ifdef USE_LOCALE_COLLATE 198 dVAR; 199 200 if (! newcoll) { 201 if (PL_collation_name) { 202 ++PL_collation_ix; 203 Safefree(PL_collation_name); 204 PL_collation_name = NULL; 205 } 206 PL_collation_standard = TRUE; 207 PL_collxfrm_base = 0; 208 PL_collxfrm_mult = 2; 209 return; 210 } 211 212 if (! PL_collation_name || strNE(PL_collation_name, newcoll)) { 213 ++PL_collation_ix; 214 Safefree(PL_collation_name); 215 PL_collation_name = stdize_locale(savepv(newcoll)); 216 PL_collation_standard = ((*newcoll == 'C' && newcoll[1] == '\0') 217 || strEQ(newcoll, "POSIX")); 218 219 { 220 /* 2: at most so many chars ('a', 'b'). */ 221 /* 50: surely no system expands a char more. */ 222 #define XFRMBUFSIZE (2 * 50) 223 char xbuf[XFRMBUFSIZE]; 224 const Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE); 225 const Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE); 226 const SSize_t mult = fb - fa; 227 if (mult < 1) 228 Perl_croak(aTHX_ "strxfrm() gets absurd"); 229 PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0; 230 PL_collxfrm_mult = mult; 231 } 232 } 233 234 #endif /* USE_LOCALE_COLLATE */ 235 } 236 237 /* 238 * Initialize locale awareness. 239 */ 240 int 241 Perl_init_i18nl10n(pTHX_ int printwarn) 242 { 243 int ok = 1; 244 /* returns 245 * 1 = set ok or not applicable, 246 * 0 = fallback to C locale, 247 * -1 = fallback to C locale failed 248 */ 249 250 #if defined(USE_LOCALE) 251 dVAR; 252 253 #ifdef USE_LOCALE_CTYPE 254 char *curctype = NULL; 255 #endif /* USE_LOCALE_CTYPE */ 256 #ifdef USE_LOCALE_COLLATE 257 char *curcoll = NULL; 258 #endif /* USE_LOCALE_COLLATE */ 259 #ifdef USE_LOCALE_NUMERIC 260 char *curnum = NULL; 261 #endif /* USE_LOCALE_NUMERIC */ 262 #ifdef __GLIBC__ 263 char * const language = PerlEnv_getenv("LANGUAGE"); 264 #endif 265 char * const lc_all = PerlEnv_getenv("LC_ALL"); 266 char * const lang = PerlEnv_getenv("LANG"); 267 bool setlocale_failure = FALSE; 268 269 #ifdef LOCALE_ENVIRON_REQUIRED 270 271 /* 272 * Ultrix setlocale(..., "") fails if there are no environment 273 * variables from which to get a locale name. 274 */ 275 276 bool done = FALSE; 277 278 #ifdef LC_ALL 279 if (lang) { 280 if (setlocale(LC_ALL, "")) 281 done = TRUE; 282 else 283 setlocale_failure = TRUE; 284 } 285 if (!setlocale_failure) { 286 #ifdef USE_LOCALE_CTYPE 287 Safefree(curctype); 288 if (! (curctype = 289 setlocale(LC_CTYPE, 290 (!done && (lang || PerlEnv_getenv("LC_CTYPE"))) 291 ? "" : NULL))) 292 setlocale_failure = TRUE; 293 else 294 curctype = savepv(curctype); 295 #endif /* USE_LOCALE_CTYPE */ 296 #ifdef USE_LOCALE_COLLATE 297 Safefree(curcoll); 298 if (! (curcoll = 299 setlocale(LC_COLLATE, 300 (!done && (lang || PerlEnv_getenv("LC_COLLATE"))) 301 ? "" : NULL))) 302 setlocale_failure = TRUE; 303 else 304 curcoll = savepv(curcoll); 305 #endif /* USE_LOCALE_COLLATE */ 306 #ifdef USE_LOCALE_NUMERIC 307 Safefree(curnum); 308 if (! (curnum = 309 setlocale(LC_NUMERIC, 310 (!done && (lang || PerlEnv_getenv("LC_NUMERIC"))) 311 ? "" : NULL))) 312 setlocale_failure = TRUE; 313 else 314 curnum = savepv(curnum); 315 #endif /* USE_LOCALE_NUMERIC */ 316 } 317 318 #endif /* LC_ALL */ 319 320 #endif /* !LOCALE_ENVIRON_REQUIRED */ 321 322 #ifdef LC_ALL 323 if (! setlocale(LC_ALL, "")) 324 setlocale_failure = TRUE; 325 #endif /* LC_ALL */ 326 327 if (!setlocale_failure) { 328 #ifdef USE_LOCALE_CTYPE 329 Safefree(curctype); 330 if (! (curctype = setlocale(LC_CTYPE, ""))) 331 setlocale_failure = TRUE; 332 else 333 curctype = savepv(curctype); 334 #endif /* USE_LOCALE_CTYPE */ 335 #ifdef USE_LOCALE_COLLATE 336 Safefree(curcoll); 337 if (! (curcoll = setlocale(LC_COLLATE, ""))) 338 setlocale_failure = TRUE; 339 else 340 curcoll = savepv(curcoll); 341 #endif /* USE_LOCALE_COLLATE */ 342 #ifdef USE_LOCALE_NUMERIC 343 Safefree(curnum); 344 if (! (curnum = setlocale(LC_NUMERIC, ""))) 345 setlocale_failure = TRUE; 346 else 347 curnum = savepv(curnum); 348 #endif /* USE_LOCALE_NUMERIC */ 349 } 350 351 if (setlocale_failure) { 352 char *p; 353 const bool locwarn = (printwarn > 1 || 354 (printwarn && 355 (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p)))); 356 357 if (locwarn) { 358 #ifdef LC_ALL 359 360 PerlIO_printf(Perl_error_log, 361 "perl: warning: Setting locale failed.\n"); 362 363 #else /* !LC_ALL */ 364 365 PerlIO_printf(Perl_error_log, 366 "perl: warning: Setting locale failed for the categories:\n\t"); 367 #ifdef USE_LOCALE_CTYPE 368 if (! curctype) 369 PerlIO_printf(Perl_error_log, "LC_CTYPE "); 370 #endif /* USE_LOCALE_CTYPE */ 371 #ifdef USE_LOCALE_COLLATE 372 if (! curcoll) 373 PerlIO_printf(Perl_error_log, "LC_COLLATE "); 374 #endif /* USE_LOCALE_COLLATE */ 375 #ifdef USE_LOCALE_NUMERIC 376 if (! curnum) 377 PerlIO_printf(Perl_error_log, "LC_NUMERIC "); 378 #endif /* USE_LOCALE_NUMERIC */ 379 PerlIO_printf(Perl_error_log, "\n"); 380 381 #endif /* LC_ALL */ 382 383 PerlIO_printf(Perl_error_log, 384 "perl: warning: Please check that your locale settings:\n"); 385 386 #ifdef __GLIBC__ 387 PerlIO_printf(Perl_error_log, 388 "\tLANGUAGE = %c%s%c,\n", 389 language ? '"' : '(', 390 language ? language : "unset", 391 language ? '"' : ')'); 392 #endif 393 394 PerlIO_printf(Perl_error_log, 395 "\tLC_ALL = %c%s%c,\n", 396 lc_all ? '"' : '(', 397 lc_all ? lc_all : "unset", 398 lc_all ? '"' : ')'); 399 400 #if defined(USE_ENVIRON_ARRAY) 401 { 402 char **e; 403 for (e = environ; *e; e++) { 404 if (strnEQ(*e, "LC_", 3) 405 && strnNE(*e, "LC_ALL=", 7) 406 && (p = strchr(*e, '='))) 407 PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n", 408 (int)(p - *e), *e, p + 1); 409 } 410 } 411 #else 412 PerlIO_printf(Perl_error_log, 413 "\t(possibly more locale environment variables)\n"); 414 #endif 415 416 PerlIO_printf(Perl_error_log, 417 "\tLANG = %c%s%c\n", 418 lang ? '"' : '(', 419 lang ? lang : "unset", 420 lang ? '"' : ')'); 421 422 PerlIO_printf(Perl_error_log, 423 " are supported and installed on your system.\n"); 424 } 425 426 #ifdef LC_ALL 427 428 if (setlocale(LC_ALL, "C")) { 429 if (locwarn) 430 PerlIO_printf(Perl_error_log, 431 "perl: warning: Falling back to the standard locale (\"C\").\n"); 432 ok = 0; 433 } 434 else { 435 if (locwarn) 436 PerlIO_printf(Perl_error_log, 437 "perl: warning: Failed to fall back to the standard locale (\"C\").\n"); 438 ok = -1; 439 } 440 441 #else /* ! LC_ALL */ 442 443 if (0 444 #ifdef USE_LOCALE_CTYPE 445 || !(curctype || setlocale(LC_CTYPE, "C")) 446 #endif /* USE_LOCALE_CTYPE */ 447 #ifdef USE_LOCALE_COLLATE 448 || !(curcoll || setlocale(LC_COLLATE, "C")) 449 #endif /* USE_LOCALE_COLLATE */ 450 #ifdef USE_LOCALE_NUMERIC 451 || !(curnum || setlocale(LC_NUMERIC, "C")) 452 #endif /* USE_LOCALE_NUMERIC */ 453 ) 454 { 455 if (locwarn) 456 PerlIO_printf(Perl_error_log, 457 "perl: warning: Cannot fall back to the standard locale (\"C\").\n"); 458 ok = -1; 459 } 460 461 #endif /* ! LC_ALL */ 462 463 #ifdef USE_LOCALE_CTYPE 464 Safefree(curctype); 465 curctype = savepv(setlocale(LC_CTYPE, NULL)); 466 #endif /* USE_LOCALE_CTYPE */ 467 #ifdef USE_LOCALE_COLLATE 468 Safefree(curcoll); 469 curcoll = savepv(setlocale(LC_COLLATE, NULL)); 470 #endif /* USE_LOCALE_COLLATE */ 471 #ifdef USE_LOCALE_NUMERIC 472 Safefree(curnum); 473 curnum = savepv(setlocale(LC_NUMERIC, NULL)); 474 #endif /* USE_LOCALE_NUMERIC */ 475 } 476 else { 477 478 #ifdef USE_LOCALE_CTYPE 479 new_ctype(curctype); 480 #endif /* USE_LOCALE_CTYPE */ 481 482 #ifdef USE_LOCALE_COLLATE 483 new_collate(curcoll); 484 #endif /* USE_LOCALE_COLLATE */ 485 486 #ifdef USE_LOCALE_NUMERIC 487 new_numeric(curnum); 488 #endif /* USE_LOCALE_NUMERIC */ 489 490 } 491 492 #endif /* USE_LOCALE */ 493 494 #ifdef USE_PERLIO 495 { 496 /* Set PL_utf8locale to TRUE if using PerlIO _and_ 497 any of the following are true: 498 - nl_langinfo(CODESET) contains /^utf-?8/i 499 - $ENV{LC_ALL} contains /^utf-?8/i 500 - $ENV{LC_CTYPE} contains /^utf-?8/i 501 - $ENV{LANG} contains /^utf-?8/i 502 The LC_ALL, LC_CTYPE, LANG obey the usual override 503 hierarchy of locale environment variables. (LANGUAGE 504 affects only LC_MESSAGES only under glibc.) (If present, 505 it overrides LC_MESSAGES for GNU gettext, and it also 506 can have more than one locale, separated by spaces, 507 in case you need to know.) 508 If PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE}) 509 are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer 510 on STDIN, STDOUT, STDERR, _and_ the default open discipline. 511 */ 512 bool utf8locale = FALSE; 513 char *codeset = NULL; 514 #if defined(HAS_NL_LANGINFO) && defined(CODESET) 515 codeset = nl_langinfo(CODESET); 516 #endif 517 if (codeset) 518 utf8locale = (Perl_ibcmp(aTHX_ codeset, STR_WITH_LEN("UTF-8")) == 0 || 519 Perl_ibcmp(aTHX_ codeset, STR_WITH_LEN("UTF8") ) == 0); 520 #if defined(USE_LOCALE) 521 else { /* nl_langinfo(CODESET) is supposed to correctly 522 * interpret the locale environment variables, 523 * but just in case it fails, let's do this manually. */ 524 if (lang) 525 utf8locale = (Perl_ibcmp(aTHX_ lang, STR_WITH_LEN("UTF-8")) == 0 || 526 Perl_ibcmp(aTHX_ lang, STR_WITH_LEN("UTF8") ) == 0); 527 #ifdef USE_LOCALE_CTYPE 528 if (curctype) 529 utf8locale = (Perl_ibcmp(aTHX_ curctype, STR_WITH_LEN("UTF-8")) == 0 || 530 Perl_ibcmp(aTHX_ curctype, STR_WITH_LEN("UTF8") ) == 0); 531 #endif 532 if (lc_all) 533 utf8locale = (Perl_ibcmp(aTHX_ lc_all, STR_WITH_LEN("UTF-8")) == 0 || 534 Perl_ibcmp(aTHX_ lc_all, STR_WITH_LEN("UTF8") ) == 0); 535 } 536 #endif /* USE_LOCALE */ 537 if (utf8locale) 538 PL_utf8locale = TRUE; 539 } 540 /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO. 541 This is an alternative to using the -C command line switch 542 (the -C if present will override this). */ 543 { 544 const char *p = PerlEnv_getenv("PERL_UNICODE"); 545 PL_unicode = p ? parse_unicode_opts(&p) : 0; 546 if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG) 547 PL_utf8cache = -1; 548 } 549 #endif 550 551 #ifdef USE_LOCALE_CTYPE 552 Safefree(curctype); 553 #endif /* USE_LOCALE_CTYPE */ 554 #ifdef USE_LOCALE_COLLATE 555 Safefree(curcoll); 556 #endif /* USE_LOCALE_COLLATE */ 557 #ifdef USE_LOCALE_NUMERIC 558 Safefree(curnum); 559 #endif /* USE_LOCALE_NUMERIC */ 560 return ok; 561 } 562 563 #ifdef USE_LOCALE_COLLATE 564 565 /* 566 * mem_collxfrm() is a bit like strxfrm() but with two important 567 * differences. First, it handles embedded NULs. Second, it allocates 568 * a bit more memory than needed for the transformed data itself. 569 * The real transformed data begins at offset sizeof(collationix). 570 * Please see sv_collxfrm() to see how this is used. 571 */ 572 573 char * 574 Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) 575 { 576 dVAR; 577 char *xbuf; 578 STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */ 579 580 /* the first sizeof(collationix) bytes are used by sv_collxfrm(). */ 581 /* the +1 is for the terminating NUL. */ 582 583 xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1; 584 Newx(xbuf, xAlloc, char); 585 if (! xbuf) 586 goto bad; 587 588 *(U32*)xbuf = PL_collation_ix; 589 xout = sizeof(PL_collation_ix); 590 for (xin = 0; xin < len; ) { 591 Size_t xused; 592 593 for (;;) { 594 xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout); 595 if (xused >= PERL_INT_MAX) 596 goto bad; 597 if ((STRLEN)xused < xAlloc - xout) 598 break; 599 xAlloc = (2 * xAlloc) + 1; 600 Renew(xbuf, xAlloc, char); 601 if (! xbuf) 602 goto bad; 603 } 604 605 xin += strlen(s + xin) + 1; 606 xout += xused; 607 608 /* Embedded NULs are understood but silently skipped 609 * because they make no sense in locale collation. */ 610 } 611 612 xbuf[xout] = '\0'; 613 *xlen = xout - sizeof(PL_collation_ix); 614 return xbuf; 615 616 bad: 617 Safefree(xbuf); 618 *xlen = 0; 619 return NULL; 620 } 621 622 #endif /* USE_LOCALE_COLLATE */ 623 624 /* 625 * Local variables: 626 * c-indentation-style: bsd 627 * c-basic-offset: 4 628 * indent-tabs-mode: t 629 * End: 630 * 631 * ex: set ts=8 sts=4 sw=4 noet: 632 */ 633