1 /* locale.c 2 * 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 4 * 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * A Elbereth Gilthoniel, 13 * silivren penna míriel 14 * o menel aglar elenath! 15 * Na-chaered palan-díriel 16 * o galadhremmin ennorath, 17 * Fanuilos, le linnathon 18 * nef aear, si nef aearon! 19 * 20 * [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"] 21 */ 22 23 /* utility functions for handling locale-specific stuff like what 24 * character represents the decimal point. 25 */ 26 27 #include "EXTERN.h" 28 #define PERL_IN_LOCALE_C 29 #include "perl.h" 30 31 #ifdef I_LANGINFO 32 # include <langinfo.h> 33 #endif 34 35 #include "reentr.h" 36 37 #if defined(USE_LOCALE_NUMERIC) || defined(USE_LOCALE_COLLATE) 38 /* 39 * Standardize the locale name from a string returned by 'setlocale'. 40 * 41 * The standard return value of setlocale() is either 42 * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL 43 * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL 44 * (the space-separated values represent the various sublocales, 45 * in some unspecified order) 46 * 47 * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n", 48 * which is harmful for further use of the string in setlocale(). 49 * 50 */ 51 STATIC char * 52 S_stdize_locale(pTHX_ char *locs) 53 { 54 const char * const s = strchr(locs, '='); 55 bool okay = TRUE; 56 57 PERL_ARGS_ASSERT_STDIZE_LOCALE; 58 59 if (s) { 60 const char * const t = strchr(s, '.'); 61 okay = FALSE; 62 if (t) { 63 const char * const u = strchr(t, '\n'); 64 if (u && (u[1] == 0)) { 65 const STRLEN len = u - s; 66 Move(s + 1, locs, len, char); 67 locs[len] = 0; 68 okay = TRUE; 69 } 70 } 71 } 72 73 if (!okay) 74 Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs); 75 76 return locs; 77 } 78 #endif 79 80 void 81 Perl_set_numeric_radix(pTHX) 82 { 83 #ifdef USE_LOCALE_NUMERIC 84 dVAR; 85 # ifdef HAS_LOCALECONV 86 const struct lconv* const lc = localeconv(); 87 88 if (lc && lc->decimal_point) { 89 if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) { 90 SvREFCNT_dec(PL_numeric_radix_sv); 91 PL_numeric_radix_sv = NULL; 92 } 93 else { 94 if (PL_numeric_radix_sv) 95 sv_setpv(PL_numeric_radix_sv, lc->decimal_point); 96 else 97 PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0); 98 } 99 } 100 else 101 PL_numeric_radix_sv = NULL; 102 # endif /* HAS_LOCALECONV */ 103 #endif /* USE_LOCALE_NUMERIC */ 104 } 105 106 /* 107 * Set up for a new numeric locale. 108 */ 109 void 110 Perl_new_numeric(pTHX_ const char *newnum) 111 { 112 #ifdef USE_LOCALE_NUMERIC 113 dVAR; 114 115 if (! newnum) { 116 Safefree(PL_numeric_name); 117 PL_numeric_name = NULL; 118 PL_numeric_standard = TRUE; 119 PL_numeric_local = TRUE; 120 return; 121 } 122 123 if (! PL_numeric_name || strNE(PL_numeric_name, newnum)) { 124 Safefree(PL_numeric_name); 125 PL_numeric_name = stdize_locale(savepv(newnum)); 126 PL_numeric_standard = ((*newnum == 'C' && newnum[1] == '\0') 127 || strEQ(newnum, "POSIX")); 128 PL_numeric_local = TRUE; 129 set_numeric_radix(); 130 } 131 132 #endif /* USE_LOCALE_NUMERIC */ 133 } 134 135 void 136 Perl_set_numeric_standard(pTHX) 137 { 138 #ifdef USE_LOCALE_NUMERIC 139 dVAR; 140 141 if (! PL_numeric_standard) { 142 setlocale(LC_NUMERIC, "C"); 143 PL_numeric_standard = TRUE; 144 PL_numeric_local = FALSE; 145 set_numeric_radix(); 146 } 147 148 #endif /* USE_LOCALE_NUMERIC */ 149 } 150 151 void 152 Perl_set_numeric_local(pTHX) 153 { 154 #ifdef USE_LOCALE_NUMERIC 155 dVAR; 156 157 if (! PL_numeric_local) { 158 setlocale(LC_NUMERIC, PL_numeric_name); 159 PL_numeric_standard = FALSE; 160 PL_numeric_local = TRUE; 161 set_numeric_radix(); 162 } 163 164 #endif /* USE_LOCALE_NUMERIC */ 165 } 166 167 /* 168 * Set up for a new ctype locale. 169 */ 170 void 171 Perl_new_ctype(pTHX_ const char *newctype) 172 { 173 #ifdef USE_LOCALE_CTYPE 174 dVAR; 175 int i; 176 177 PERL_ARGS_ASSERT_NEW_CTYPE; 178 179 for (i = 0; i < 256; i++) { 180 if (isUPPER_LC(i)) 181 PL_fold_locale[i] = toLOWER_LC(i); 182 else if (isLOWER_LC(i)) 183 PL_fold_locale[i] = toUPPER_LC(i); 184 else 185 PL_fold_locale[i] = i; 186 } 187 188 #endif /* USE_LOCALE_CTYPE */ 189 PERL_ARGS_ASSERT_NEW_CTYPE; 190 PERL_UNUSED_ARG(newctype); 191 PERL_UNUSED_CONTEXT; 192 } 193 194 /* 195 * Set up for a new collation locale. 196 */ 197 void 198 Perl_new_collate(pTHX_ const char *newcoll) 199 { 200 #ifdef USE_LOCALE_COLLATE 201 dVAR; 202 203 if (! newcoll) { 204 if (PL_collation_name) { 205 ++PL_collation_ix; 206 Safefree(PL_collation_name); 207 PL_collation_name = NULL; 208 } 209 PL_collation_standard = TRUE; 210 PL_collxfrm_base = 0; 211 PL_collxfrm_mult = 2; 212 return; 213 } 214 215 if (! PL_collation_name || strNE(PL_collation_name, newcoll)) { 216 ++PL_collation_ix; 217 Safefree(PL_collation_name); 218 PL_collation_name = stdize_locale(savepv(newcoll)); 219 PL_collation_standard = ((*newcoll == 'C' && newcoll[1] == '\0') 220 || strEQ(newcoll, "POSIX")); 221 222 { 223 /* 2: at most so many chars ('a', 'b'). */ 224 /* 50: surely no system expands a char more. */ 225 #define XFRMBUFSIZE (2 * 50) 226 char xbuf[XFRMBUFSIZE]; 227 const Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE); 228 const Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE); 229 const SSize_t mult = fb - fa; 230 if (mult < 1 && !(fa == 0 && fb == 0)) 231 Perl_croak(aTHX_ "panic: strxfrm() gets absurd - a => %"UVuf", ab => %"UVuf, 232 (UV) fa, (UV) fb); 233 PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0; 234 PL_collxfrm_mult = mult; 235 } 236 } 237 238 #endif /* USE_LOCALE_COLLATE */ 239 } 240 241 /* 242 * Initialize locale awareness. 243 */ 244 int 245 Perl_init_i18nl10n(pTHX_ int printwarn) 246 { 247 int ok = 1; 248 /* returns 249 * 1 = set ok or not applicable, 250 * 0 = fallback to C locale, 251 * -1 = fallback to C locale failed 252 */ 253 254 #if defined(USE_LOCALE) 255 dVAR; 256 257 #ifdef USE_LOCALE_CTYPE 258 char *curctype = NULL; 259 #endif /* USE_LOCALE_CTYPE */ 260 #ifdef USE_LOCALE_COLLATE 261 char *curcoll = NULL; 262 #endif /* USE_LOCALE_COLLATE */ 263 #ifdef USE_LOCALE_NUMERIC 264 char *curnum = NULL; 265 #endif /* USE_LOCALE_NUMERIC */ 266 #ifdef __GLIBC__ 267 char * const language = PerlEnv_getenv("LANGUAGE"); 268 #endif 269 char * const lc_all = PerlEnv_getenv("LC_ALL"); 270 char * const lang = PerlEnv_getenv("LANG"); 271 bool setlocale_failure = FALSE; 272 273 #ifdef LOCALE_ENVIRON_REQUIRED 274 275 /* 276 * Ultrix setlocale(..., "") fails if there are no environment 277 * variables from which to get a locale name. 278 */ 279 280 bool done = FALSE; 281 282 #ifdef LC_ALL 283 if (lang) { 284 if (setlocale(LC_ALL, "")) 285 done = TRUE; 286 else 287 setlocale_failure = TRUE; 288 } 289 if (!setlocale_failure) { 290 #ifdef USE_LOCALE_CTYPE 291 Safefree(curctype); 292 if (! (curctype = 293 setlocale(LC_CTYPE, 294 (!done && (lang || PerlEnv_getenv("LC_CTYPE"))) 295 ? "" : NULL))) 296 setlocale_failure = TRUE; 297 else 298 curctype = savepv(curctype); 299 #endif /* USE_LOCALE_CTYPE */ 300 #ifdef USE_LOCALE_COLLATE 301 Safefree(curcoll); 302 if (! (curcoll = 303 setlocale(LC_COLLATE, 304 (!done && (lang || PerlEnv_getenv("LC_COLLATE"))) 305 ? "" : NULL))) 306 setlocale_failure = TRUE; 307 else 308 curcoll = savepv(curcoll); 309 #endif /* USE_LOCALE_COLLATE */ 310 #ifdef USE_LOCALE_NUMERIC 311 Safefree(curnum); 312 if (! (curnum = 313 setlocale(LC_NUMERIC, 314 (!done && (lang || PerlEnv_getenv("LC_NUMERIC"))) 315 ? "" : NULL))) 316 setlocale_failure = TRUE; 317 else 318 curnum = savepv(curnum); 319 #endif /* USE_LOCALE_NUMERIC */ 320 } 321 322 #endif /* LC_ALL */ 323 324 #endif /* !LOCALE_ENVIRON_REQUIRED */ 325 326 #ifdef LC_ALL 327 if (! setlocale(LC_ALL, "")) 328 setlocale_failure = TRUE; 329 #endif /* LC_ALL */ 330 331 if (!setlocale_failure) { 332 #ifdef USE_LOCALE_CTYPE 333 Safefree(curctype); 334 if (! (curctype = setlocale(LC_CTYPE, ""))) 335 setlocale_failure = TRUE; 336 else 337 curctype = savepv(curctype); 338 #endif /* USE_LOCALE_CTYPE */ 339 #ifdef USE_LOCALE_COLLATE 340 Safefree(curcoll); 341 if (! (curcoll = setlocale(LC_COLLATE, ""))) 342 setlocale_failure = TRUE; 343 else 344 curcoll = savepv(curcoll); 345 #endif /* USE_LOCALE_COLLATE */ 346 #ifdef USE_LOCALE_NUMERIC 347 Safefree(curnum); 348 if (! (curnum = setlocale(LC_NUMERIC, ""))) 349 setlocale_failure = TRUE; 350 else 351 curnum = savepv(curnum); 352 #endif /* USE_LOCALE_NUMERIC */ 353 } 354 355 if (setlocale_failure) { 356 char *p; 357 const bool locwarn = (printwarn > 1 || 358 (printwarn && 359 (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p)))); 360 361 if (locwarn) { 362 #ifdef LC_ALL 363 364 PerlIO_printf(Perl_error_log, 365 "perl: warning: Setting locale failed.\n"); 366 367 #else /* !LC_ALL */ 368 369 PerlIO_printf(Perl_error_log, 370 "perl: warning: Setting locale failed for the categories:\n\t"); 371 #ifdef USE_LOCALE_CTYPE 372 if (! curctype) 373 PerlIO_printf(Perl_error_log, "LC_CTYPE "); 374 #endif /* USE_LOCALE_CTYPE */ 375 #ifdef USE_LOCALE_COLLATE 376 if (! curcoll) 377 PerlIO_printf(Perl_error_log, "LC_COLLATE "); 378 #endif /* USE_LOCALE_COLLATE */ 379 #ifdef USE_LOCALE_NUMERIC 380 if (! curnum) 381 PerlIO_printf(Perl_error_log, "LC_NUMERIC "); 382 #endif /* USE_LOCALE_NUMERIC */ 383 PerlIO_printf(Perl_error_log, "\n"); 384 385 #endif /* LC_ALL */ 386 387 PerlIO_printf(Perl_error_log, 388 "perl: warning: Please check that your locale settings:\n"); 389 390 #ifdef __GLIBC__ 391 PerlIO_printf(Perl_error_log, 392 "\tLANGUAGE = %c%s%c,\n", 393 language ? '"' : '(', 394 language ? language : "unset", 395 language ? '"' : ')'); 396 #endif 397 398 PerlIO_printf(Perl_error_log, 399 "\tLC_ALL = %c%s%c,\n", 400 lc_all ? '"' : '(', 401 lc_all ? lc_all : "unset", 402 lc_all ? '"' : ')'); 403 404 #if defined(USE_ENVIRON_ARRAY) 405 { 406 char **e; 407 for (e = environ; *e; e++) { 408 if (strnEQ(*e, "LC_", 3) 409 && strnNE(*e, "LC_ALL=", 7) 410 && (p = strchr(*e, '='))) 411 PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n", 412 (int)(p - *e), *e, p + 1); 413 } 414 } 415 #else 416 PerlIO_printf(Perl_error_log, 417 "\t(possibly more locale environment variables)\n"); 418 #endif 419 420 PerlIO_printf(Perl_error_log, 421 "\tLANG = %c%s%c\n", 422 lang ? '"' : '(', 423 lang ? lang : "unset", 424 lang ? '"' : ')'); 425 426 PerlIO_printf(Perl_error_log, 427 " are supported and installed on your system.\n"); 428 } 429 430 #ifdef LC_ALL 431 432 if (setlocale(LC_ALL, "C")) { 433 if (locwarn) 434 PerlIO_printf(Perl_error_log, 435 "perl: warning: Falling back to the standard locale (\"C\").\n"); 436 ok = 0; 437 } 438 else { 439 if (locwarn) 440 PerlIO_printf(Perl_error_log, 441 "perl: warning: Failed to fall back to the standard locale (\"C\").\n"); 442 ok = -1; 443 } 444 445 #else /* ! LC_ALL */ 446 447 if (0 448 #ifdef USE_LOCALE_CTYPE 449 || !(curctype || setlocale(LC_CTYPE, "C")) 450 #endif /* USE_LOCALE_CTYPE */ 451 #ifdef USE_LOCALE_COLLATE 452 || !(curcoll || setlocale(LC_COLLATE, "C")) 453 #endif /* USE_LOCALE_COLLATE */ 454 #ifdef USE_LOCALE_NUMERIC 455 || !(curnum || setlocale(LC_NUMERIC, "C")) 456 #endif /* USE_LOCALE_NUMERIC */ 457 ) 458 { 459 if (locwarn) 460 PerlIO_printf(Perl_error_log, 461 "perl: warning: Cannot fall back to the standard locale (\"C\").\n"); 462 ok = -1; 463 } 464 465 #endif /* ! LC_ALL */ 466 467 #ifdef USE_LOCALE_CTYPE 468 Safefree(curctype); 469 curctype = savepv(setlocale(LC_CTYPE, NULL)); 470 #endif /* USE_LOCALE_CTYPE */ 471 #ifdef USE_LOCALE_COLLATE 472 Safefree(curcoll); 473 curcoll = savepv(setlocale(LC_COLLATE, NULL)); 474 #endif /* USE_LOCALE_COLLATE */ 475 #ifdef USE_LOCALE_NUMERIC 476 Safefree(curnum); 477 curnum = savepv(setlocale(LC_NUMERIC, NULL)); 478 #endif /* USE_LOCALE_NUMERIC */ 479 } 480 else { 481 482 #ifdef USE_LOCALE_CTYPE 483 new_ctype(curctype); 484 #endif /* USE_LOCALE_CTYPE */ 485 486 #ifdef USE_LOCALE_COLLATE 487 new_collate(curcoll); 488 #endif /* USE_LOCALE_COLLATE */ 489 490 #ifdef USE_LOCALE_NUMERIC 491 new_numeric(curnum); 492 #endif /* USE_LOCALE_NUMERIC */ 493 494 } 495 496 #endif /* USE_LOCALE */ 497 498 #ifdef USE_PERLIO 499 { 500 /* Set PL_utf8locale to TRUE if using PerlIO _and_ 501 any of the following are true: 502 - nl_langinfo(CODESET) contains /^utf-?8/i 503 - $ENV{LC_ALL} contains /^utf-?8/i 504 - $ENV{LC_CTYPE} contains /^utf-?8/i 505 - $ENV{LANG} contains /^utf-?8/i 506 The LC_ALL, LC_CTYPE, LANG obey the usual override 507 hierarchy of locale environment variables. (LANGUAGE 508 affects only LC_MESSAGES only under glibc.) (If present, 509 it overrides LC_MESSAGES for GNU gettext, and it also 510 can have more than one locale, separated by spaces, 511 in case you need to know.) 512 If PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE}) 513 are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer 514 on STDIN, STDOUT, STDERR, _and_ the default open discipline. 515 */ 516 bool utf8locale = FALSE; 517 char *codeset = NULL; 518 #if defined(HAS_NL_LANGINFO) && defined(CODESET) 519 codeset = nl_langinfo(CODESET); 520 #endif 521 if (codeset) 522 utf8locale = (foldEQ(codeset, STR_WITH_LEN("UTF-8")) 523 || foldEQ(codeset, STR_WITH_LEN("UTF8") )); 524 #if defined(USE_LOCALE) 525 else { /* nl_langinfo(CODESET) is supposed to correctly 526 * interpret the locale environment variables, 527 * but just in case it fails, let's do this manually. */ 528 if (lang) 529 utf8locale = (foldEQ(lang, STR_WITH_LEN("UTF-8")) 530 || foldEQ(lang, STR_WITH_LEN("UTF8") )); 531 #ifdef USE_LOCALE_CTYPE 532 if (curctype) 533 utf8locale = (foldEQ(curctype, STR_WITH_LEN("UTF-8")) 534 || foldEQ(curctype, STR_WITH_LEN("UTF8") )); 535 #endif 536 if (lc_all) 537 utf8locale = (foldEQ(lc_all, STR_WITH_LEN("UTF-8")) 538 || foldEQ(lc_all, STR_WITH_LEN("UTF8") )); 539 } 540 #endif /* USE_LOCALE */ 541 if (utf8locale) 542 PL_utf8locale = TRUE; 543 } 544 /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO. 545 This is an alternative to using the -C command line switch 546 (the -C if present will override this). */ 547 { 548 const char *p = PerlEnv_getenv("PERL_UNICODE"); 549 PL_unicode = p ? parse_unicode_opts(&p) : 0; 550 if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG) 551 PL_utf8cache = -1; 552 } 553 #endif 554 555 #ifdef USE_LOCALE_CTYPE 556 Safefree(curctype); 557 #endif /* USE_LOCALE_CTYPE */ 558 #ifdef USE_LOCALE_COLLATE 559 Safefree(curcoll); 560 #endif /* USE_LOCALE_COLLATE */ 561 #ifdef USE_LOCALE_NUMERIC 562 Safefree(curnum); 563 #endif /* USE_LOCALE_NUMERIC */ 564 return ok; 565 } 566 567 #ifdef USE_LOCALE_COLLATE 568 569 /* 570 * mem_collxfrm() is a bit like strxfrm() but with two important 571 * differences. First, it handles embedded NULs. Second, it allocates 572 * a bit more memory than needed for the transformed data itself. 573 * The real transformed data begins at offset sizeof(collationix). 574 * Please see sv_collxfrm() to see how this is used. 575 */ 576 577 char * 578 Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) 579 { 580 dVAR; 581 char *xbuf; 582 STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */ 583 584 PERL_ARGS_ASSERT_MEM_COLLXFRM; 585 586 /* the first sizeof(collationix) bytes are used by sv_collxfrm(). */ 587 /* the +1 is for the terminating NUL. */ 588 589 xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1; 590 Newx(xbuf, xAlloc, char); 591 if (! xbuf) 592 goto bad; 593 594 *(U32*)xbuf = PL_collation_ix; 595 xout = sizeof(PL_collation_ix); 596 for (xin = 0; xin < len; ) { 597 Size_t xused; 598 599 for (;;) { 600 xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout); 601 if (xused >= PERL_INT_MAX) 602 goto bad; 603 if ((STRLEN)xused < xAlloc - xout) 604 break; 605 xAlloc = (2 * xAlloc) + 1; 606 Renew(xbuf, xAlloc, char); 607 if (! xbuf) 608 goto bad; 609 } 610 611 xin += strlen(s + xin) + 1; 612 xout += xused; 613 614 /* Embedded NULs are understood but silently skipped 615 * because they make no sense in locale collation. */ 616 } 617 618 xbuf[xout] = '\0'; 619 *xlen = xout - sizeof(PL_collation_ix); 620 return xbuf; 621 622 bad: 623 Safefree(xbuf); 624 *xlen = 0; 625 return NULL; 626 } 627 628 #endif /* USE_LOCALE_COLLATE */ 629 630 /* 631 * Local variables: 632 * c-indentation-style: bsd 633 * c-basic-offset: 4 634 * indent-tabs-mode: nil 635 * End: 636 * 637 * ex: set ts=8 sts=4 sw=4 et: 638 */ 639