1 /* locale.c 2 * 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 4 * 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * A Elbereth Gilthoniel, 13 * silivren penna míriel 14 * o menel aglar elenath! 15 * Na-chaered palan-díriel 16 * o galadhremmin ennorath, 17 * Fanuilos, le linnathon 18 * nef aear, si nef aearon! 19 * 20 * [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"] 21 */ 22 23 /* utility functions for handling locale-specific stuff like what 24 * character represents the decimal point. 25 * 26 * All C programs have an underlying locale. Perl generally doesn't pay any 27 * attention to it except within the scope of a 'use locale'. For most 28 * categories, it accomplishes this by just using different operations if it is 29 * in such scope than if not. However, various libc functions called by Perl 30 * are affected by the LC_NUMERIC category, so there are macros in perl.h that 31 * are used to toggle between the current locale and the C locale depending on 32 * the desired behavior of those functions at the moment. 33 */ 34 35 #include "EXTERN.h" 36 #define PERL_IN_LOCALE_C 37 #include "perl.h" 38 39 #ifdef I_LANGINFO 40 # include <langinfo.h> 41 #endif 42 43 #include "reentr.h" 44 45 #ifdef USE_LOCALE 46 47 /* 48 * Standardize the locale name from a string returned by 'setlocale', possibly 49 * modifying that string. 50 * 51 * The typical return value of setlocale() is either 52 * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL 53 * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL 54 * (the space-separated values represent the various sublocales, 55 * in some unspecified order). This is not handled by this function. 56 * 57 * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n", 58 * which is harmful for further use of the string in setlocale(). This 59 * function removes the trailing new line and everything up through the '=' 60 * 61 */ 62 STATIC char * 63 S_stdize_locale(pTHX_ char *locs) 64 { 65 const char * const s = strchr(locs, '='); 66 bool okay = TRUE; 67 68 PERL_ARGS_ASSERT_STDIZE_LOCALE; 69 70 if (s) { 71 const char * const t = strchr(s, '.'); 72 okay = FALSE; 73 if (t) { 74 const char * const u = strchr(t, '\n'); 75 if (u && (u[1] == 0)) { 76 const STRLEN len = u - s; 77 Move(s + 1, locs, len, char); 78 locs[len] = 0; 79 okay = TRUE; 80 } 81 } 82 } 83 84 if (!okay) 85 Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs); 86 87 return locs; 88 } 89 90 #endif 91 92 void 93 Perl_set_numeric_radix(pTHX) 94 { 95 #ifdef USE_LOCALE_NUMERIC 96 dVAR; 97 # ifdef HAS_LOCALECONV 98 const struct lconv* const lc = localeconv(); 99 100 if (lc && lc->decimal_point) { 101 if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) { 102 SvREFCNT_dec(PL_numeric_radix_sv); 103 PL_numeric_radix_sv = NULL; 104 } 105 else { 106 if (PL_numeric_radix_sv) 107 sv_setpv(PL_numeric_radix_sv, lc->decimal_point); 108 else 109 PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0); 110 if (! is_ascii_string((U8 *) lc->decimal_point, 0) 111 && is_utf8_string((U8 *) lc->decimal_point, 0) 112 && is_cur_LC_category_utf8(LC_NUMERIC)) 113 { 114 SvUTF8_on(PL_numeric_radix_sv); 115 } 116 } 117 } 118 else 119 PL_numeric_radix_sv = NULL; 120 121 DEBUG_L(PerlIO_printf(Perl_debug_log, "Locale radix is %s\n", 122 (PL_numeric_radix_sv) 123 ? lc->decimal_point 124 : "NULL")); 125 126 # endif /* HAS_LOCALECONV */ 127 #endif /* USE_LOCALE_NUMERIC */ 128 } 129 130 void 131 Perl_new_numeric(pTHX_ const char *newnum) 132 { 133 #ifdef USE_LOCALE_NUMERIC 134 135 /* Called after all libc setlocale() calls affecting LC_NUMERIC, to tell 136 * core Perl this and that 'newnum' is the name of the new locale. 137 * It installs this locale as the current underlying default. 138 * 139 * The default locale and the C locale can be toggled between by use of the 140 * set_numeric_local() and set_numeric_standard() functions, which should 141 * probably not be called directly, but only via macros like 142 * SET_NUMERIC_STANDARD() in perl.h. 143 * 144 * The toggling is necessary mainly so that a non-dot radix decimal point 145 * character can be output, while allowing internal calculations to use a 146 * dot. 147 * 148 * This sets several interpreter-level variables: 149 * PL_numeric_name The default locale's name: a copy of 'newnum' 150 * PL_numeric_local A boolean indicating if the toggled state is such 151 * that the current locale is the default locale 152 * PL_numeric_standard A boolean indicating if the toggled state is such 153 * that the current locale is the C locale 154 * Note that both of the last two variables can be true at the same time, 155 * if the underlying locale is C. (Toggling is a no-op under these 156 * circumstances.) 157 * 158 * Any code changing the locale (outside this file) should use 159 * POSIX::setlocale, which calls this function. Therefore this function 160 * should be called directly only from this file and from 161 * POSIX::setlocale() */ 162 163 char *save_newnum; 164 dVAR; 165 166 if (! newnum) { 167 Safefree(PL_numeric_name); 168 PL_numeric_name = NULL; 169 PL_numeric_standard = TRUE; 170 PL_numeric_local = TRUE; 171 return; 172 } 173 174 save_newnum = stdize_locale(savepv(newnum)); 175 if (! PL_numeric_name || strNE(PL_numeric_name, save_newnum)) { 176 Safefree(PL_numeric_name); 177 PL_numeric_name = save_newnum; 178 } 179 180 PL_numeric_standard = ((*save_newnum == 'C' && save_newnum[1] == '\0') 181 || strEQ(save_newnum, "POSIX")); 182 PL_numeric_local = TRUE; 183 set_numeric_radix(); 184 185 #endif /* USE_LOCALE_NUMERIC */ 186 } 187 188 void 189 Perl_set_numeric_standard(pTHX) 190 { 191 #ifdef USE_LOCALE_NUMERIC 192 dVAR; 193 194 /* Toggle the LC_NUMERIC locale to C, if not already there. Probably 195 * should use the macros like SET_NUMERIC_STANDARD() in perl.h instead of 196 * calling this directly. */ 197 198 if (! PL_numeric_standard) { 199 setlocale(LC_NUMERIC, "C"); 200 PL_numeric_standard = TRUE; 201 PL_numeric_local = FALSE; 202 set_numeric_radix(); 203 } 204 DEBUG_L(PerlIO_printf(Perl_debug_log, 205 "Underlying LC_NUMERIC locale now is C\n")); 206 207 #endif /* USE_LOCALE_NUMERIC */ 208 } 209 210 void 211 Perl_set_numeric_local(pTHX) 212 { 213 #ifdef USE_LOCALE_NUMERIC 214 dVAR; 215 216 /* Toggle the LC_NUMERIC locale to the current underlying default, if not 217 * already there. Probably should use the macros like SET_NUMERIC_LOCAL() 218 * in perl.h instead of calling this directly. */ 219 220 if (! PL_numeric_local) { 221 setlocale(LC_NUMERIC, PL_numeric_name); 222 PL_numeric_standard = FALSE; 223 PL_numeric_local = TRUE; 224 set_numeric_radix(); 225 } 226 DEBUG_L(PerlIO_printf(Perl_debug_log, 227 "Underlying LC_NUMERIC locale now is %s\n", 228 PL_numeric_name)); 229 230 #endif /* USE_LOCALE_NUMERIC */ 231 } 232 233 /* 234 * Set up for a new ctype locale. 235 */ 236 void 237 Perl_new_ctype(pTHX_ const char *newctype) 238 { 239 #ifdef USE_LOCALE_CTYPE 240 241 /* Called after all libc setlocale() calls affecting LC_CTYPE, to tell 242 * core Perl this and that 'newctype' is the name of the new locale. 243 * 244 * This function sets up the folding arrays for all 256 bytes, assuming 245 * that tofold() is tolc() since fold case is not a concept in POSIX, 246 * 247 * Any code changing the locale (outside this file) should use 248 * POSIX::setlocale, which calls this function. Therefore this function 249 * should be called directly only from this file and from 250 * POSIX::setlocale() */ 251 252 dVAR; 253 UV i; 254 255 PERL_ARGS_ASSERT_NEW_CTYPE; 256 257 PL_in_utf8_CTYPE_locale = is_cur_LC_category_utf8(LC_CTYPE); 258 259 /* A UTF-8 locale gets standard rules. But note that code still has to 260 * handle this specially because of the three problematic code points */ 261 if (PL_in_utf8_CTYPE_locale) { 262 Copy(PL_fold_latin1, PL_fold_locale, 256, U8); 263 } 264 else { 265 for (i = 0; i < 256; i++) { 266 if (isUPPER_LC((U8) i)) 267 PL_fold_locale[i] = (U8) toLOWER_LC((U8) i); 268 else if (isLOWER_LC((U8) i)) 269 PL_fold_locale[i] = (U8) toUPPER_LC((U8) i); 270 else 271 PL_fold_locale[i] = (U8) i; 272 } 273 } 274 275 #endif /* USE_LOCALE_CTYPE */ 276 PERL_ARGS_ASSERT_NEW_CTYPE; 277 PERL_UNUSED_ARG(newctype); 278 PERL_UNUSED_CONTEXT; 279 } 280 281 void 282 Perl_new_collate(pTHX_ const char *newcoll) 283 { 284 #ifdef USE_LOCALE_COLLATE 285 286 /* Called after all libc setlocale() calls affecting LC_COLLATE, to tell 287 * core Perl this and that 'newcoll' is the name of the new locale. 288 * 289 * Any code changing the locale (outside this file) should use 290 * POSIX::setlocale, which calls this function. Therefore this function 291 * should be called directly only from this file and from 292 * POSIX::setlocale() */ 293 294 dVAR; 295 296 if (! newcoll) { 297 if (PL_collation_name) { 298 ++PL_collation_ix; 299 Safefree(PL_collation_name); 300 PL_collation_name = NULL; 301 } 302 PL_collation_standard = TRUE; 303 PL_collxfrm_base = 0; 304 PL_collxfrm_mult = 2; 305 return; 306 } 307 308 if (! PL_collation_name || strNE(PL_collation_name, newcoll)) { 309 ++PL_collation_ix; 310 Safefree(PL_collation_name); 311 PL_collation_name = stdize_locale(savepv(newcoll)); 312 PL_collation_standard = ((*newcoll == 'C' && newcoll[1] == '\0') 313 || strEQ(newcoll, "POSIX")); 314 315 { 316 /* 2: at most so many chars ('a', 'b'). */ 317 /* 50: surely no system expands a char more. */ 318 #define XFRMBUFSIZE (2 * 50) 319 char xbuf[XFRMBUFSIZE]; 320 const Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE); 321 const Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE); 322 const SSize_t mult = fb - fa; 323 if (mult < 1 && !(fa == 0 && fb == 0)) 324 Perl_croak(aTHX_ "panic: strxfrm() gets absurd - a => %"UVuf", ab => %"UVuf, 325 (UV) fa, (UV) fb); 326 PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0; 327 PL_collxfrm_mult = mult; 328 } 329 } 330 331 #endif /* USE_LOCALE_COLLATE */ 332 } 333 334 #ifdef WIN32 335 336 char * 337 Perl_my_setlocale(pTHX_ int category, const char* locale) 338 { 339 /* This, for Windows, emulates POSIX setlocale() behavior. There is no 340 * difference unless the input locale is "", which means on Windows to get 341 * the machine default, which is set via the computer's "Regional and 342 * Language Options" (or its current equivalent). In POSIX, it instead 343 * means to find the locale from the user's environment. This routine 344 * looks in the environment, and, if anything is found, uses that instead 345 * of going to the machine default. If there is no environment override, 346 * the machine default is used, as normal, by calling the real setlocale() 347 * with "". The POSIX behavior is to use the LC_ALL variable if set; 348 * otherwise to use the particular category's variable if set; otherwise to 349 * use the LANG variable. */ 350 351 bool override_LC_ALL = FALSE; 352 char * result; 353 354 if (locale && strEQ(locale, "")) { 355 # ifdef LC_ALL 356 locale = PerlEnv_getenv("LC_ALL"); 357 if (! locale) { 358 #endif 359 switch (category) { 360 # ifdef LC_ALL 361 case LC_ALL: 362 override_LC_ALL = TRUE; 363 break; /* We already know its variable isn't set */ 364 # endif 365 # ifdef USE_LOCALE_TIME 366 case LC_TIME: 367 locale = PerlEnv_getenv("LC_TIME"); 368 break; 369 # endif 370 # ifdef USE_LOCALE_CTYPE 371 case LC_CTYPE: 372 locale = PerlEnv_getenv("LC_CTYPE"); 373 break; 374 # endif 375 # ifdef USE_LOCALE_COLLATE 376 case LC_COLLATE: 377 locale = PerlEnv_getenv("LC_COLLATE"); 378 break; 379 # endif 380 # ifdef USE_LOCALE_MONETARY 381 case LC_MONETARY: 382 locale = PerlEnv_getenv("LC_MONETARY"); 383 break; 384 # endif 385 # ifdef USE_LOCALE_NUMERIC 386 case LC_NUMERIC: 387 locale = PerlEnv_getenv("LC_NUMERIC"); 388 break; 389 # endif 390 # ifdef USE_LOCALE_MESSAGES 391 case LC_MESSAGES: 392 locale = PerlEnv_getenv("LC_MESSAGES"); 393 break; 394 # endif 395 default: 396 /* This is a category, like PAPER_SIZE that we don't 397 * know about; and so can't provide a wrapper. */ 398 break; 399 } 400 if (! locale) { 401 locale = PerlEnv_getenv("LANG"); 402 if (! locale) { 403 locale = ""; 404 } 405 } 406 # ifdef LC_ALL 407 } 408 # endif 409 } 410 411 result = setlocale(category, locale); 412 413 if (! override_LC_ALL) { 414 return result; 415 } 416 417 /* Here the input locale was LC_ALL, and we have set it to what is in the 418 * LANG variable or the system default if there is no LANG. But these have 419 * lower priority than the other LC_foo variables, so override it for each 420 * one that is set. (If they are set to "", it means to use the same thing 421 * we just set LC_ALL to, so can skip) */ 422 # ifdef USE_LOCALE_TIME 423 result = PerlEnv_getenv("LC_TIME"); 424 if (result && strNE(result, "")) { 425 setlocale(LC_TIME, result); 426 } 427 # endif 428 # ifdef USE_LOCALE_CTYPE 429 result = PerlEnv_getenv("LC_CTYPE"); 430 if (result && strNE(result, "")) { 431 setlocale(LC_CTYPE, result); 432 } 433 # endif 434 # ifdef USE_LOCALE_COLLATE 435 result = PerlEnv_getenv("LC_COLLATE"); 436 if (result && strNE(result, "")) { 437 setlocale(LC_COLLATE, result); 438 } 439 # endif 440 # ifdef USE_LOCALE_MONETARY 441 result = PerlEnv_getenv("LC_MONETARY"); 442 if (result && strNE(result, "")) { 443 setlocale(LC_MONETARY, result); 444 } 445 # endif 446 # ifdef USE_LOCALE_NUMERIC 447 result = PerlEnv_getenv("LC_NUMERIC"); 448 if (result && strNE(result, "")) { 449 setlocale(LC_NUMERIC, result); 450 } 451 # endif 452 # ifdef USE_LOCALE_MESSAGES 453 result = PerlEnv_getenv("LC_MESSAGES"); 454 if (result && strNE(result, "")) { 455 setlocale(LC_MESSAGES, result); 456 } 457 # endif 458 459 return setlocale(LC_ALL, NULL); 460 461 } 462 463 #endif 464 465 466 /* 467 * Initialize locale awareness. 468 */ 469 int 470 Perl_init_i18nl10n(pTHX_ int printwarn) 471 { 472 /* printwarn is 473 * 474 * 0 if not to output warning when setup locale is bad 475 * 1 if to output warning based on value of PERL_BADLANG 476 * >1 if to output regardless of PERL_BADLANG 477 * 478 * returns 479 * 1 = set ok or not applicable, 480 * 0 = fallback to a locale of lower priority 481 * -1 = fallback to all locales failed, not even to the C locale 482 */ 483 484 int ok = 1; 485 486 #if defined(USE_LOCALE) 487 dVAR; 488 489 #ifdef USE_LOCALE_CTYPE 490 char *curctype = NULL; 491 #endif /* USE_LOCALE_CTYPE */ 492 #ifdef USE_LOCALE_COLLATE 493 char *curcoll = NULL; 494 #endif /* USE_LOCALE_COLLATE */ 495 #ifdef USE_LOCALE_NUMERIC 496 char *curnum = NULL; 497 #endif /* USE_LOCALE_NUMERIC */ 498 #ifdef __GLIBC__ 499 const char * const language = savepv(PerlEnv_getenv("LANGUAGE")); 500 #endif 501 502 /* NULL uses the existing already set up locale */ 503 const char * const setlocale_init = (PerlEnv_getenv("PERL_SKIP_LOCALE_INIT")) 504 ? NULL 505 : ""; 506 const char* trial_locales[5]; /* 5 = 1 each for "", LC_ALL, LANG, "", C */ 507 unsigned int trial_locales_count; 508 const char * const lc_all = savepv(PerlEnv_getenv("LC_ALL")); 509 const char * const lang = savepv(PerlEnv_getenv("LANG")); 510 bool setlocale_failure = FALSE; 511 unsigned int i; 512 char *p; 513 514 /* A later getenv() could zap this, so only use here */ 515 const char * const bad_lang_use_once = PerlEnv_getenv("PERL_BADLANG"); 516 517 const bool locwarn = (printwarn > 1 518 || (printwarn 519 && (! bad_lang_use_once 520 || atoi(bad_lang_use_once)))); 521 bool done = FALSE; 522 const char *system_default_locale = NULL; 523 524 525 #ifndef LOCALE_ENVIRON_REQUIRED 526 PERL_UNUSED_VAR(done); 527 #else 528 529 /* 530 * Ultrix setlocale(..., "") fails if there are no environment 531 * variables from which to get a locale name. 532 */ 533 534 # ifdef LC_ALL 535 if (lang) { 536 if (my_setlocale(LC_ALL, setlocale_init)) 537 done = TRUE; 538 else 539 setlocale_failure = TRUE; 540 } 541 if (!setlocale_failure) { 542 # ifdef USE_LOCALE_CTYPE 543 Safefree(curctype); 544 if (! (curctype = 545 my_setlocale(LC_CTYPE, 546 (!done && (lang || PerlEnv_getenv("LC_CTYPE"))) 547 ? setlocale_init : NULL))) 548 setlocale_failure = TRUE; 549 else 550 curctype = savepv(curctype); 551 # endif /* USE_LOCALE_CTYPE */ 552 # ifdef USE_LOCALE_COLLATE 553 Safefree(curcoll); 554 if (! (curcoll = 555 my_setlocale(LC_COLLATE, 556 (!done && (lang || PerlEnv_getenv("LC_COLLATE"))) 557 ? setlocale_init : NULL))) 558 setlocale_failure = TRUE; 559 else 560 curcoll = savepv(curcoll); 561 # endif /* USE_LOCALE_COLLATE */ 562 # ifdef USE_LOCALE_NUMERIC 563 Safefree(curnum); 564 if (! (curnum = 565 my_setlocale(LC_NUMERIC, 566 (!done && (lang || PerlEnv_getenv("LC_NUMERIC"))) 567 ? setlocale_init : NULL))) 568 setlocale_failure = TRUE; 569 else 570 curnum = savepv(curnum); 571 # endif /* USE_LOCALE_NUMERIC */ 572 # ifdef USE_LOCALE_MESSAGES 573 if (! my_setlocale(LC_MESSAGES, 574 (!done && (lang || PerlEnv_getenv("LC_MESSAGES"))) 575 ? setlocale_init : NULL)) 576 { 577 setlocale_failure = TRUE; 578 } 579 # endif /* USE_LOCALE_MESSAGES */ 580 # ifdef USE_LOCALE_MONETARY 581 if (! my_setlocale(LC_MONETARY, 582 (!done && (lang || PerlEnv_getenv("LC_MONETARY"))) 583 ? setlocale_init : NULL)) 584 { 585 setlocale_failure = TRUE; 586 } 587 # endif /* USE_LOCALE_MONETARY */ 588 } 589 590 # endif /* LC_ALL */ 591 592 #endif /* !LOCALE_ENVIRON_REQUIRED */ 593 594 /* We try each locale in the list until we get one that works, or exhaust 595 * the list */ 596 trial_locales[0] = setlocale_init; 597 trial_locales_count = 1; 598 for (i= 0; i < trial_locales_count; i++) { 599 const char * trial_locale = trial_locales[i]; 600 601 if (i > 0) { 602 603 /* XXX This is to preserve old behavior for LOCALE_ENVIRON_REQUIRED 604 * when i==0, but I (khw) don't think that behavior makes much 605 * sense */ 606 setlocale_failure = FALSE; 607 608 #ifdef WIN32 609 610 /* On Windows machines, an entry of "" after the 0th means to use 611 * the system default locale, which we now proceed to get. */ 612 if (strEQ(trial_locale, "")) { 613 unsigned int j; 614 615 /* Note that this may change the locale, but we are going to do 616 * that anyway just below */ 617 system_default_locale = setlocale(LC_ALL, ""); 618 619 /* Skip if invalid or it's already on the list of locales to 620 * try */ 621 if (! system_default_locale) { 622 goto next_iteration; 623 } 624 for (j = 0; j < trial_locales_count; j++) { 625 if (strEQ(system_default_locale, trial_locales[j])) { 626 goto next_iteration; 627 } 628 } 629 630 trial_locale = system_default_locale; 631 } 632 #endif 633 } 634 635 #ifdef LC_ALL 636 if (! my_setlocale(LC_ALL, trial_locale)) { 637 setlocale_failure = TRUE; 638 } 639 else { 640 /* Since LC_ALL succeeded, it should have changed all the other 641 * categories it can to its value; so we massage things so that the 642 * setlocales below just return their category's current values. 643 * This adequately handles the case in NetBSD where LC_COLLATE may 644 * not be defined for a locale, and setting it individually will 645 * fail, whereas setting LC_ALL suceeds, leaving LC_COLLATE set to 646 * the POSIX locale. */ 647 trial_locale = NULL; 648 } 649 #endif /* LC_ALL */ 650 651 if (!setlocale_failure) { 652 #ifdef USE_LOCALE_CTYPE 653 Safefree(curctype); 654 if (! (curctype = my_setlocale(LC_CTYPE, trial_locale))) 655 setlocale_failure = TRUE; 656 else 657 curctype = savepv(curctype); 658 #endif /* USE_LOCALE_CTYPE */ 659 #ifdef USE_LOCALE_COLLATE 660 Safefree(curcoll); 661 if (! (curcoll = my_setlocale(LC_COLLATE, trial_locale))) 662 setlocale_failure = TRUE; 663 else 664 curcoll = savepv(curcoll); 665 #endif /* USE_LOCALE_COLLATE */ 666 #ifdef USE_LOCALE_NUMERIC 667 Safefree(curnum); 668 if (! (curnum = my_setlocale(LC_NUMERIC, trial_locale))) 669 setlocale_failure = TRUE; 670 else 671 curnum = savepv(curnum); 672 #endif /* USE_LOCALE_NUMERIC */ 673 #ifdef USE_LOCALE_MESSAGES 674 if (! (my_setlocale(LC_MESSAGES, trial_locale))) 675 setlocale_failure = TRUE; 676 #endif /* USE_LOCALE_MESSAGES */ 677 #ifdef USE_LOCALE_MONETARY 678 if (! (my_setlocale(LC_MONETARY, trial_locale))) 679 setlocale_failure = TRUE; 680 #endif /* USE_LOCALE_MONETARY */ 681 682 if (! setlocale_failure) { /* Success */ 683 break; 684 } 685 } 686 687 /* Here, something failed; will need to try a fallback. */ 688 ok = 0; 689 690 if (i == 0) { 691 unsigned int j; 692 693 if (locwarn) { /* Output failure info only on the first one */ 694 #ifdef LC_ALL 695 696 PerlIO_printf(Perl_error_log, 697 "perl: warning: Setting locale failed.\n"); 698 699 #else /* !LC_ALL */ 700 701 PerlIO_printf(Perl_error_log, 702 "perl: warning: Setting locale failed for the categories:\n\t"); 703 #ifdef USE_LOCALE_CTYPE 704 if (! curctype) 705 PerlIO_printf(Perl_error_log, "LC_CTYPE "); 706 #endif /* USE_LOCALE_CTYPE */ 707 #ifdef USE_LOCALE_COLLATE 708 if (! curcoll) 709 PerlIO_printf(Perl_error_log, "LC_COLLATE "); 710 #endif /* USE_LOCALE_COLLATE */ 711 #ifdef USE_LOCALE_NUMERIC 712 if (! curnum) 713 PerlIO_printf(Perl_error_log, "LC_NUMERIC "); 714 #endif /* USE_LOCALE_NUMERIC */ 715 PerlIO_printf(Perl_error_log, "and possibly others\n"); 716 717 #endif /* LC_ALL */ 718 719 PerlIO_printf(Perl_error_log, 720 "perl: warning: Please check that your locale settings:\n"); 721 722 #ifdef __GLIBC__ 723 PerlIO_printf(Perl_error_log, 724 "\tLANGUAGE = %c%s%c,\n", 725 language ? '"' : '(', 726 language ? language : "unset", 727 language ? '"' : ')'); 728 #endif 729 730 PerlIO_printf(Perl_error_log, 731 "\tLC_ALL = %c%s%c,\n", 732 lc_all ? '"' : '(', 733 lc_all ? lc_all : "unset", 734 lc_all ? '"' : ')'); 735 736 #if defined(USE_ENVIRON_ARRAY) 737 { 738 char **e; 739 for (e = environ; *e; e++) { 740 if (strnEQ(*e, "LC_", 3) 741 && strnNE(*e, "LC_ALL=", 7) 742 && (p = strchr(*e, '='))) 743 PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n", 744 (int)(p - *e), *e, p + 1); 745 } 746 } 747 #else 748 PerlIO_printf(Perl_error_log, 749 "\t(possibly more locale environment variables)\n"); 750 #endif 751 752 PerlIO_printf(Perl_error_log, 753 "\tLANG = %c%s%c\n", 754 lang ? '"' : '(', 755 lang ? lang : "unset", 756 lang ? '"' : ')'); 757 758 PerlIO_printf(Perl_error_log, 759 " are supported and installed on your system.\n"); 760 } 761 762 /* Calculate what fallback locales to try. We have avoided this 763 * until we have to, becuase failure is quite unlikely. This will 764 * usually change the upper bound of the loop we are in. 765 * 766 * Since the system's default way of setting the locale has not 767 * found one that works, We use Perl's defined ordering: LC_ALL, 768 * LANG, and the C locale. We don't try the same locale twice, so 769 * don't add to the list if already there. (On POSIX systems, the 770 * LC_ALL element will likely be a repeat of the 0th element "", 771 * but there's no harm done by doing it explicitly */ 772 if (lc_all) { 773 for (j = 0; j < trial_locales_count; j++) { 774 if (strEQ(lc_all, trial_locales[j])) { 775 goto done_lc_all; 776 } 777 } 778 trial_locales[trial_locales_count++] = lc_all; 779 } 780 done_lc_all: 781 782 if (lang) { 783 for (j = 0; j < trial_locales_count; j++) { 784 if (strEQ(lang, trial_locales[j])) { 785 goto done_lang; 786 } 787 } 788 trial_locales[trial_locales_count++] = lang; 789 } 790 done_lang: 791 792 #if defined(WIN32) && defined(LC_ALL) 793 /* For Windows, we also try the system default locale before "C". 794 * (If there exists a Windows without LC_ALL we skip this because 795 * it gets too complicated. For those, the "C" is the next 796 * fallback possibility). The "" is the same as the 0th element of 797 * the array, but the code at the loop above knows to treat it 798 * differently when not the 0th */ 799 trial_locales[trial_locales_count++] = ""; 800 #endif 801 802 for (j = 0; j < trial_locales_count; j++) { 803 if (strEQ("C", trial_locales[j])) { 804 goto done_C; 805 } 806 } 807 trial_locales[trial_locales_count++] = "C"; 808 809 done_C: ; 810 } /* end of first time through the loop */ 811 812 #ifdef WIN32 813 next_iteration: ; 814 #endif 815 816 } /* end of looping through the trial locales */ 817 818 if (ok < 1) { /* If we tried to fallback */ 819 const char* msg; 820 if (! setlocale_failure) { /* fallback succeeded */ 821 msg = "Falling back to"; 822 } 823 else { /* fallback failed */ 824 825 /* We dropped off the end of the loop, so have to decrement i to 826 * get back to the value the last time through */ 827 i--; 828 829 ok = -1; 830 msg = "Failed to fall back to"; 831 832 /* To continue, we should use whatever values we've got */ 833 #ifdef USE_LOCALE_CTYPE 834 Safefree(curctype); 835 curctype = savepv(setlocale(LC_CTYPE, NULL)); 836 #endif /* USE_LOCALE_CTYPE */ 837 #ifdef USE_LOCALE_COLLATE 838 Safefree(curcoll); 839 curcoll = savepv(setlocale(LC_COLLATE, NULL)); 840 #endif /* USE_LOCALE_COLLATE */ 841 #ifdef USE_LOCALE_NUMERIC 842 Safefree(curnum); 843 curnum = savepv(setlocale(LC_NUMERIC, NULL)); 844 #endif /* USE_LOCALE_NUMERIC */ 845 } 846 847 if (locwarn) { 848 const char * description; 849 const char * name = ""; 850 if (strEQ(trial_locales[i], "C")) { 851 description = "the standard locale"; 852 name = "C"; 853 } 854 else if (strEQ(trial_locales[i], "")) { 855 description = "the system default locale"; 856 if (system_default_locale) { 857 name = system_default_locale; 858 } 859 } 860 else { 861 description = "a fallback locale"; 862 name = trial_locales[i]; 863 } 864 if (name && strNE(name, "")) { 865 PerlIO_printf(Perl_error_log, 866 "perl: warning: %s %s (\"%s\").\n", msg, description, name); 867 } 868 else { 869 PerlIO_printf(Perl_error_log, 870 "perl: warning: %s %s.\n", msg, description); 871 } 872 } 873 } /* End of tried to fallback */ 874 875 #ifdef USE_LOCALE_CTYPE 876 new_ctype(curctype); 877 #endif /* USE_LOCALE_CTYPE */ 878 879 #ifdef USE_LOCALE_COLLATE 880 new_collate(curcoll); 881 #endif /* USE_LOCALE_COLLATE */ 882 883 #ifdef USE_LOCALE_NUMERIC 884 new_numeric(curnum); 885 #endif /* USE_LOCALE_NUMERIC */ 886 887 #if defined(USE_PERLIO) && defined(USE_LOCALE_CTYPE) 888 /* Set PL_utf8locale to TRUE if using PerlIO _and_ the current LC_CTYPE 889 * locale is UTF-8. If PL_utf8locale and PL_unicode (set by -C or by 890 * $ENV{PERL_UNICODE}) are true, perl.c:S_parse_body() will turn on the 891 * PerlIO :utf8 layer on STDIN, STDOUT, STDERR, _and_ the default open 892 * discipline. */ 893 PL_utf8locale = is_cur_LC_category_utf8(LC_CTYPE); 894 895 /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO. 896 This is an alternative to using the -C command line switch 897 (the -C if present will override this). */ 898 { 899 const char *p = PerlEnv_getenv("PERL_UNICODE"); 900 PL_unicode = p ? parse_unicode_opts(&p) : 0; 901 if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG) 902 PL_utf8cache = -1; 903 } 904 #endif 905 906 #ifdef USE_LOCALE_CTYPE 907 Safefree(curctype); 908 #endif /* USE_LOCALE_CTYPE */ 909 #ifdef USE_LOCALE_COLLATE 910 Safefree(curcoll); 911 #endif /* USE_LOCALE_COLLATE */ 912 #ifdef USE_LOCALE_NUMERIC 913 Safefree(curnum); 914 #endif /* USE_LOCALE_NUMERIC */ 915 916 #endif /* USE_LOCALE */ 917 918 #ifdef __GLIBC__ 919 Safefree(language); 920 #endif 921 922 Safefree(lc_all); 923 Safefree(lang); 924 925 return ok; 926 } 927 928 929 #ifdef USE_LOCALE_COLLATE 930 931 /* 932 * mem_collxfrm() is a bit like strxfrm() but with two important 933 * differences. First, it handles embedded NULs. Second, it allocates 934 * a bit more memory than needed for the transformed data itself. 935 * The real transformed data begins at offset sizeof(collationix). 936 * Please see sv_collxfrm() to see how this is used. 937 */ 938 939 char * 940 Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) 941 { 942 dVAR; 943 char *xbuf; 944 STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */ 945 946 PERL_ARGS_ASSERT_MEM_COLLXFRM; 947 948 /* the first sizeof(collationix) bytes are used by sv_collxfrm(). */ 949 /* the +1 is for the terminating NUL. */ 950 951 xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1; 952 Newx(xbuf, xAlloc, char); 953 if (! xbuf) 954 goto bad; 955 956 *(U32*)xbuf = PL_collation_ix; 957 xout = sizeof(PL_collation_ix); 958 for (xin = 0; xin < len; ) { 959 Size_t xused; 960 961 for (;;) { 962 xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout); 963 if (xused >= PERL_INT_MAX) 964 goto bad; 965 if ((STRLEN)xused < xAlloc - xout) 966 break; 967 xAlloc = (2 * xAlloc) + 1; 968 Renew(xbuf, xAlloc, char); 969 if (! xbuf) 970 goto bad; 971 } 972 973 xin += strlen(s + xin) + 1; 974 xout += xused; 975 976 /* Embedded NULs are understood but silently skipped 977 * because they make no sense in locale collation. */ 978 } 979 980 xbuf[xout] = '\0'; 981 *xlen = xout - sizeof(PL_collation_ix); 982 return xbuf; 983 984 bad: 985 Safefree(xbuf); 986 *xlen = 0; 987 return NULL; 988 } 989 990 #endif /* USE_LOCALE_COLLATE */ 991 992 #ifdef USE_LOCALE 993 994 STATIC bool 995 S_is_cur_LC_category_utf8(pTHX_ int category) 996 { 997 /* Returns TRUE if the current locale for 'category' is UTF-8; FALSE 998 * otherwise. 'category' may not be LC_ALL. If the platform doesn't have 999 * nl_langinfo(), nor MB_CUR_MAX, this employs a heuristic, which hence 1000 * could give the wrong result. It errs on the side of not being a UTF-8 1001 * locale. */ 1002 1003 char *save_input_locale = NULL; 1004 STRLEN final_pos; 1005 1006 #ifdef LC_ALL 1007 assert(category != LC_ALL); 1008 #endif 1009 1010 /* First dispose of the trivial cases */ 1011 save_input_locale = setlocale(category, NULL); 1012 if (! save_input_locale) { 1013 DEBUG_L(PerlIO_printf(Perl_debug_log, 1014 "Could not find current locale for category %d\n", 1015 category)); 1016 return FALSE; /* XXX maybe should croak */ 1017 } 1018 save_input_locale = stdize_locale(savepv(save_input_locale)); 1019 if ((*save_input_locale == 'C' && save_input_locale[1] == '\0') 1020 || strEQ(save_input_locale, "POSIX")) 1021 { 1022 DEBUG_L(PerlIO_printf(Perl_debug_log, 1023 "Current locale for category %d is %s\n", 1024 category, save_input_locale)); 1025 Safefree(save_input_locale); 1026 return FALSE; 1027 } 1028 1029 #if defined(USE_LOCALE_CTYPE) \ 1030 && (defined(MB_CUR_MAX) || (defined(HAS_NL_LANGINFO) && defined(CODESET))) 1031 1032 { /* Next try nl_langinfo or MB_CUR_MAX if available */ 1033 1034 char *save_ctype_locale = NULL; 1035 bool is_utf8; 1036 1037 if (category != LC_CTYPE) { /* These work only on LC_CTYPE */ 1038 1039 /* Get the current LC_CTYPE locale */ 1040 save_ctype_locale = stdize_locale(savepv(setlocale(LC_CTYPE, NULL))); 1041 if (! save_ctype_locale) { 1042 DEBUG_L(PerlIO_printf(Perl_debug_log, 1043 "Could not find current locale for LC_CTYPE\n")); 1044 goto cant_use_nllanginfo; 1045 } 1046 1047 /* If LC_CTYPE and the desired category use the same locale, this 1048 * means that finding the value for LC_CTYPE is the same as finding 1049 * the value for the desired category. Otherwise, switch LC_CTYPE 1050 * to the desired category's locale */ 1051 if (strEQ(save_ctype_locale, save_input_locale)) { 1052 Safefree(save_ctype_locale); 1053 save_ctype_locale = NULL; 1054 } 1055 else if (! setlocale(LC_CTYPE, save_input_locale)) { 1056 DEBUG_L(PerlIO_printf(Perl_debug_log, 1057 "Could not change LC_CTYPE locale to %s\n", 1058 save_input_locale)); 1059 Safefree(save_ctype_locale); 1060 goto cant_use_nllanginfo; 1061 } 1062 } 1063 1064 DEBUG_L(PerlIO_printf(Perl_debug_log, "Current LC_CTYPE locale=%s\n", 1065 save_input_locale)); 1066 1067 /* Here the current LC_CTYPE is set to the locale of the category whose 1068 * information is desired. This means that nl_langinfo() and MB_CUR_MAX 1069 * should give the correct results */ 1070 1071 # if defined(HAS_NL_LANGINFO) && defined(CODESET) 1072 { 1073 char *codeset = savepv(nl_langinfo(CODESET)); 1074 if (codeset && strNE(codeset, "")) { 1075 1076 /* If we switched LC_CTYPE, switch back */ 1077 if (save_ctype_locale) { 1078 setlocale(LC_CTYPE, save_ctype_locale); 1079 Safefree(save_ctype_locale); 1080 } 1081 1082 is_utf8 = foldEQ(codeset, STR_WITH_LEN("UTF-8")) 1083 || foldEQ(codeset, STR_WITH_LEN("UTF8")); 1084 1085 DEBUG_L(PerlIO_printf(Perl_debug_log, 1086 "\tnllanginfo returned CODESET '%s'; ?UTF8 locale=%d\n", 1087 codeset, is_utf8)); 1088 Safefree(codeset); 1089 Safefree(save_input_locale); 1090 return is_utf8; 1091 } 1092 Safefree(codeset); 1093 } 1094 1095 # endif 1096 # ifdef MB_CUR_MAX 1097 1098 /* Here, either we don't have nl_langinfo, or it didn't return a 1099 * codeset. Try MB_CUR_MAX */ 1100 1101 /* Standard UTF-8 needs at least 4 bytes to represent the maximum 1102 * Unicode code point. Since UTF-8 is the only non-single byte 1103 * encoding we handle, we just say any such encoding is UTF-8, and if 1104 * turns out to be wrong, other things will fail */ 1105 is_utf8 = MB_CUR_MAX >= 4; 1106 1107 DEBUG_L(PerlIO_printf(Perl_debug_log, 1108 "\tMB_CUR_MAX=%d; ?UTF8 locale=%d\n", 1109 (int) MB_CUR_MAX, is_utf8)); 1110 1111 Safefree(save_input_locale); 1112 1113 # ifdef HAS_MBTOWC 1114 1115 /* ... But, most system that have MB_CUR_MAX will also have mbtowc(), 1116 * since they are both in the C99 standard. We can feed a known byte 1117 * string to the latter function, and check that it gives the expected 1118 * result */ 1119 if (is_utf8) { 1120 wchar_t wc; 1121 PERL_UNUSED_RESULT(mbtowc(&wc, NULL, 0));/* Reset any shift state */ 1122 errno = 0; 1123 if (mbtowc(&wc, HYPHEN_UTF8, strlen(HYPHEN_UTF8)) 1124 != strlen(HYPHEN_UTF8) 1125 || wc != (wchar_t) 0x2010) 1126 { 1127 is_utf8 = FALSE; 1128 DEBUG_L(PerlIO_printf(Perl_debug_log, "\thyphen=U+%x\n", wc)); 1129 DEBUG_L(PerlIO_printf(Perl_debug_log, 1130 "\treturn from mbtowc=%d; errno=%d; ?UTF8 locale=0\n", 1131 mbtowc(&wc, HYPHEN_UTF8, strlen(HYPHEN_UTF8)), errno)); 1132 } 1133 } 1134 # endif 1135 1136 /* If we switched LC_CTYPE, switch back */ 1137 if (save_ctype_locale) { 1138 setlocale(LC_CTYPE, save_ctype_locale); 1139 Safefree(save_ctype_locale); 1140 } 1141 1142 return is_utf8; 1143 # endif 1144 } 1145 1146 cant_use_nllanginfo: 1147 1148 #endif /* HAS_NL_LANGINFO etc */ 1149 1150 /* nl_langinfo not available or failed somehow. Look at the locale name to 1151 * see if it matches qr/UTF -? 8 /ix */ 1152 1153 final_pos = strlen(save_input_locale) - 1; 1154 if (final_pos >= 3) { 1155 char *name = save_input_locale; 1156 1157 /* Find next 'U' or 'u' and look from there */ 1158 while ((name += strcspn(name, "Uu") + 1) 1159 <= save_input_locale + final_pos - 2) 1160 { 1161 if (toFOLD(*(name)) != 't' 1162 || toFOLD(*(name + 1)) != 'f') 1163 { 1164 continue; 1165 } 1166 name += 2; 1167 if (*(name) == '-') { 1168 if ((name > save_input_locale + final_pos - 1)) { 1169 break; 1170 } 1171 name++; 1172 } 1173 if (*(name) == '8') { 1174 Safefree(save_input_locale); 1175 DEBUG_L(PerlIO_printf(Perl_debug_log, 1176 "Locale %s ends with UTF-8 in name\n", 1177 save_input_locale)); 1178 return TRUE; 1179 } 1180 } 1181 DEBUG_L(PerlIO_printf(Perl_debug_log, 1182 "Locale %s doesn't end with UTF-8 in name\n", 1183 save_input_locale)); 1184 } 1185 1186 #ifdef WIN32 1187 /* http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx */ 1188 if (final_pos >= 4 1189 && *(save_input_locale + final_pos - 0) == '1' 1190 && *(save_input_locale + final_pos - 1) == '0' 1191 && *(save_input_locale + final_pos - 2) == '0' 1192 && *(save_input_locale + final_pos - 3) == '5' 1193 && *(save_input_locale + final_pos - 4) == '6') 1194 { 1195 DEBUG_L(PerlIO_printf(Perl_debug_log, 1196 "Locale %s ends with 10056 in name, is UTF-8 locale\n", 1197 save_input_locale)); 1198 Safefree(save_input_locale); 1199 return TRUE; 1200 } 1201 #endif 1202 1203 /* Other common encodings are the ISO 8859 series, which aren't UTF-8 */ 1204 if (instr(save_input_locale, "8859")) { 1205 DEBUG_L(PerlIO_printf(Perl_debug_log, 1206 "Locale %s has 8859 in name, not UTF-8 locale\n", 1207 save_input_locale)); 1208 Safefree(save_input_locale); 1209 return FALSE; 1210 } 1211 1212 #ifdef HAS_LOCALECONV 1213 1214 # ifdef USE_LOCALE_MONETARY 1215 1216 /* Here, there is nothing in the locale name to indicate whether the locale 1217 * is UTF-8 or not. This "name", the return of setlocale(), is actually 1218 * defined to be opaque, so we can't really rely on the absence of various 1219 * substrings in the name to indicate its UTF-8ness. Look at the locale's 1220 * currency symbol. Often that will be in the native script, and if the 1221 * symbol isn't in UTF-8, we know that the locale isn't. If it is 1222 * non-ASCII UTF-8, we infer that the locale is too. 1223 * To do this, like above for LC_CTYPE, we first set LC_MONETARY to the 1224 * locale of the desired category, if it isn't that locale already */ 1225 1226 { 1227 char *save_monetary_locale = NULL; 1228 bool illegal_utf8 = FALSE; 1229 bool only_ascii = FALSE; 1230 const struct lconv* const lc = localeconv(); 1231 1232 if (category != LC_MONETARY) { 1233 1234 save_monetary_locale = stdize_locale(savepv(setlocale(LC_MONETARY, 1235 NULL))); 1236 if (! save_monetary_locale) { 1237 DEBUG_L(PerlIO_printf(Perl_debug_log, 1238 "Could not find current locale for LC_MONETARY\n")); 1239 goto cant_use_monetary; 1240 } 1241 1242 if (strNE(save_monetary_locale, save_input_locale)) { 1243 if (! setlocale(LC_MONETARY, save_input_locale)) { 1244 DEBUG_L(PerlIO_printf(Perl_debug_log, 1245 "Could not change LC_MONETARY locale to %s\n", 1246 save_input_locale)); 1247 Safefree(save_monetary_locale); 1248 goto cant_use_monetary; 1249 } 1250 } 1251 } 1252 1253 /* Here the current LC_MONETARY is set to the locale of the category 1254 * whose information is desired. */ 1255 1256 if (lc && lc->currency_symbol) { 1257 if (! is_utf8_string((U8 *) lc->currency_symbol, 0)) { 1258 DEBUG_L(PerlIO_printf(Perl_debug_log, 1259 "Currency symbol for %s is not legal UTF-8\n", 1260 save_input_locale)); 1261 illegal_utf8 = TRUE; 1262 } 1263 else if (is_ascii_string((U8 *) lc->currency_symbol, 0)) { 1264 DEBUG_L(PerlIO_printf(Perl_debug_log, "Currency symbol for %s contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale)); 1265 only_ascii = TRUE; 1266 } 1267 } 1268 1269 /* If we changed it, restore LC_MONETARY to its original locale */ 1270 if (save_monetary_locale) { 1271 setlocale(LC_MONETARY, save_monetary_locale); 1272 Safefree(save_monetary_locale); 1273 } 1274 1275 Safefree(save_input_locale); 1276 1277 /* It isn't a UTF-8 locale if the symbol is not legal UTF-8; otherwise 1278 * assume the locale is UTF-8 if and only if the symbol is non-ascii 1279 * UTF-8. (We can't really tell if the locale is UTF-8 or not if the 1280 * symbol is just a '$', so we err on the side of it not being UTF-8) 1281 * */ 1282 DEBUG_L(PerlIO_printf(Perl_debug_log, "\tis_utf8=%d\n", (illegal_utf8) 1283 ? FALSE 1284 : ! only_ascii)); 1285 return (illegal_utf8) 1286 ? FALSE 1287 : ! only_ascii; 1288 1289 } 1290 cant_use_monetary: 1291 1292 # endif /* USE_LOCALE_MONETARY */ 1293 #endif /* HAS_LOCALECONV */ 1294 1295 #if 0 && defined(HAS_STRERROR) && defined(USE_LOCALE_MESSAGES) 1296 1297 /* This code is ifdefd out because it was found to not be necessary in testing 1298 * on our dromedary test machine, which has over 700 locales. There, looking 1299 * at just the currency symbol gave essentially the same results as doing this 1300 * extra work. Executing this also caused segfaults in miniperl. I left it in 1301 * so as to avoid rewriting it if real-world experience indicates that 1302 * dromedary is an outlier. Essentially, instead of returning abpve if we 1303 * haven't found illegal utf8, we continue on and examine all the strerror() 1304 * messages on the platform for utf8ness. If all are ASCII, we still don't 1305 * know the answer; but otherwise we have a pretty good indication of the 1306 * utf8ness. The reason this doesn't necessarily help much is that the 1307 * messages may not have been translated into the locale. The currency symbol 1308 * is much more likely to have been translated. The code below would need to 1309 * be altered somewhat to just be a continuation of testing the currency 1310 * symbol. */ 1311 int e; 1312 unsigned int failures = 0, non_ascii = 0; 1313 char *save_messages_locale = NULL; 1314 1315 /* Like above for LC_CTYPE, we set LC_MESSAGES to the locale of the 1316 * desired category, if it isn't that locale already */ 1317 1318 if (category != LC_MESSAGES) { 1319 1320 save_messages_locale = stdize_locale(savepv(setlocale(LC_MESSAGES, 1321 NULL))); 1322 if (! save_messages_locale) { 1323 goto cant_use_messages; 1324 } 1325 1326 if (strEQ(save_messages_locale, save_input_locale)) { 1327 Safefree(save_input_locale); 1328 } 1329 else if (! setlocale(LC_MESSAGES, save_input_locale)) { 1330 Safefree(save_messages_locale); 1331 goto cant_use_messages; 1332 } 1333 } 1334 1335 /* Here the current LC_MESSAGES is set to the locale of the category 1336 * whose information is desired. Look through all the messages */ 1337 1338 for (e = 0; 1339 #ifdef HAS_SYS_ERRLIST 1340 e <= sys_nerr 1341 #endif 1342 ; e++) 1343 { 1344 const U8* const errmsg = (U8 *) Strerror(e) ; 1345 if (!errmsg) 1346 break; 1347 if (! is_utf8_string(errmsg, 0)) { 1348 failures++; 1349 break; 1350 } 1351 else if (! is_ascii_string(errmsg, 0)) { 1352 non_ascii++; 1353 } 1354 } 1355 1356 /* And, if we changed it, restore LC_MESSAGES to its original locale */ 1357 if (save_messages_locale) { 1358 setlocale(LC_MESSAGES, save_messages_locale); 1359 Safefree(save_messages_locale); 1360 } 1361 1362 /* Any non-UTF-8 message means not a UTF-8 locale; if all are valid, 1363 * any non-ascii means it is one; otherwise we assume it isn't */ 1364 return (failures) ? FALSE : non_ascii; 1365 1366 } 1367 cant_use_messages: 1368 1369 #endif 1370 1371 DEBUG_L(PerlIO_printf(Perl_debug_log, 1372 "Assuming locale %s is not a UTF-8 locale\n", 1373 save_input_locale)); 1374 Safefree(save_input_locale); 1375 return FALSE; 1376 } 1377 1378 #endif 1379 1380 /* 1381 * Local variables: 1382 * c-indentation-style: bsd 1383 * c-basic-offset: 4 1384 * indent-tabs-mode: nil 1385 * End: 1386 * 1387 * ex: set ts=8 sts=4 sw=4 et: 1388 */ 1389