Lines Matching defs:locale

1 /*    locale.c
23 /* utility functions for handling locale-specific stuff like what
26 * All C programs have an underlying locale. Perl code generally doesn't pay
27 * any attention to it except within the scope of a 'use locale'. For most
31 * are used to toggle between the current locale and the C locale depending on
33 * switched to the C locale for outputting the message unless within the scope
34 * of 'use locale'.
37 * regard to locale handling. At the end of these introductory comments, are
43 * This code now has multi-thread-safe locale handling on systems that support
46 * involve a lot of locale switching, and would require XS code changes.
59 * vagaries of the different locale implementations out there.
63 * systems where the same API, after set up, is used for thread-safe locale
71 * in the C locale, except during those relatively rare times when it needs to
72 * be in the underlying locale. There is a bunch of code to accomplish this,
85 * always for unthreaded perls, and when the API for safe locale threading
93 * where perl is to ignore some locale categories that the libc
105 * 3a and 3b) An implementation of POSIX 2008 thread-safe locale handling,
128 * its locale information before the first fork, and be stable thereafter. But
129 * perl toggles LC_NUMERIC if the locale's radix character isn't a dot, as do
135 * _c Means the argument is a locale category number known at compile time.
138 * _r Means the argument is a locale category number whose value might not be
140 * _i Means the argument is our internal index of a locale category
145 * implementation, with the category and new locale. The input locale is
148 * querying the locale, so the input locale must not be NULL.
150 * This macro is suitable for toggling the locale back and forth during an
154 * here will toggle LC_CTYPE into the locale of LC_TIME temporarily to
157 * Several categories require extra work when their locale is changed.
161 * LC_CTYPE locale gets toggled during an operation, and will be toggled
184 * This returns a string that specifies the current locale for the given
188 * across calls, or long term. This returns the actual current locale,
190 * supposed to be a locale whose decimal radix character is a comma. As
193 * querylocale_X() returns the locale that libc has stored at this moment,
194 * so most of the time will return a locale whose radix character is a
196 * locale that an external caller would expect, for all categories except
213 * locale. Thus it combines a bool_setlocale_X() with a querylocale_X(). It
223 * This compiles perl to always use the C locale, ignoring any
225 * crippled locale implementation.
234 * Even if the libc locale operations specified by the Posix 2008
242 * Normally, setlocale() is used for locale operations on perls
243 * compiled without threads. This option causes the locale operations
251 * querylocale() is called only while the locale mutex is locked, and
268 * always keep the named category(ies) in the C locale.
285 * If LC_TIME has been set to a Chinese locale, strftime() can be used
296 * skip its syncing LC_CTYPE and whatever the other locale is.
310 * compile and exercise much of the locale-related code that instead
391 # define start_DEALING_WITH_MISMATCHED_CTYPE(locale) \
392 const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale)
393 # define end_DEALING_WITH_MISMATCHED_CTYPE(locale) \
396 # define start_DEALING_WITH_MISMATCHED_CTYPE(locale)
397 # define end_DEALING_WITH_MISMATCHED_CTYPE(locale)
403 * to get a semblance of pretending the locale handling is that of a MingW
406 * and PERL_MEMLOG. This is thus a way to see if locale.c on Windows is
443 /* Windows uses a setlocale that takes a wchar_t* locale. Other boxes
521 S_positional_name_value_xlation(const char * locale, bool direction)
524 assert(locale);
530 switch (parse_LC_ALL_string(locale,
543 return locale;
567 S_positional_setlocale(int cat, const char * locale)
569 if (cat != LC_ALL) return setlocale(cat, locale);
571 if (locale && strNE(locale, "")) {
572 locale = S_positional_name_value_xlation(locale, 0);
573 if (! locale) return NULL;
576 locale = setlocale(cat, locale);
577 if (locale == NULL) return NULL;
578 return S_positional_name_value_xlation(locale, 1);
586 S_positional_newlocale(int mask, const char * locale, locale_t base)
588 assert(locale);
590 if (mask != LC_ALL_MASK) return newlocale(mask, locale, base);
592 if (strNE(locale, "")) locale = S_positional_name_value_xlation(locale, 0);
593 if (locale == NULL) return NULL;
594 return newlocale(LC_ALL_MASK, locale, base);
633 /* Not all categories need be set to the same locale. This macro determines if
651 * C locale. This would be done where the implementation on a platform is
653 * not implemented LC_COLLATE beyond the C locale. The 'category_available[]'
655 * in C. This macro substitutes C for the locale appropriately, expanding to
687 /* Default values come from the C locale */
704 * locale. However, the output of setlocale() is documented to be opaque, but
706 * other locale. Note that VMS includes many non-ASCII characters in these two
726 #define langinfo_sv_c(item, category, locale, sv, utf8ness) \
727 langinfo_sv_i(item, category##_INDEX_, locale, sv, utf8ness)
732 #define langinfo_i(item, index, locale, utf8ness) \
733 langinfo_sv_i(item, index, locale, PL_scratch_langinfo, utf8ness)
735 #define langinfo_c(item, category, locale, utf8ness) \
736 langinfo_i(item, category##_INDEX_, locale, utf8ness)
739 # define toggle_locale_i(index, locale) \
740 ((const char *) (PERL_UNUSED_VAR(locale), NULL))
741 # define restore_toggled_locale_i(index, locale) PERL_UNUSED_VAR(locale)
743 # define toggle_locale_i(index, locale) \
744 S_toggle_locale_i(aTHX_ index, locale, __LINE__)
745 # define restore_toggled_locale_i(index, locale) \
746 S_restore_toggled_locale_i(aTHX_ index, locale, __LINE__)
749 # define toggle_locale_c(cat, locale) toggle_locale_i(cat##_INDEX_, locale)
750 # define restore_toggled_locale_c(cat, locale) \
751 restore_toggled_locale_i(cat##_INDEX_, locale)
754 # define setlocale_debug_string_i(index, locale, result) \
755 my_setlocale_debug_string_i(index, locale, result, __LINE__)
756 # define setlocale_debug_string_c(category, locale, result) \
757 setlocale_debug_string_i(category##_INDEX_, locale, result)
758 # define setlocale_debug_string_r(category, locale, result) \
760 locale, result)
768 /* Below are parallel arrays for locale information indexed by our mapping of
787 /* The first array is the locale categories perl uses on this system, used to
866 * the C locale */
901 * an individual locale, hence marks the elements here as not actually
1014 locale_panic_via_(Perl_form(aTHX_ "Unknown locale category %d", category),
1024 /* Remove any locale mutex, in preparation for an inglorious termination,
1118 /* Macros to report and croak on an unexpected failure to set the locale. The
1132 * locale on this system, or if 'action is 'no_override'. Otherwise it expands
1169 * to be the representation of an LC_ALL locale, and splits the result into
1174 * The locale for each category is independent of the other categories.
1176 * usually keeps LC_NUMERIC in the C locale, regardless of the underlying
1177 * locale. LC_ALL has to be able to represent the case of when not all
1178 * categories have the same locale. Platforms have differing ways of
1185 * Often, all categories will have the same locale. This is special cased
1204 * Otherwise, output[] will be filled with the individual locale names for
1210 * perl may be configured to ignore changes to a category's locale to
1310 /* Parse the input locale string */
1330 * 'category=locale' form */
1365 /* The locale name starts just beyond the '=' */
1381 * category, and 's' points to the beginning of the locale name for
1426 * categories have the same locale. Hence, if it is still 'false', all of
1456 msg = "doesn't list every locale category";
1499 # define base_posix_setlocale_(cat, locale) win32_setlocale(cat, locale)
1501 # define base_posix_setlocale_(cat, locale) \
1502 ((const char *) setlocale(cat, locale))
1512 # define posix_setlocale(cat, locale) base_posix_setlocale_(cat, locale)
1514 # define posix_setlocale(cat, locale) \
1515 S_posix_setlocale_with_complications(aTHX_ cat, locale, __LINE__)
1526 /* Querying the current locale returns the real value */
1589 * only for the relatively rare case of the desired locale being
1618 * locale which needs to be restored */
1656 # define stdized_setlocale(cat, locale) posix_setlocale(cat, locale)
1657 # define stdize_locale(cat, locale) (locale)
1659 # define stdized_setlocale(cat, locale) \
1660 S_stdize_locale(aTHX_ cat, posix_setlocale(cat, locale), __LINE__)
1702 NULL, /* query each individ locale */
1731 /* And parse the locale string, splitting into its individual
1753 /* All categories here are set to the same locale, and the parse
1755 * that locale. */
1841 * 1) bool_setlocale_X attempts to set the given category's locale to the
1846 * 3) querylocale_X to see what the given category's locale is
1885 # define bool_setlocale_r(cat, locale) cBOOL(posix_setlocale(cat, locale))
1892 # define setlocale_i(i, locale) stdized_setlocale(categories[i], locale)
1901 # define setlocale_i(i, locale) S_setlocale_i(aTHX_ categories[i], locale)
1904 S_setlocale_i(pTHX_ const int category, const char * locale)
1907 return stdized_setlocale(category, locale);
1911 const char * retval = save_to_buffer(stdized_setlocale(category, locale),
1935 S_less_dicey_setlocale_r(pTHX_ const int category, const char * locale)
1943 retval = save_to_buffer(stdized_setlocale(category, locale),
1954 # define bool_setlocale_r(cat, locale) \
1955 less_dicey_bool_setlocale_r(cat, locale)
1958 S_less_dicey_bool_setlocale_r(pTHX_ const int cat, const char * locale)
1967 retval = cBOOL(posix_setlocale(cat, locale));
1976 * suffices for both querying and setting the locale. It allows for some
1978 # define setlocale_i(i, locale) less_dicey_setlocale_r(categories[i], locale)
1980 /* The code in this file may change the locale briefly during certain
2019 /* This function returns the name of the locale category given by the input
2023 * the category name of a locale, disregarding a basic linguistic tenet
2027 * systems without querylocale(), we have to keep track of what the locale
2031 * 1) We don't know what calling newlocale() with the locale argument ""
2035 * 2) It's possible for C code in some library to change the locale
2039 * setlocale(), but that changes the global locale, and threads using
2041 * 3) Many systems have multiple names for the same locale. Generally,
2043 * On some systems, if you set the locale to an alias, and then
2049 * The code is structured so that the returned locale name when the
2050 * locale is changed is whatever the result of querylocale() on the
2051 * new locale is. This effectively gives the result the system
2056 * global locale temporarily, using setlocale() to get the base name;
2072 * work in the global locale, and we have the means to get the correct
2080 /* Here we have handled the case of the current locale being the global
2101 /* If the current locale object is the C object, then the answer is
2120 * of the current locale not being the global one on platforms where
2172 * the case of the current locale not being the global one on systems that
2195 /* querylocale() may conflate the C locale with something that
2198 * We know that if the locale object is the C one, we
2199 * are in the C locale, which may go by the name POSIX, as both, by
2230 # define bool_setlocale_i(i, locale) \
2231 bool_setlocale_2008_i(i, locale, __LINE__)
2243 const char * new_locale, /* The locale to set the category to */
2251 * locale functions to emulate the behavior of setlocale(). Similar to
2257 * By doing this, most locale-sensitive functions become thread-safe. The
2267 " new locale=\"%s\", current locale=\"%s\","
2274 /* Here, trying to change the locale, but it is a no-op if the new boss is
2276 * from the global locale, so in that case we will create a per-thread
2277 * locale below (with the current values). It also seemed that newlocale()
2278 * could free up the basis locale memory if we called it with the new and
2295 * happens with setting a locale to "" */
2349 /* For this bug, if the LC_MESSAGES locale changes, we have to do an
2366 * the C library's discretion), hence we can't be using that locale at the
2368 * switch to a known locale object that we don't otherwise mess with. */
2371 /* Not being able to change to the C locale is severe; don't keep
2385 * PL_C_locale_obj because newlocale() generally destroys its locale
2394 /* PL_C_locale_obj is LC_ALL set to the C locale. If this call is to
2415 * create the changed locale, trashing it iff successful.
2438 # define DEBUG_NEW_OBJECT_CREATED(category, locale, new, old, caller_line) \
2443 category, locale, new, old, \
2445 # define DEBUG_NEW_OBJECT_FAILED(category, locale, basis_obj) \
2450 category, locale, basis_obj, \
2453 /* Ready to create a new locale by modification of the existing one.
2482 * can't switch to C or the global locale, so maybe should
2501 /* Loop through the individual categories, setting the locale of
2509 * locale to what it was at the time this function was called
2513 * next one. (The first time we effectively use the locale in
2528 /* Failed. Likely this is because the proposed new locale
2567 /* Here, successfully created an object representing the desired locale;
2573 " into new locale failed",
2599 /* Invalidate the glibc cache of loaded translations if the locale has
2614 /* We earlier switched to the LC_ALL => C locale in anticipation of it
2658 # define void_setlocale_r_with_caller(cat, locale, file, line) \
2660 if (! bool_setlocale_r(cat, locale)) \
2662 NULL, locale, __LINE__, 0, \
2666 # define void_setlocale_c_with_caller(cat, locale, file, line) \
2667 void_setlocale_r_with_caller(cat, locale, file, line)
2669 # define void_setlocale_i_with_caller(i, locale, file, line) \
2670 void_setlocale_r_with_caller(categories[i], locale, file, line)
2672 # define void_setlocale_r(cat, locale) \
2673 void_setlocale_r_with_caller(cat, locale, __FILE__, __LINE__)
2674 # define void_setlocale_c(cat, locale) \
2675 void_setlocale_r(cat, locale)
2676 # define void_setlocale_i(i, locale) \
2677 void_setlocale_r(categories[i], locale)
2683 # define void_setlocale_i_with_caller(i, locale, file, line) \
2685 if (! bool_setlocale_i(i, locale)) \
2686 setlocale_failure_panic_via_i(i, NULL, locale, __LINE__, 0, \
2690 # define void_setlocale_r_with_caller(cat, locale, file, line) \
2691 void_setlocale_i_with_caller(get_category_index(cat), locale, \
2694 # define void_setlocale_c_with_caller(cat, locale, file, line) \
2695 void_setlocale_i_with_caller(cat##_INDEX_, locale, file, line)
2697 # define void_setlocale_i(i, locale) \
2698 void_setlocale_i_with_caller(i, locale, __FILE__, __LINE__)
2699 # define void_setlocale_c(cat, locale) \
2700 void_setlocale_i(cat##_INDEX_, locale)
2701 # define void_setlocale_r(cat, locale) \
2702 void_setlocale_i(get_category_index(cat), locale)
2710 /* Most of the cases in this file just toggle the locale briefly; but there are
2721 * otherwise these toggle to the over-arching locale. When the individual
2722 * toggles are executed, they will check and find that the locale is already in
2725 # define toggle_locale_c_unless_locking(cat, locale) NULL
2726 # define toggle_locale_c_if_locking( cat, locale) \
2727 toggle_locale_i(cat##_INDEX_, locale)
2729 # define restore_toggled_locale_c_unless_locking(cat, locale) \
2730 PERL_UNUSED_ARG(locale)
2731 # define restore_toggled_locale_c_if_locking( cat, locale) \
2732 restore_toggled_locale_i( cat##_INDEX_, locale)
2734 # define toggle_locale_c_unless_locking(cat, locale) \
2735 toggle_locale_i(cat##_INDEX_, locale)
2736 # define toggle_locale_c_if_locking( cat, locale) NULL
2738 # define restore_toggled_locale_c_unless_locking(cat, locale) \
2739 restore_toggled_locale_i(cat##_INDEX_, locale)
2740 # define restore_toggled_locale_c_if_locking( cat, locale) \
2741 PERL_UNUSED_ARG(locale)
2744 /* query_nominal_locale_i() is used when the caller needs the locale that an
2825 * substitute the nominal locale for LC_NUMERIC when returning a value for
2849 * This function calculates a string that defines the locale(s) LC_ALL is
2856 * a) EXTERNAL_FORMAT_FOR_SET returns the actual locale currently in
2858 * b) EXTERNAL_FORMAT_FOR_QUERY returns the nominal locale.
2859 * Currently this can differ only from the actual locale in the
2860 * LC_NUMERIC category when it is set to a locale whose radix is
2861 * not a dot. (The actual locale is kept as a dot to accommodate
2894 * If all individual categories are the same locale, we can just set LC_ALL
2895 * to that locale. But if not, we have to create an aggregation of all the
2913 * locale should be set to LANG or "C". So there is no good solution. khw
2949 /* While we are calculating LC_ALL, we see if every category's locale is
3014 * total strlen()s of the locale name of each individual category. */
3027 /* If all categories have the same locale, we already know the answer */
3053 else { /* Here, not all categories have the same locale */
3115 /* "name=locale;" */
3173 /* NB: This function may actually change the locale on Windows. It
3174 * currently is designed to be called only from setting the locale on
3177 * This function returns the locale specified by the program's environment
3182 * locale; merely returns it.
3204 * than use "" as the locale. This will lead to results that differ
3214 * section, to save the global locale's current value, and do a
3216 * values, destroying the global locale's, which we would then
3218 * using the global locale and isn't using the mutex. And, the only
3227 * in POSIX, and is the system default locale in Windows. To get that
3316 " locale=%s, locale of 0th category=%s, disparate=%d\n",
3356 * category determined by 'cat_index', and the locale that was in effect
3392 * locale is UTF-8, so just dumps bytes. Actually figuring it out can be
3395 "Can't change locale for %s (%d) from '%s' to '%s'"
3420 * new locale, and we are switched into it. It installs this locale as the
3421 * current underlying default, and then switches to the C locale, if
3425 * The default locale and the C locale can be toggled between by use of the
3435 * PL_numeric_name The underlying locale's name: a copy of 'newnum'
3437 * such that the current locale is the program's
3438 * underlying locale
3440 * that the current locale is the C locale or
3441 * indistinguishable from the C locale. If non-zero, it
3445 * indicating that the underlying locale and the standard
3446 * C locale are indistinguishable for the purposes of
3453 * program's underlying locale's radix character string,
3455 * PL_underlying_radix_sv Contains the program's underlying locale's
3466 * of the current locale vs the standard C locale. If the new locale that
3472 * assume that that library changed the locale in unknown ways.)
3474 * Even if our records are valid, the new locale will likely have been
3476 * one indistinguishable from the C locale with regards to LC_NUMERIC
3479 * locale is indistinguishable from the C locale. */
3505 /* We are in the underlying locale until changed at the end of this
3513 /* Find and save this locale's radix character. */
3521 /* This locale is indistinguishable from C (for numeric purposes) if both
3531 * same as C's is the locale indistinguishable from C.
3557 /* Keep LC_NUMERIC so that it has the C locale radix and thousands
3560 * locale change to it temporarily). */
3577 /* Unconditionally toggle the LC_NUMERIC locale to the C locale
3582 * wrong if some XS code has changed the locale behind our back) */
3584 DEBUG_L(PerlIO_printf(Perl_debug_log, "Setting LC_NUMERIC locale to"
3608 /* Unconditionally toggle the LC_NUMERIC locale to the current underlying
3614 * wrong if some XS code has changed the locale behind our back) */
3616 DEBUG_L(PerlIO_printf(Perl_debug_log, "Setting LC_NUMERIC locale to %s;"
3640 * core Perl this and that 'newctype' is the name of the new locale.
3654 /* We will replace any bad locale warning with
3656 * 2) a new warning for the bad new locale */
3668 /* For the C locale, just use the standard folds, and we know there are no
3691 /* A UTF-8 locale gets standard rules. But note that code still has to
3716 else { /* Not a canned locale we know the values for. Compute them */
3749 * locale */
3816 * locale requires more than one byte, there are going to be BIG problems.
3823 /* Some platforms return MB_CUR_MAX > 1 for even the "C" locale.
3853 /* We don't populate the other lists if a UTF-8 locale, but do check that
3865 /* If checking for locale problems, see if the native ASCII-range
3867 * the new locale. If not, this could mean big trouble, upending
4039 /* If we are actually in the scope of the locale or are debugging,
4041 * message to be output at the first operation using this locale,
4084 * called just after a change, so uses the actual underlying locale just
4127 * core Perl this and that 'newcoll' is the name of the new locale.
4129 * The design of locale collation is that every locale change is given an
4131 * operation that requires collation while locale collation is active, it
4136 * cleared. The next time the locale changes, the index is incremented,
4138 * necessarily still valid, and so is recomputed. Note that if the locale
4144 /* Return if the locale isn't changing */
4233 S_wrap_wsetlocale(pTHX_ const int category, const char *locale)
4245 if (locale) {
4246 wlocale = Win_utf8_string_to_wstring(locale);
4269 S_win32_setlocale(pTHX_ int category, const char* locale)
4272 * difference between the two unless the input locale is "", which normally
4275 * In POSIX, it instead means to find the locale from the user's
4281 if (locale != NULL && strEQ(locale, "")) {
4282 /* Note this function may change the locale, but that's ok because we
4284 locale = find_locale_from_environment(get_category_index(category));
4285 if (locale == NULL) {
4291 const char * result = wrap_wsetlocale(category, locale);
4293 setlocale_debug_string_r(category, locale, result)));
4309 * the input locale is NULL, we were just querying, so the original value
4311 if (locale == NULL) {
4339 /* Determine the current locale and return it in the form the platform's
4340 * native locale handling understands. This is different only from our
4350 * the C locale (or equivalent) for it. */
4388 locale using a querylocale function */
4406 returns the correct underlying C<LC_NUMERIC> locale. Regular C<setlocale> will
4407 instead return C<C> if the underlying locale has a non-dot decimal point
4409 numbers. This is because perl keeps that locale category such that it has a
4410 dot and empty separator, changing the locale briefly during the operations
4424 Changing the locale is not a good idea when more than one thread is running,
4426 non-zero. This is because on such systems the locale is global to the whole
4430 failing to actually change the locale. z/OS refuses to try to change the
4431 locale once a second thread is created. C<Perl_setlocale>, should give you
4433 returning NULL if the system forbade the locale change.
4443 Perl_setlocale(const int category, const char * locale)
4450 PERL_UNUSED_ARG(locale);
4460 category, locale));
4469 if (locale == NULL) {
4471 locale = "";
4477 /* diag_listed_as: Unknown locale category %d; can't set it to %s */
4480 "Unknown locale category %d%s%s",
4481 category, conditional_warn_text, locale);
4494 return setlocale_i(cat_index, locale);
4499 /* Get current locale */
4502 /* A NULL locale means only query what the current one is. */
4503 if (locale == NULL) {
4507 if (strEQ(current_locale, locale)) {
4509 "Already in requested locale: no action taken\n"));
4514 if (! bool_setlocale_i(cat_index, locale)) {
4516 setlocale_debug_string_i(cat_index, locale, "NULL")));
4520 /* At this point, the locale has been changed based on the requested value,
4523 * may have returned a synonymous locale name instead of the input one; or,
4524 * if there are locale categories that we are compiled to ignore, any
4530 * to switch into a locale that has a dot radix. */
4536 * the locale, so has accurate
4559 const char* locale, /* Optional locale name */
4562 * to set 'category' to 'locale' */
4575 if (locale == NULL) {
4577 locale = "NULL";
4605 locale_quote, locale, locale_quote,
4624 /* Changes the locale for the category specified by 'index' to 'new_locale,
4628 * Returns a copy of the name of the original locale for 'cat_index'
4632 /* Find the original locale of the category we may need to change, so that
4645 "Could not find current %s locale",
4652 * changing the locale out from under us. */
4657 DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s locale unchanged as %s\n",
4663 /* Finally, change the locale to the new one */
4667 "%s locale switched to %s\n",
4683 /* Restores the locale for LC_category corresponding to cat_index to
4699 "restore_toggled_locale_i: restoring locale for" \
4720 const char * locale,
4730 PERL_UNUSED_ARG(locale);
4735 /* Return to indicate if 'string' in the locale given by the input
4738 * If the input 'locale' is not NULL, use that for the locale; otherwise
4739 * use the current locale for the category specified by 'cat_index'.
4743 "Entering get_locale_string_utf8ness_i; locale=%s,"
4745 locale, cat_index, category_names[cat_index],
4779 * If we already know the UTF-8ness of the locale, then we immediately know
4785 if (locale == NULL) {
4786 locale = querylocale_i(cat_index);
4789 /* If the locale is UTF-8, the string is UTF-8; otherwise it was
4793 * being passed in, you might think that that locale is essentially always
4794 * the C locale, so it would make sense to say it isn't UTF-8. But to get
4795 * here, the string has to contain characters unknown in the C locale. And
4797 * message catalog isn't really a part of the locale system. But those
4801 return (is_locale_utf8(locale)) ? UTF8NESS_YES : UTF8NESS_NO;
4808 S_is_locale_utf8(pTHX_ const char * locale)
4812 /* Returns TRUE if the locale 'locale' is UTF-8; FALSE otherwise. */
4818 PERL_UNUSED_ARG(locale);
4826 /* If the input happens to be the same locale as we are currently setup
4828 if (strEQ(locale, PL_ctype_name)) {
4832 if (isNAME_C_OR_POSIX(locale)) {
4839 * the answer. First, toggle to the desired locale so can query its state
4841 const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
4848 /* If there are fewer bytes available in this locale than are required
4850 * locale. */
4860 /* With these functions, we can definitively determine a locale's
4865 * as that Unicode code point, this has to be a UTF-8 locale; otherwise it
4891 * get the locale's codeset, which will be some form of 'UTF-8' for a
4892 * UTF-8 locale. langinfo_c() handles this, and we will call that
4899 /* If the above compiled into code, it found the locale's UTF-8ness,
4911 const char * codeset = langinfo_c(CODESET, LC_CTYPE, locale, NULL);
4917 locale, retval));
4966 /* We don't know the locale utf8ness here, and not even the locale itself.
4968 * output than the locale system, it is going to be problematic deciding
5061 * locale as whatever is being worked on */
5199 * localeconv() returns items from two different locale categories,
5332 /* If both NUMERIC and MONETARY must be the "C" locale, simply populate the
5333 * hash using the function that works on just that locale. */
5359 * locale to match LC_NUMERIC's for the numeric fields, and LC_MONETARY's
5362 * twice, once for each locale, setting LC_CTYPE to match the category.
5377 /* This gives the locale to use for the corresponding OFFSET, like the
5415 * locale is "C", or set up the appropriate parameters for the call below
5418 const char *locale;
5447 locale = PL_numeric_name;
5457 locale = querylocale_c(LC_MONETARY);
5458 if (isNAME_C_OR_POSIX(locale)) {
5478 locales[MONETARY_OFFSET] = locales[NUMERIC_OFFSET] = locale;
5551 * string item is UTF-8 or not. This is because the locale's UTF-8ness is
5619 const char * locale, /* Unused */
5634 PERL_UNUSED_ARG(locale);
5635 assert(isNAME_C_OR_POSIX(locale));
5638 * the C locale */
5683 /* Add any int fields. In the C locale, all are -1 */
5703 /* Switch to this locale to run
5705 const char * locale,
5746 * locale that LC_NUMERIC and LC_MONETARY will be toggled to
5747 * b) On calls that process LC_NUMERIC, toggle to the desired locale
5749 * locale
5764 start_DEALING_WITH_MISMATCHED_CTYPE(locale);
5765 # define CTYPE_TEARDOWN end_DEALING_WITH_MISMATCHED_CTYPE(locale)
5772 /* We need to toggle the NUMERIC locale to the desired one if we are
5782 * to make sure that the locale we want is set after LC_CTYPE. We
5783 * unconditionally toggle away from and back to the current locale
5786 (void) toggle_locale_c(LC_NUMERIC, locale);
5799 orig_NUMERIC_locale = toggle_locale_c(LC_NUMERIC, locale);
5826 (void) toggle_locale_c(LC_MONETARY, locale);
5839 orig_MONETARY_locale = toggle_locale_c(LC_MONETARY, locale);
5854 /* Here, have toggled to the correct locale.
5874 * locale instead of the thread one. As a work-around, we toggle to the
5875 * global locale; populate the return; then toggle back. We have to use
5880 * converted to use the global locale, and doesn't protect its locale calls
5882 * Windows, as the locale API is the same regardless of thread-safety,
5885 * some alien-to-Perl library that thinks it owns locale setting. Such a
5891 /* Save the per-thread locale state */
5894 /* Change to the global locale, and note if we already were there */
5904 /* Save the state of the global locale; then convert to our desired
5911 /* Restore the global locale's prior state */ \
5921 /* Restore the per-thread locale state */ \
5986 locale,
6039 C<nl_langinfo()>, and hides the quirks of Perl's locale handling from your
6070 because these are from the C<LC_NUMERIC> locale category, which is normally
6072 string, no matter what the underlying locale is supposed to be, and so to get
6074 locale, and later toggle back. (You could use plain C<nl_langinfo> and
6077 (or equivalent) locale would break a lot of CPAN, which is expecting the radix
6084 C<setlocale>, or other locale change. The returned buffer of these functions
6105 associated with the C locale, or simply C<"">, whichever is more appropriate.
6112 that are controlled by the LC_NUMERIC locale category. Instead, avoid all of
6147 * locale), and so we define this macro to transparently hide the absence of
6193 /* Find the locale category that controls the input 'item', and call
6321 * is confined to the "C" locale on this platform, which the callee also
6332 * calculated. We need to find the current corresponding locale, and pass
6346 /* The locale category that controls it */
6349 /* The locale to look up 'item' in. */
6350 const char * locale,
6365 * nl_langinfo() isn't available for the desired locale, or is completely
6423 "Entering langinfo_sv_i item=%jd, using locale %s\n",
6424 (PERL_INTMAX_T) item, locale));
6429 return emulate_langinfo(item, locale, sv, utf8ness);
6436 * locale object with newlocale() and freeing it afterwards. But doing so
6440 start_DEALING_WITH_MISMATCHED_CTYPE(locale);
6442 const char * orig_switched_locale = toggle_locale_i(cat_index, locale);
6464 * toggled locale categories. These could easily end up
6512 /* Some glibc's return random values for this item and locale;
6514 if (isNAME_C_OR_POSIX(locale)) {
6626 " representation '%s' for locale '%s'",
6629 locale));
6685 end_DEALING_WITH_MISMATCHED_CTYPE(locale);
6691 locale, cat_index);
6715 const char * locale,
6722 const char * locale,
6727 PERL_UNUSED_ARG(locale); /* Too complicated to specify which
6734 * supposed to always stay in the C locale for them. This function
6735 * has hard-coded in the results for those for the C locale.
6747 * (and other similar Windows API functions) use what MS calls "locale
6748 * names", whereas the C functions use what MS calls "locale strings". The
6749 * locale string "English_United_States.1252" is equivalent to the locale
6780 "Entering emulate_langinfo item=%jd, using locale %s\n",
6781 item, locale));
6821 /* The locale's currency symbol may be empty. But if not, the return
6872 const char * orig_switched_locale = toggle_locale_c(LC_NUMERIC, locale);
6875 * the locale */
6973 locale,
6995 if (isNAME_C_OR_POSIX(locale)) {
7001 if (PL_in_utf8_CTYPE_locale && strEQ(locale, PL_ctype_name)) {
7019 orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
7029 DEBUG_Lv(PerlIO_printf(Perl_debug_log, "locale='%s' cp=%s\n",
7030 locale, SvPVX(sv)));
7038 * UTF-8 locale or not. If it is UTF-8, we (correctly) use that for
7043 if (is_locale_utf8(locale)) {
7051 * could think of is to see if the codeset is part of the locale name.
7061 name = strchr(locale, '.');
7086 /* Here, 'retval' contains any codeset name derived from the locale
7103 if (S_maybe_override_codeset(aTHX_ retval, locale, &retval)) {
7116 * locale values. khw fairly arbitrarily decided which of its non-empty
7276 * "Give me the name in this locale of the 3rd month of the year"
7277 * (March in an English locale). The second main type is "Give me
7279 * formatting the date and time in this locale." The other two
7300 * available for this locale on this platform, but should be good
7428 * may not be what the locale actually says, but should give good
7430 * trying to parse them to figure out what the locale says). The
7449 * locale, the %E modifier is useless, so don't return it. */
7502 = toggle_locale_c_unless_locking(LC_TIME, locale);
7507 ints_to_tm(&mytm, locale, 30, 30, hour, mday, mon, 2011, 0, 0, 0);
7512 locale,
7524 succeeded = strftime_tm(format, sv, locale, &mytm);
7622 orig_TIME_locale = toggle_locale_c_unless_locking(LC_TIME, locale);
7651 ints_to_tm(&mytm, locale, sec, min, hour, mday, mon, year, 0, 0, 0);
7655 locale,
7663 succeeded = strftime_tm(fmts[j], alt_dig_sv, locale, &mytm);
7699 /* Above we accepted 0 for alt-0 in case the locale doesn't have a
7702 * false positive, and the locale doesn't have alternate digits */
7739 "Leaving emulate_langinfo item=%jd, using locale %s\n",
7740 item, locale));
7759 "Leaving emulate_langinfo item=%jd, using locale %s\n",
7760 item, locale));
7776 const char * locale,
7783 * Conversely (but rarely), "UTF-8" in the locale name might be wrong. We
7791 const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
7802 * number on some platforms for the C locale, but for no others. That
7803 * locale was already ruled out in the code that called this function. (If
7811 /* If there are fewer bytes available in this locale than are required to
7813 * UTF-8 locale, even if the locale name says it is. */
7826 /* But if the locale could be UTF-8, and also the name corroborates this,
7861 /* Otherwise, the locale is likely UTF-8 */
7875 * out such a locale as being UTF-8, even if the code set name checked
7881 * to look at various strings associated with the locale:
7882 * 1) If any are illegal UTF-8, the locale can't be UTF-8.
7886 * 3) If all are ASCII and the locale name and/or MB_CUR_MAX indicate
7887 * UTF-8, assume the locale is UTF-8.
7888 * 4) Otherwise, assume the locale isn't UTF-8
7890 * To save cycles, if the locale name indicates it is a UTF-8 locale, we
7901 /* The first string tried is the locale currency name. Often that will
7936 toggle_locale_c_unless_locking(LC_TIME, locale);
7940 /* The trials array may consist of strings from two different locale
7984 langinfo_sv_i(trials[i], cat_index, locale, sv, NULL);
7999 /* Here, has non-ASCII. If not legal UTF-8, isn't a UTF-8 locale */
8032 * are all ascii, and the locale name indicates it is a UTF-8 locale,
8033 * assume the locale is UTF-8. */
8044 * But what is it? The other locale categories are not likely to be of
8067 /* Otherwise, assume the locale isn't UTF-8. This can be wrong if we don't
8068 * have MB_CUR_MAX, and the locale is English without UTF-8 in its name,
8085 the current locale, the input C<fmt>, and the returned result. Only if the
8086 current C<LC_TIME> locale is a UTF-8 one (and S<C<use bytes>> is not in effect)
8110 C<LC_TIME> locale of the program, giving results based on that locale.
8123 const char * locale = querylocale_c(LC_TIME);
8125 const char * locale = "C";
8129 ints_to_tm(&mytm, locale, sec, min, hour, mday, mon, year, wday, yday,
8131 if (! strftime_tm(fmt, PL_scratch_langinfo, locale, &mytm)) {
8146 const char * locale = querylocale_c(LC_TIME);
8148 const char * locale = "C";
8152 ints_to_tm(&mytm, locale, sec, min, hour, mday, mon, year, wday, yday,
8154 return sv_strftime_common(fmt, locale, &mytm);
8176 const char * locale,
8197 locale,
8216 const char * locale,
8221 * variables for 'locale' */
8235 /* Long-standing behavior is to ignore the effects of locale (in
8246 const char * orig_TIME_locale = toggle_locale_c(LC_TIME, locale);
8267 const char *locale,
8273 * locale.
8295 start_DEALING_WITH_MISMATCHED_CTYPE(locale);
8299 const char * orig_TIME_locale = toggle_locale_c(LC_TIME, locale);
8304 PERL_UNUSED_ARG(locale);
8374 * km_KH locale. If a new script comes along that uses 4 UTF-8 bytes
8399 * that the string is syntactically invalid for the locale. On some
8413 end_DEALING_WITH_MISMATCHED_CTYPE(locale);
8421 const char * locale,
8447 case UTF8NESS_NO: /* Known not to be UTF-8; must not be UTF-8 locale */
8448 if (is_locale_utf8(locale)) {
8456 case UTF8NESS_YES: /* Known to be UTF-8; must be UTF-8 locale if can't
8458 if (! is_locale_utf8(locale)) {
8478 if (! is_locale_utf8(locale)) {
8487 * upgrade it to UTF-8 for a UTF-8 locale. Otherwise the
8488 * locale would find any UTF-8 variant characters to be
8499 if (! strftime_tm(fmt, sv, locale, mytm)) {
8505 locale,
8533 /* This is called when the program is in the global locale and are
8557 /* This platform has per-thread locale handling. Do the conversion. */
8597 "perl: warning: Please check that your locale settings:\n");
8638 ? "the standard locale" \
8640 ? "the system default locale" \
8641 : "a fallback locale")))
8644 * Initialize locale awareness.
8650 * 0 if not to output warning when setup locale is bad
8656 * 0 = fallback to a locale of lower priority
8657 * -1 = fallback to all locales failed, not even to the C locale
8667 * make sure we are in the global locale;
8669 * switch to per-thread locale if applicable;
8671 * The "" causes the locale to be set to what the environment variables at
8694 * "" to examine the environment variables for the locale
8695 * NULL to use the values already set for the locale by the program
8705 * sets up the global locale environment. At the end, if some sort of
8707 * the same locale as the global initially. thread 0 can then change its
8708 * locale at will without affecting the global one.
8710 * At destruction time, thread 0 will revert to the global locale as the
8714 * executed for each instantiation. Since it changes the global locale, it
8717 * set each instantiation's locale and set PERL_SKIP_LOCALE_INIT so this
8765 # define DEBUG_LOCALE_INIT(cat_index, locale, result) \
8767 setlocale_debug_string_i(cat_index, locale, result)));
8830 * systems with only a C locale during ./Configure. Assume that this
8858 locale_panic_("Cannot create POSIX 2008 C locale object");
8868 * malloc'd in the interim. We arbitrarily switch to the C locale,
8886 * initialized. Arbitrarily use the C locale (which we know has to exist
8912 * wants. This is done in the global locale as explained in the
8919 /* We try each locale in the enum, in order, until we get one that works,
8930 C_trial, /* C locale */
8955 const char * locale = NULL;
8967 locale = (PerlEnv_getenv("PERL_SKIP_LOCALE_INIT"))
8977 locale = lc_all;
8985 locale = lang;
8995 /* For Windows, we also try the system default locale before "C".
8999 locale = ".ACP";
9004 locale = "C";
9011 /* If the locale is a substantive name, don't try the same locale
9013 if (locale && strNE(locale, "")) {
9015 if (strEQ(checked[i], locale)) {
9020 /* And, for future iterations, indicate we've tried this locale */
9022 checked[already_checked] = savepv(locale);
9030 lc_all_string = savepv(stdized_setlocale(LC_ALL, locale));
9033 DEBUG_LOCALE_INIT(LC_ALL_INDEX_, locale, lc_all_string);
9042 "perl: warning: Setting locale failed.\n");
9053 curlocales[j] = savepv(stdized_setlocale(categories[j], locale));
9056 DEBUG_LOCALE_INIT(j, locale, curlocales[j]);
9078 "perl: warning: Setting locale failed for the categories:\n");
9097 const char * description = "a fallback locale";
9145 case no_array: /* The original is a single locale */
9149 case only_element_0: /* element[0] is a single locale valid
9177 * some locale underlying the program. Figure it out as best
9178 * we can, by querying the system's current locale */
9187 name = "locale name not determinable";
9247 * locale is UTF-8. give_perl_locale_control() just above has already
9288 /* A locale collation definition includes primary, secondary, tertiary,
9321 * to find coefficients 'm' and 'b' for this locale so that m*x + b equals
9331 * locale. Most of the strings being collated will contain a preponderance
9367 * if the locale is a UTF-8 one. Since the string
9384 /* If the results are nonsensical for this simple test, the whole locale
9385 * definition is suspect. Mark it so that locale collation is not active
9394 "Disabling locale collation for LC_COLLATE='%s';"
9433 "?UTF-8 locale=%d; x_len_shorter=%zu, "
9486 "mem_collxfrm_: locale's collation is defective\n"));
9512 * this locale, find it */
9538 * LC_COLLATE locale */
9554 * controls in a UTF-8 locale are the L1 ones */
9603 * work in the locale, repeat the loop, looking for any
9612 " embedded NULs in locale %s with", PL_collation_name));
9617 "mem_collxfrm_: Replacing embedded NULs in locale %s with "
9666 /* Make sure the UTF8ness of the string and locale match */
9672 /* Here they don't match. Change the string's to be what the locale is
9675 if (! utf8) { /* locale is UTF-8, but input isn't; upgrade the input */
9679 else { /* locale is not UTF-8; but input is; downgrade the input */
9704 * on a locale where only 0-255 are valid. If two strings
9711 * collating order for this locale, do so now */
9756 " replace above-Latin1 chars in locale %s with",
9763 " in locale %s is 0x%02X\n",
9979 " calculated for locale %s, trying again with new"
10036 "mem_collxfrm_[ix %" UVuf "] for locale '%s':\n"
10098 * If not called from within the scope of 'use locale', it uses the text from
10099 * the C locale. If Perl is compiled to not pay attention to LC_CTYPE nor
10101 * derived from the locale, LC_MESSAGES if we have that; LC_CTYPE if not.
10109 * being in the same locale. So the code below uses a common locale for both
10110 * categories. Again, that is C if not within 'use locale' scope; or the
10111 * LC_MESSAGES locale if in scope and we have that category; and LC_CTYPE if we
10116 * strerror_l(). This is the simpler. We just use the already-built C locale
10117 * object if not in locale scope, or build up a custom one otherwise.
10121 * locale.
10132 " Within locale scope=%d\n", \
10159 /* Here, neither category is defined: use the C locale */
10181 * are not within 'use locale' scope of the only one defined, we use the C
10182 * locale; otherwise use the current locale object */
10191 /* Use C if not within locale scope; Otherwise, use current locale */
10207 * either C or the LC_MESSAGES locale */
10217 if (! IN_LC(LC_MESSAGES)) { /* Use C if not within locale scope */
10221 else { /* Otherwise, use the LC_MESSAGES locale, making sure LC_CTYPE
10225 const char * locale = querylocale_c(LC_MESSAGES);
10226 cur = newlocale(LC_CTYPE_MASK, locale, cur);
10230 locale,
10268 * are not within 'use locale' scope of the only one defined, we use the C
10269 * locale; otherwise use the current locale */
10310 * either C or the LC_MESSAGES locale */
10354 * enables the locale category 'category'. 'compiling' should indicate if
10359 SV *these_categories = cop_hints_fetch_pvs(cop, "locale", 0);
10372 =for apidoc_section $locale
10375 This function copies the locale state of the calling thread into the program's
10376 global locale, and converts the thread to use that global locale.
10379 global locale and which can't be converted to not access it. Effectively, this
10384 hide locale gotchas from your code. The service you most likely will miss
10393 The global locale and the per-thread locales are independent. As long as just
10394 one thread converts to the global locale, everything works smoothly. But if
10398 the global locale), but only if you use the following operations:
10437 locale_panic_("Could not change to global locale"); \
10461 * global locale or not. */
10515 /* Switch to the underlying C numeric locale; the application is on its
10530 This function copies the state of the program global locale into the calling
10532 already, and the platform supports them. The LC_NUMERIC locale is toggled into
10533 the standard state (using the C locale's conventions), if not within the
10534 lexical scope of S<C<use locale>>.
10536 Perl will now consider itself to have control of the locale.
10538 Since unthreaded perls have only a global locale, this function is a no-op
10541 This function is intended for use with C libraries that do locale manipulation.
10546 XS code should not manipulate the locale on its own. Instead,
10548 change the locale (though changing the locale is antisocial and dangerous on
10549 multi-threaded systems that don't have multi-thread safe locale operations.
10558 If the library has an option to turn off its locale manipulation, doing that is
10561 The return value is a boolean: TRUE if the global locale at the time of call
10562 was in effect for the caller; and FALSE if a per-thread locale was in effect.
10597 /* Here, we are in the global locale. Get and save the values for each
10637 /* libc keeps per-thread locale status information in some configurations.
10649 * POSIX 2008: The current locale is kept by libc as an object. We save
10657 * thread's locale is. We keep that information in the
10661 * PL_cur_LC_ALL indicates what the locale should be
10662 * if it is a per-thread locale.
10707 "new thread, initial locale is %s;"
10713 /* Not being able to change to the C locale is severe; don't keep
10746 /* Switch to the global locale, so can free up the per-thread object */