xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/locale.c (revision 0:68f95e015346)
1 /*    locale.c
2  *
3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4  *    2000, 2001, 2002, 2003, by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * A Elbereth Gilthoniel,
13  * silivren penna m�riel
14  * o menel aglar elenath!
15  * Na-chaered palan-d�riel
16  * o galadhremmin ennorath,
17  * Fanuilos, le linnathon
18  * nef aear, si nef aearon!
19  */
20 
21 #include "EXTERN.h"
22 #define PERL_IN_LOCALE_C
23 #include "perl.h"
24 
25 #ifdef I_LOCALE
26 #  include <locale.h>
27 #endif
28 
29 #ifdef I_LANGINFO
30 #   include <langinfo.h>
31 #endif
32 
33 #include "reentr.h"
34 
35 /*
36  * Standardize the locale name from a string returned by 'setlocale'.
37  *
38  * The standard return value of setlocale() is either
39  * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL
40  * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL
41  *     (the space-separated values represent the various sublocales,
42  *      in some unspecificed order)
43  *
44  * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n",
45  * which is harmful for further use of the string in setlocale().
46  *
47  */
48 STATIC char *
S_stdize_locale(pTHX_ char * locs)49 S_stdize_locale(pTHX_ char *locs)
50 {
51     char *s;
52     bool okay = TRUE;
53 
54     if ((s = strchr(locs, '='))) {
55 	char *t;
56 
57 	okay = FALSE;
58 	if ((t = strchr(s, '.'))) {
59 	    char *u;
60 
61 	    if ((u = strchr(t, '\n'))) {
62 
63 		if (u[1] == 0) {
64 		    STRLEN len = u - s;
65 		    Move(s + 1, locs, len, char);
66 		    locs[len] = 0;
67 		    okay = TRUE;
68 		}
69 	    }
70 	}
71     }
72 
73     if (!okay)
74 	Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs);
75 
76     return locs;
77 }
78 
79 void
Perl_set_numeric_radix(pTHX)80 Perl_set_numeric_radix(pTHX)
81 {
82 #ifdef USE_LOCALE_NUMERIC
83 # ifdef HAS_LOCALECONV
84     struct lconv* lc;
85 
86     lc = localeconv();
87     if (lc && lc->decimal_point) {
88 	if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) {
89 	    SvREFCNT_dec(PL_numeric_radix_sv);
90 	    PL_numeric_radix_sv = Nullsv;
91 	}
92 	else {
93 	    if (PL_numeric_radix_sv)
94 		sv_setpv(PL_numeric_radix_sv, lc->decimal_point);
95 	    else
96 		PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0);
97 	}
98     }
99     else
100 	PL_numeric_radix_sv = Nullsv;
101 # endif /* HAS_LOCALECONV */
102 #endif /* USE_LOCALE_NUMERIC */
103 }
104 
105 /*
106  * Set up for a new numeric locale.
107  */
108 void
Perl_new_numeric(pTHX_ char * newnum)109 Perl_new_numeric(pTHX_ char *newnum)
110 {
111 #ifdef USE_LOCALE_NUMERIC
112 
113     if (! newnum) {
114 	if (PL_numeric_name) {
115 	    Safefree(PL_numeric_name);
116 	    PL_numeric_name = NULL;
117 	}
118 	PL_numeric_standard = TRUE;
119 	PL_numeric_local = TRUE;
120 	return;
121     }
122 
123     if (! PL_numeric_name || strNE(PL_numeric_name, newnum)) {
124 	Safefree(PL_numeric_name);
125 	PL_numeric_name = stdize_locale(savepv(newnum));
126 	PL_numeric_standard = (strEQ(newnum, "C") || strEQ(newnum, "POSIX"));
127 	PL_numeric_local = TRUE;
128 	set_numeric_radix();
129     }
130 
131 #endif /* USE_LOCALE_NUMERIC */
132 }
133 
134 void
Perl_set_numeric_standard(pTHX)135 Perl_set_numeric_standard(pTHX)
136 {
137 #ifdef USE_LOCALE_NUMERIC
138 
139     if (! PL_numeric_standard) {
140 	setlocale(LC_NUMERIC, "C");
141 	PL_numeric_standard = TRUE;
142 	PL_numeric_local = FALSE;
143 	set_numeric_radix();
144     }
145 
146 #endif /* USE_LOCALE_NUMERIC */
147 }
148 
149 void
Perl_set_numeric_local(pTHX)150 Perl_set_numeric_local(pTHX)
151 {
152 #ifdef USE_LOCALE_NUMERIC
153 
154     if (! PL_numeric_local) {
155 	setlocale(LC_NUMERIC, PL_numeric_name);
156 	PL_numeric_standard = FALSE;
157 	PL_numeric_local = TRUE;
158 	set_numeric_radix();
159     }
160 
161 #endif /* USE_LOCALE_NUMERIC */
162 }
163 
164 /*
165  * Set up for a new ctype locale.
166  */
167 void
Perl_new_ctype(pTHX_ char * newctype)168 Perl_new_ctype(pTHX_ char *newctype)
169 {
170 #ifdef USE_LOCALE_CTYPE
171 
172     int i;
173 
174     for (i = 0; i < 256; i++) {
175 	if (isUPPER_LC(i))
176 	    PL_fold_locale[i] = toLOWER_LC(i);
177 	else if (isLOWER_LC(i))
178 	    PL_fold_locale[i] = toUPPER_LC(i);
179 	else
180 	    PL_fold_locale[i] = i;
181     }
182 
183 #endif /* USE_LOCALE_CTYPE */
184 }
185 
186 /*
187  * Set up for a new collation locale.
188  */
189 void
Perl_new_collate(pTHX_ char * newcoll)190 Perl_new_collate(pTHX_ char *newcoll)
191 {
192 #ifdef USE_LOCALE_COLLATE
193 
194     if (! newcoll) {
195 	if (PL_collation_name) {
196 	    ++PL_collation_ix;
197 	    Safefree(PL_collation_name);
198 	    PL_collation_name = NULL;
199 	}
200 	PL_collation_standard = TRUE;
201 	PL_collxfrm_base = 0;
202 	PL_collxfrm_mult = 2;
203 	return;
204     }
205 
206     if (! PL_collation_name || strNE(PL_collation_name, newcoll)) {
207 	++PL_collation_ix;
208 	Safefree(PL_collation_name);
209 	PL_collation_name = stdize_locale(savepv(newcoll));
210 	PL_collation_standard = (strEQ(newcoll, "C") || strEQ(newcoll, "POSIX"));
211 
212 	{
213 	  /*  2: at most so many chars ('a', 'b'). */
214 	  /* 50: surely no system expands a char more. */
215 #define XFRMBUFSIZE  (2 * 50)
216 	  char xbuf[XFRMBUFSIZE];
217 	  Size_t fa = strxfrm(xbuf, "a",  XFRMBUFSIZE);
218 	  Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE);
219 	  SSize_t mult = fb - fa;
220 	  if (mult < 1)
221 	      Perl_croak(aTHX_ "strxfrm() gets absurd");
222 	  PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0;
223 	  PL_collxfrm_mult = mult;
224 	}
225     }
226 
227 #endif /* USE_LOCALE_COLLATE */
228 }
229 
230 /*
231  * Initialize locale awareness.
232  */
233 int
Perl_init_i18nl10n(pTHX_ int printwarn)234 Perl_init_i18nl10n(pTHX_ int printwarn)
235 {
236     int ok = 1;
237     /* returns
238      *    1 = set ok or not applicable,
239      *    0 = fallback to C locale,
240      *   -1 = fallback to C locale failed
241      */
242 
243 #if defined(USE_LOCALE)
244 
245 #ifdef USE_LOCALE_CTYPE
246     char *curctype   = NULL;
247 #endif /* USE_LOCALE_CTYPE */
248 #ifdef USE_LOCALE_COLLATE
249     char *curcoll    = NULL;
250 #endif /* USE_LOCALE_COLLATE */
251 #ifdef USE_LOCALE_NUMERIC
252     char *curnum     = NULL;
253 #endif /* USE_LOCALE_NUMERIC */
254 #ifdef __GLIBC__
255     char *language   = PerlEnv_getenv("LANGUAGE");
256 #endif
257     char *lc_all     = PerlEnv_getenv("LC_ALL");
258     char *lang       = PerlEnv_getenv("LANG");
259     bool setlocale_failure = FALSE;
260 
261 #ifdef LOCALE_ENVIRON_REQUIRED
262 
263     /*
264      * Ultrix setlocale(..., "") fails if there are no environment
265      * variables from which to get a locale name.
266      */
267 
268     bool done = FALSE;
269 
270 #ifdef LC_ALL
271     if (lang) {
272 	if (setlocale(LC_ALL, ""))
273 	    done = TRUE;
274 	else
275 	    setlocale_failure = TRUE;
276     }
277     if (!setlocale_failure) {
278 #ifdef USE_LOCALE_CTYPE
279 	if (! (curctype =
280 	       setlocale(LC_CTYPE,
281 			 (!done && (lang || PerlEnv_getenv("LC_CTYPE")))
282 				    ? "" : Nullch)))
283 	    setlocale_failure = TRUE;
284 	else
285 	    curctype = savepv(curctype);
286 #endif /* USE_LOCALE_CTYPE */
287 #ifdef USE_LOCALE_COLLATE
288 	if (! (curcoll =
289 	       setlocale(LC_COLLATE,
290 			 (!done && (lang || PerlEnv_getenv("LC_COLLATE")))
291 				   ? "" : Nullch)))
292 	    setlocale_failure = TRUE;
293 	else
294 	    curcoll = savepv(curcoll);
295 #endif /* USE_LOCALE_COLLATE */
296 #ifdef USE_LOCALE_NUMERIC
297 	if (! (curnum =
298 	       setlocale(LC_NUMERIC,
299 			 (!done && (lang || PerlEnv_getenv("LC_NUMERIC")))
300 				  ? "" : Nullch)))
301 	    setlocale_failure = TRUE;
302 	else
303 	    curnum = savepv(curnum);
304 #endif /* USE_LOCALE_NUMERIC */
305     }
306 
307 #endif /* LC_ALL */
308 
309 #endif /* !LOCALE_ENVIRON_REQUIRED */
310 
311 #ifdef LC_ALL
312     if (! setlocale(LC_ALL, ""))
313 	setlocale_failure = TRUE;
314 #endif /* LC_ALL */
315 
316     if (!setlocale_failure) {
317 #ifdef USE_LOCALE_CTYPE
318 	if (! (curctype = setlocale(LC_CTYPE, "")))
319 	    setlocale_failure = TRUE;
320 	else
321 	    curctype = savepv(curctype);
322 #endif /* USE_LOCALE_CTYPE */
323 #ifdef USE_LOCALE_COLLATE
324 	if (! (curcoll = setlocale(LC_COLLATE, "")))
325 	    setlocale_failure = TRUE;
326 	else
327 	    curcoll = savepv(curcoll);
328 #endif /* USE_LOCALE_COLLATE */
329 #ifdef USE_LOCALE_NUMERIC
330 	if (! (curnum = setlocale(LC_NUMERIC, "")))
331 	    setlocale_failure = TRUE;
332 	else
333 	    curnum = savepv(curnum);
334 #endif /* USE_LOCALE_NUMERIC */
335     }
336 
337     if (setlocale_failure) {
338 	char *p;
339 	bool locwarn = (printwarn > 1 ||
340 			(printwarn &&
341 			 (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p))));
342 
343 	if (locwarn) {
344 #ifdef LC_ALL
345 
346 	    PerlIO_printf(Perl_error_log,
347 	       "perl: warning: Setting locale failed.\n");
348 
349 #else /* !LC_ALL */
350 
351 	    PerlIO_printf(Perl_error_log,
352 	       "perl: warning: Setting locale failed for the categories:\n\t");
353 #ifdef USE_LOCALE_CTYPE
354 	    if (! curctype)
355 		PerlIO_printf(Perl_error_log, "LC_CTYPE ");
356 #endif /* USE_LOCALE_CTYPE */
357 #ifdef USE_LOCALE_COLLATE
358 	    if (! curcoll)
359 		PerlIO_printf(Perl_error_log, "LC_COLLATE ");
360 #endif /* USE_LOCALE_COLLATE */
361 #ifdef USE_LOCALE_NUMERIC
362 	    if (! curnum)
363 		PerlIO_printf(Perl_error_log, "LC_NUMERIC ");
364 #endif /* USE_LOCALE_NUMERIC */
365 	    PerlIO_printf(Perl_error_log, "\n");
366 
367 #endif /* LC_ALL */
368 
369 	    PerlIO_printf(Perl_error_log,
370 		"perl: warning: Please check that your locale settings:\n");
371 
372 #ifdef __GLIBC__
373 	    PerlIO_printf(Perl_error_log,
374 			  "\tLANGUAGE = %c%s%c,\n",
375 			  language ? '"' : '(',
376 			  language ? language : "unset",
377 			  language ? '"' : ')');
378 #endif
379 
380 	    PerlIO_printf(Perl_error_log,
381 			  "\tLC_ALL = %c%s%c,\n",
382 			  lc_all ? '"' : '(',
383 			  lc_all ? lc_all : "unset",
384 			  lc_all ? '"' : ')');
385 
386 #if defined(USE_ENVIRON_ARRAY)
387 	    {
388 	      char **e;
389 	      for (e = environ; *e; e++) {
390 		  if (strnEQ(*e, "LC_", 3)
391 			&& strnNE(*e, "LC_ALL=", 7)
392 			&& (p = strchr(*e, '=')))
393 		      PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n",
394 				    (int)(p - *e), *e, p + 1);
395 	      }
396 	    }
397 #else
398 	    PerlIO_printf(Perl_error_log,
399 			  "\t(possibly more locale environment variables)\n");
400 #endif
401 
402 	    PerlIO_printf(Perl_error_log,
403 			  "\tLANG = %c%s%c\n",
404 			  lang ? '"' : '(',
405 			  lang ? lang : "unset",
406 			  lang ? '"' : ')');
407 
408 	    PerlIO_printf(Perl_error_log,
409 			  "    are supported and installed on your system.\n");
410 	}
411 
412 #ifdef LC_ALL
413 
414 	if (setlocale(LC_ALL, "C")) {
415 	    if (locwarn)
416 		PerlIO_printf(Perl_error_log,
417       "perl: warning: Falling back to the standard locale (\"C\").\n");
418 	    ok = 0;
419 	}
420 	else {
421 	    if (locwarn)
422 		PerlIO_printf(Perl_error_log,
423       "perl: warning: Failed to fall back to the standard locale (\"C\").\n");
424 	    ok = -1;
425 	}
426 
427 #else /* ! LC_ALL */
428 
429 	if (0
430 #ifdef USE_LOCALE_CTYPE
431 	    || !(curctype || setlocale(LC_CTYPE, "C"))
432 #endif /* USE_LOCALE_CTYPE */
433 #ifdef USE_LOCALE_COLLATE
434 	    || !(curcoll || setlocale(LC_COLLATE, "C"))
435 #endif /* USE_LOCALE_COLLATE */
436 #ifdef USE_LOCALE_NUMERIC
437 	    || !(curnum || setlocale(LC_NUMERIC, "C"))
438 #endif /* USE_LOCALE_NUMERIC */
439 	    )
440 	{
441 	    if (locwarn)
442 		PerlIO_printf(Perl_error_log,
443       "perl: warning: Cannot fall back to the standard locale (\"C\").\n");
444 	    ok = -1;
445 	}
446 
447 #endif /* ! LC_ALL */
448 
449 #ifdef USE_LOCALE_CTYPE
450 	curctype = savepv(setlocale(LC_CTYPE, Nullch));
451 #endif /* USE_LOCALE_CTYPE */
452 #ifdef USE_LOCALE_COLLATE
453 	curcoll = savepv(setlocale(LC_COLLATE, Nullch));
454 #endif /* USE_LOCALE_COLLATE */
455 #ifdef USE_LOCALE_NUMERIC
456 	curnum = savepv(setlocale(LC_NUMERIC, Nullch));
457 #endif /* USE_LOCALE_NUMERIC */
458     }
459     else {
460 
461 #ifdef USE_LOCALE_CTYPE
462     new_ctype(curctype);
463 #endif /* USE_LOCALE_CTYPE */
464 
465 #ifdef USE_LOCALE_COLLATE
466     new_collate(curcoll);
467 #endif /* USE_LOCALE_COLLATE */
468 
469 #ifdef USE_LOCALE_NUMERIC
470     new_numeric(curnum);
471 #endif /* USE_LOCALE_NUMERIC */
472 
473     }
474 
475 #endif /* USE_LOCALE */
476 
477 #ifdef USE_PERLIO
478     {
479       /* Set PL_utf8locale to TRUE if using PerlIO _and_
480 	 any of the following are true:
481 	 - nl_langinfo(CODESET) contains /^utf-?8/i
482 	 - $ENV{LC_ALL}   contains /^utf-?8/i
483 	 - $ENV{LC_CTYPE} contains /^utf-?8/i
484 	 - $ENV{LANG}     contains /^utf-?8/i
485 	 The LC_ALL, LC_CTYPE, LANG obey the usual override
486 	 hierarchy of locale environment variables.  (LANGUAGE
487 	 affects only LC_MESSAGES only under glibc.) (If present,
488 	 it overrides LC_MESSAGES for GNU gettext, and it also
489 	 can have more than one locale, separated by spaces,
490 	 in case you need to know.)
491 	 If PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE})
492          are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer
493 	 on STDIN, STDOUT, STDERR, _and_ the default open discipline.
494       */
495 	 bool utf8locale = FALSE;
496 	 char *codeset = NULL;
497 #if defined(HAS_NL_LANGINFO) && defined(CODESET)
498 	 codeset = nl_langinfo(CODESET);
499 #endif
500 	 if (codeset)
501 	      utf8locale = (ibcmp(codeset,  "UTF-8", 5) == 0 ||
502  			    ibcmp(codeset,  "UTF8",  4) == 0);
503 #if defined(USE_LOCALE)
504 	 else { /* nl_langinfo(CODESET) is supposed to correctly
505 		 * interpret the locale environment variables,
506 		 * but just in case it fails, let's do this manually. */
507 	      if (lang)
508 		   utf8locale = (ibcmp(lang,     "UTF-8", 5) == 0 ||
509 			         ibcmp(lang,     "UTF8",  4) == 0);
510 #ifdef USE_LOCALE_CTYPE
511 	      if (curctype)
512 		   utf8locale = (ibcmp(curctype,     "UTF-8", 5) == 0 ||
513 			         ibcmp(curctype,     "UTF8",  4) == 0);
514 #endif
515 	      if (lc_all)
516 		   utf8locale = (ibcmp(lc_all,   "UTF-8", 5) == 0 ||
517 			         ibcmp(lc_all,   "UTF8",  4) == 0);
518 	 }
519 #endif /* USE_LOCALE */
520 	 if (utf8locale)
521 	      PL_utf8locale = TRUE;
522     }
523     /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO.
524        This is an alternative to using the -C command line switch
525        (the -C if present will override this). */
526     {
527 	 char *p = PerlEnv_getenv("PERL_UNICODE");
528 	 PL_unicode = p ? parse_unicode_opts(&p) : 0;
529     }
530 #endif
531 
532 #ifdef USE_LOCALE_CTYPE
533     if (curctype != NULL)
534 	Safefree(curctype);
535 #endif /* USE_LOCALE_CTYPE */
536 #ifdef USE_LOCALE_COLLATE
537     if (curcoll != NULL)
538 	Safefree(curcoll);
539 #endif /* USE_LOCALE_COLLATE */
540 #ifdef USE_LOCALE_NUMERIC
541     if (curnum != NULL)
542 	Safefree(curnum);
543 #endif /* USE_LOCALE_NUMERIC */
544     return ok;
545 }
546 
547 /* Backwards compatibility. */
548 int
Perl_init_i18nl14n(pTHX_ int printwarn)549 Perl_init_i18nl14n(pTHX_ int printwarn)
550 {
551     return init_i18nl10n(printwarn);
552 }
553 
554 #ifdef USE_LOCALE_COLLATE
555 
556 /*
557  * mem_collxfrm() is a bit like strxfrm() but with two important
558  * differences. First, it handles embedded NULs. Second, it allocates
559  * a bit more memory than needed for the transformed data itself.
560  * The real transformed data begins at offset sizeof(collationix).
561  * Please see sv_collxfrm() to see how this is used.
562  */
563 char *
Perl_mem_collxfrm(pTHX_ const char * s,STRLEN len,STRLEN * xlen)564 Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen)
565 {
566     char *xbuf;
567     STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */
568 
569     /* the first sizeof(collationix) bytes are used by sv_collxfrm(). */
570     /* the +1 is for the terminating NUL. */
571 
572     xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1;
573     New(171, xbuf, xAlloc, char);
574     if (! xbuf)
575 	goto bad;
576 
577     *(U32*)xbuf = PL_collation_ix;
578     xout = sizeof(PL_collation_ix);
579     for (xin = 0; xin < len; ) {
580 	SSize_t xused;
581 
582 	for (;;) {
583 	    xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout);
584 	    if (xused == -1)
585 		goto bad;
586 	    if ((STRLEN)xused < xAlloc - xout)
587 		break;
588 	    xAlloc = (2 * xAlloc) + 1;
589 	    Renew(xbuf, xAlloc, char);
590 	    if (! xbuf)
591 		goto bad;
592 	}
593 
594 	xin += strlen(s + xin) + 1;
595 	xout += xused;
596 
597 	/* Embedded NULs are understood but silently skipped
598 	 * because they make no sense in locale collation. */
599     }
600 
601     xbuf[xout] = '\0';
602     *xlen = xout - sizeof(PL_collation_ix);
603     return xbuf;
604 
605   bad:
606     Safefree(xbuf);
607     *xlen = 0;
608     return NULL;
609 }
610 
611 #endif /* USE_LOCALE_COLLATE */
612 
613