xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/locale.t (revision 0:68f95e015346)
1*0Sstevel@tonic-gate#!./perl -wT
2*0Sstevel@tonic-gate
3*0Sstevel@tonic-gateBEGIN {
4*0Sstevel@tonic-gate    chdir 't' if -d 't';
5*0Sstevel@tonic-gate    @INC = '../lib';
6*0Sstevel@tonic-gate    unshift @INC, '.';
7*0Sstevel@tonic-gate    require Config; import Config;
8*0Sstevel@tonic-gate    if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
9*0Sstevel@tonic-gate	print "1..0\n";
10*0Sstevel@tonic-gate	exit;
11*0Sstevel@tonic-gate    }
12*0Sstevel@tonic-gate    $| = 1;
13*0Sstevel@tonic-gate}
14*0Sstevel@tonic-gate
15*0Sstevel@tonic-gateuse strict;
16*0Sstevel@tonic-gate
17*0Sstevel@tonic-gatemy $debug = 1;
18*0Sstevel@tonic-gate
19*0Sstevel@tonic-gateuse Dumpvalue;
20*0Sstevel@tonic-gate
21*0Sstevel@tonic-gatemy $dumper = Dumpvalue->new(
22*0Sstevel@tonic-gate                            tick => qq{"},
23*0Sstevel@tonic-gate                            quoteHighBit => 0,
24*0Sstevel@tonic-gate                            unctrl => "quote"
25*0Sstevel@tonic-gate                           );
26*0Sstevel@tonic-gatesub debug {
27*0Sstevel@tonic-gate  return unless $debug;
28*0Sstevel@tonic-gate  my($mess) = join "", @_;
29*0Sstevel@tonic-gate  chop $mess;
30*0Sstevel@tonic-gate  print $dumper->stringify($mess,1), "\n";
31*0Sstevel@tonic-gate}
32*0Sstevel@tonic-gate
33*0Sstevel@tonic-gatesub debugf {
34*0Sstevel@tonic-gate    printf @_ if $debug;
35*0Sstevel@tonic-gate}
36*0Sstevel@tonic-gate
37*0Sstevel@tonic-gatemy $have_setlocale = 0;
38*0Sstevel@tonic-gateeval {
39*0Sstevel@tonic-gate    require POSIX;
40*0Sstevel@tonic-gate    import POSIX ':locale_h';
41*0Sstevel@tonic-gate    $have_setlocale++;
42*0Sstevel@tonic-gate};
43*0Sstevel@tonic-gate
44*0Sstevel@tonic-gate# Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1"
45*0Sstevel@tonic-gate# and mingw32 uses said silly CRT
46*0Sstevel@tonic-gate$have_setlocale = 0 if (($^O eq 'MSWin32' || $^O eq 'NetWare') && $Config{cc} =~ /^(cl|gcc)/i);
47*0Sstevel@tonic-gate
48*0Sstevel@tonic-gate# UWIN seems to loop after test 98, just skip for now
49*0Sstevel@tonic-gate$have_setlocale = 0 if ($^O =~ /^uwin/);
50*0Sstevel@tonic-gate
51*0Sstevel@tonic-gatemy $last = $have_setlocale ? &last : &last_without_setlocale;
52*0Sstevel@tonic-gate
53*0Sstevel@tonic-gateprint "1..$last\n";
54*0Sstevel@tonic-gate
55*0Sstevel@tonic-gateuse vars qw(&LC_ALL);
56*0Sstevel@tonic-gate
57*0Sstevel@tonic-gate$a = 'abc %';
58*0Sstevel@tonic-gate
59*0Sstevel@tonic-gatesub ok {
60*0Sstevel@tonic-gate    my ($n, $result) = @_;
61*0Sstevel@tonic-gate
62*0Sstevel@tonic-gate    print 'not ' unless ($result);
63*0Sstevel@tonic-gate    print "ok $n\n";
64*0Sstevel@tonic-gate}
65*0Sstevel@tonic-gate
66*0Sstevel@tonic-gate# First we'll do a lot of taint checking for locales.
67*0Sstevel@tonic-gate# This is the easiest to test, actually, as any locale,
68*0Sstevel@tonic-gate# even the default locale will taint under 'use locale'.
69*0Sstevel@tonic-gate
70*0Sstevel@tonic-gatesub is_tainted { # hello, camel two.
71*0Sstevel@tonic-gate    no warnings 'uninitialized' ;
72*0Sstevel@tonic-gate    my $dummy;
73*0Sstevel@tonic-gate    not eval { $dummy = join("", @_), kill 0; 1 }
74*0Sstevel@tonic-gate}
75*0Sstevel@tonic-gate
76*0Sstevel@tonic-gatesub check_taint ($$) {
77*0Sstevel@tonic-gate    ok $_[0], is_tainted($_[1]);
78*0Sstevel@tonic-gate}
79*0Sstevel@tonic-gate
80*0Sstevel@tonic-gatesub check_taint_not ($$) {
81*0Sstevel@tonic-gate    ok $_[0], not is_tainted($_[1]);
82*0Sstevel@tonic-gate}
83*0Sstevel@tonic-gate
84*0Sstevel@tonic-gateuse locale;	# engage locale and therefore locale taint.
85*0Sstevel@tonic-gate
86*0Sstevel@tonic-gatecheck_taint_not   1, $a;
87*0Sstevel@tonic-gate
88*0Sstevel@tonic-gatecheck_taint       2, uc($a);
89*0Sstevel@tonic-gatecheck_taint       3, "\U$a";
90*0Sstevel@tonic-gatecheck_taint       4, ucfirst($a);
91*0Sstevel@tonic-gatecheck_taint       5, "\u$a";
92*0Sstevel@tonic-gatecheck_taint       6, lc($a);
93*0Sstevel@tonic-gatecheck_taint       7, "\L$a";
94*0Sstevel@tonic-gatecheck_taint       8, lcfirst($a);
95*0Sstevel@tonic-gatecheck_taint       9, "\l$a";
96*0Sstevel@tonic-gate
97*0Sstevel@tonic-gatecheck_taint_not  10, sprintf('%e', 123.456);
98*0Sstevel@tonic-gatecheck_taint_not  11, sprintf('%f', 123.456);
99*0Sstevel@tonic-gatecheck_taint_not  12, sprintf('%g', 123.456);
100*0Sstevel@tonic-gatecheck_taint_not  13, sprintf('%d', 123.456);
101*0Sstevel@tonic-gatecheck_taint_not  14, sprintf('%x', 123.456);
102*0Sstevel@tonic-gate
103*0Sstevel@tonic-gate$_ = $a;	# untaint $_
104*0Sstevel@tonic-gate
105*0Sstevel@tonic-gate$_ = uc($a);	# taint $_
106*0Sstevel@tonic-gate
107*0Sstevel@tonic-gatecheck_taint      15, $_;
108*0Sstevel@tonic-gate
109*0Sstevel@tonic-gate/(\w)/;	# taint $&, $`, $', $+, $1.
110*0Sstevel@tonic-gatecheck_taint      16, $&;
111*0Sstevel@tonic-gatecheck_taint      17, $`;
112*0Sstevel@tonic-gatecheck_taint      18, $';
113*0Sstevel@tonic-gatecheck_taint      19, $+;
114*0Sstevel@tonic-gatecheck_taint      20, $1;
115*0Sstevel@tonic-gatecheck_taint_not  21, $2;
116*0Sstevel@tonic-gate
117*0Sstevel@tonic-gate/(.)/;	# untaint $&, $`, $', $+, $1.
118*0Sstevel@tonic-gatecheck_taint_not  22, $&;
119*0Sstevel@tonic-gatecheck_taint_not  23, $`;
120*0Sstevel@tonic-gatecheck_taint_not  24, $';
121*0Sstevel@tonic-gatecheck_taint_not  25, $+;
122*0Sstevel@tonic-gatecheck_taint_not  26, $1;
123*0Sstevel@tonic-gatecheck_taint_not  27, $2;
124*0Sstevel@tonic-gate
125*0Sstevel@tonic-gate/(\W)/;	# taint $&, $`, $', $+, $1.
126*0Sstevel@tonic-gatecheck_taint      28, $&;
127*0Sstevel@tonic-gatecheck_taint      29, $`;
128*0Sstevel@tonic-gatecheck_taint      30, $';
129*0Sstevel@tonic-gatecheck_taint      31, $+;
130*0Sstevel@tonic-gatecheck_taint      32, $1;
131*0Sstevel@tonic-gatecheck_taint_not  33, $2;
132*0Sstevel@tonic-gate
133*0Sstevel@tonic-gate/(\s)/;	# taint $&, $`, $', $+, $1.
134*0Sstevel@tonic-gatecheck_taint      34, $&;
135*0Sstevel@tonic-gatecheck_taint      35, $`;
136*0Sstevel@tonic-gatecheck_taint      36, $';
137*0Sstevel@tonic-gatecheck_taint      37, $+;
138*0Sstevel@tonic-gatecheck_taint      38, $1;
139*0Sstevel@tonic-gatecheck_taint_not  39, $2;
140*0Sstevel@tonic-gate
141*0Sstevel@tonic-gate/(\S)/;	# taint $&, $`, $', $+, $1.
142*0Sstevel@tonic-gatecheck_taint      40, $&;
143*0Sstevel@tonic-gatecheck_taint      41, $`;
144*0Sstevel@tonic-gatecheck_taint      42, $';
145*0Sstevel@tonic-gatecheck_taint      43, $+;
146*0Sstevel@tonic-gatecheck_taint      44, $1;
147*0Sstevel@tonic-gatecheck_taint_not  45, $2;
148*0Sstevel@tonic-gate
149*0Sstevel@tonic-gate$_ = $a;	# untaint $_
150*0Sstevel@tonic-gate
151*0Sstevel@tonic-gatecheck_taint_not  46, $_;
152*0Sstevel@tonic-gate
153*0Sstevel@tonic-gate/(b)/;		# this must not taint
154*0Sstevel@tonic-gatecheck_taint_not  47, $&;
155*0Sstevel@tonic-gatecheck_taint_not  48, $`;
156*0Sstevel@tonic-gatecheck_taint_not  49, $';
157*0Sstevel@tonic-gatecheck_taint_not  50, $+;
158*0Sstevel@tonic-gatecheck_taint_not  51, $1;
159*0Sstevel@tonic-gatecheck_taint_not  52, $2;
160*0Sstevel@tonic-gate
161*0Sstevel@tonic-gate$_ = $a;	# untaint $_
162*0Sstevel@tonic-gate
163*0Sstevel@tonic-gatecheck_taint_not  53, $_;
164*0Sstevel@tonic-gate
165*0Sstevel@tonic-gate$b = uc($a);	# taint $b
166*0Sstevel@tonic-gates/(.+)/$b/;	# this must taint only the $_
167*0Sstevel@tonic-gate
168*0Sstevel@tonic-gatecheck_taint      54, $_;
169*0Sstevel@tonic-gatecheck_taint_not  55, $&;
170*0Sstevel@tonic-gatecheck_taint_not  56, $`;
171*0Sstevel@tonic-gatecheck_taint_not  57, $';
172*0Sstevel@tonic-gatecheck_taint_not  58, $+;
173*0Sstevel@tonic-gatecheck_taint_not  59, $1;
174*0Sstevel@tonic-gatecheck_taint_not  60, $2;
175*0Sstevel@tonic-gate
176*0Sstevel@tonic-gate$_ = $a;	# untaint $_
177*0Sstevel@tonic-gate
178*0Sstevel@tonic-gates/(.+)/b/;	# this must not taint
179*0Sstevel@tonic-gatecheck_taint_not  61, $_;
180*0Sstevel@tonic-gatecheck_taint_not  62, $&;
181*0Sstevel@tonic-gatecheck_taint_not  63, $`;
182*0Sstevel@tonic-gatecheck_taint_not  64, $';
183*0Sstevel@tonic-gatecheck_taint_not  65, $+;
184*0Sstevel@tonic-gatecheck_taint_not  66, $1;
185*0Sstevel@tonic-gatecheck_taint_not  67, $2;
186*0Sstevel@tonic-gate
187*0Sstevel@tonic-gate$b = $a;	# untaint $b
188*0Sstevel@tonic-gate
189*0Sstevel@tonic-gate($b = $a) =~ s/\w/$&/;
190*0Sstevel@tonic-gatecheck_taint      68, $b;	# $b should be tainted.
191*0Sstevel@tonic-gatecheck_taint_not  69, $a;	# $a should be not.
192*0Sstevel@tonic-gate
193*0Sstevel@tonic-gate$_ = $a;	# untaint $_
194*0Sstevel@tonic-gate
195*0Sstevel@tonic-gates/(\w)/\l$1/;	# this must taint
196*0Sstevel@tonic-gatecheck_taint      70, $_;
197*0Sstevel@tonic-gatecheck_taint      71, $&;
198*0Sstevel@tonic-gatecheck_taint      72, $`;
199*0Sstevel@tonic-gatecheck_taint      73, $';
200*0Sstevel@tonic-gatecheck_taint      74, $+;
201*0Sstevel@tonic-gatecheck_taint      75, $1;
202*0Sstevel@tonic-gatecheck_taint_not  76, $2;
203*0Sstevel@tonic-gate
204*0Sstevel@tonic-gate$_ = $a;	# untaint $_
205*0Sstevel@tonic-gate
206*0Sstevel@tonic-gates/(\w)/\L$1/;	# this must taint
207*0Sstevel@tonic-gatecheck_taint      77, $_;
208*0Sstevel@tonic-gatecheck_taint      78, $&;
209*0Sstevel@tonic-gatecheck_taint      79, $`;
210*0Sstevel@tonic-gatecheck_taint      80, $';
211*0Sstevel@tonic-gatecheck_taint      81, $+;
212*0Sstevel@tonic-gatecheck_taint      82, $1;
213*0Sstevel@tonic-gatecheck_taint_not  83, $2;
214*0Sstevel@tonic-gate
215*0Sstevel@tonic-gate$_ = $a;	# untaint $_
216*0Sstevel@tonic-gate
217*0Sstevel@tonic-gates/(\w)/\u$1/;	# this must taint
218*0Sstevel@tonic-gatecheck_taint      84, $_;
219*0Sstevel@tonic-gatecheck_taint      85, $&;
220*0Sstevel@tonic-gatecheck_taint      86, $`;
221*0Sstevel@tonic-gatecheck_taint      87, $';
222*0Sstevel@tonic-gatecheck_taint      88, $+;
223*0Sstevel@tonic-gatecheck_taint      89, $1;
224*0Sstevel@tonic-gatecheck_taint_not  90, $2;
225*0Sstevel@tonic-gate
226*0Sstevel@tonic-gate$_ = $a;	# untaint $_
227*0Sstevel@tonic-gate
228*0Sstevel@tonic-gates/(\w)/\U$1/;	# this must taint
229*0Sstevel@tonic-gatecheck_taint      91, $_;
230*0Sstevel@tonic-gatecheck_taint      92, $&;
231*0Sstevel@tonic-gatecheck_taint      93, $`;
232*0Sstevel@tonic-gatecheck_taint      94, $';
233*0Sstevel@tonic-gatecheck_taint      95, $+;
234*0Sstevel@tonic-gatecheck_taint      96, $1;
235*0Sstevel@tonic-gatecheck_taint_not  97, $2;
236*0Sstevel@tonic-gate
237*0Sstevel@tonic-gate# After all this tainting $a should be cool.
238*0Sstevel@tonic-gate
239*0Sstevel@tonic-gatecheck_taint_not  98, $a;
240*0Sstevel@tonic-gate
241*0Sstevel@tonic-gatesub last_without_setlocale { 98 }
242*0Sstevel@tonic-gate
243*0Sstevel@tonic-gate# I think we've seen quite enough of taint.
244*0Sstevel@tonic-gate# Let us do some *real* locale work now,
245*0Sstevel@tonic-gate# unless setlocale() is missing (i.e. minitest).
246*0Sstevel@tonic-gate
247*0Sstevel@tonic-gateexit unless $have_setlocale;
248*0Sstevel@tonic-gate
249*0Sstevel@tonic-gate# Find locales.
250*0Sstevel@tonic-gate
251*0Sstevel@tonic-gatedebug "# Scanning for locales...\n";
252*0Sstevel@tonic-gate
253*0Sstevel@tonic-gate# Note that it's okay that some languages have their native names
254*0Sstevel@tonic-gate# capitalized here even though that's not "right".  They are lowercased
255*0Sstevel@tonic-gate# anyway later during the scanning process (and besides, some clueless
256*0Sstevel@tonic-gate# vendor might have them capitalized errorneously anyway).
257*0Sstevel@tonic-gate
258*0Sstevel@tonic-gatemy $locales = <<EOF;
259*0Sstevel@tonic-gateAfrikaans:af:za:1 15
260*0Sstevel@tonic-gateArabic:ar:dz eg sa:6 arabic8
261*0Sstevel@tonic-gateBrezhoneg Breton:br:fr:1 15
262*0Sstevel@tonic-gateBulgarski Bulgarian:bg:bg:5
263*0Sstevel@tonic-gateChinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC
264*0Sstevel@tonic-gateHrvatski Croatian:hr:hr:2
265*0Sstevel@tonic-gateCymraeg Welsh:cy:cy:1 14 15
266*0Sstevel@tonic-gateCzech:cs:cz:2
267*0Sstevel@tonic-gateDansk Danish:dk:da:1 15
268*0Sstevel@tonic-gateNederlands Dutch:nl:be nl:1 15
269*0Sstevel@tonic-gateEnglish American British:en:au ca gb ie nz us uk zw:1 15 cp850
270*0Sstevel@tonic-gateEsperanto:eo:eo:3
271*0Sstevel@tonic-gateEesti Estonian:et:ee:4 6 13
272*0Sstevel@tonic-gateSuomi Finnish:fi:fi:1 15
273*0Sstevel@tonic-gateFlamish::fl:1 15
274*0Sstevel@tonic-gateDeutsch German:de:at be ch de lu:1 15
275*0Sstevel@tonic-gateEuskaraz Basque:eu:es fr:1 15
276*0Sstevel@tonic-gateGalego Galician:gl:es:1 15
277*0Sstevel@tonic-gateEllada Greek:el:gr:7 g8
278*0Sstevel@tonic-gateFrysk:fy:nl:1 15
279*0Sstevel@tonic-gateGreenlandic:kl:gl:4 6
280*0Sstevel@tonic-gateHebrew:iw:il:8 hebrew8
281*0Sstevel@tonic-gateHungarian:hu:hu:2
282*0Sstevel@tonic-gateIndonesian:in:id:1 15
283*0Sstevel@tonic-gateGaeilge Irish:ga:IE:1 14 15
284*0Sstevel@tonic-gateItaliano Italian:it:ch it:1 15
285*0Sstevel@tonic-gateNihongo Japanese:ja:jp:euc eucJP jp.EUC sjis
286*0Sstevel@tonic-gateKorean:ko:kr:
287*0Sstevel@tonic-gateLatine Latin:la:va:1 15
288*0Sstevel@tonic-gateLatvian:lv:lv:4 6 13
289*0Sstevel@tonic-gateLithuanian:lt:lt:4 6 13
290*0Sstevel@tonic-gateMacedonian:mk:mk:1 15
291*0Sstevel@tonic-gateMaltese:mt:mt:3
292*0Sstevel@tonic-gateMoldovan:mo:mo:2
293*0Sstevel@tonic-gateNorsk Norwegian:no no\@nynorsk:no:1 15
294*0Sstevel@tonic-gateOccitan:oc:es:1 15
295*0Sstevel@tonic-gatePolski Polish:pl:pl:2
296*0Sstevel@tonic-gateRumanian:ro:ro:2
297*0Sstevel@tonic-gateRusski Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866
298*0Sstevel@tonic-gateSerbski Serbian:sr:yu:5
299*0Sstevel@tonic-gateSlovak:sk:sk:2
300*0Sstevel@tonic-gateSlovene Slovenian:sl:si:2
301*0Sstevel@tonic-gateSqhip Albanian:sq:sq:1 15
302*0Sstevel@tonic-gateSvenska Swedish:sv:fi se:1 15
303*0Sstevel@tonic-gateThai:th:th:11 tis620
304*0Sstevel@tonic-gateTurkish:tr:tr:9 turkish8
305*0Sstevel@tonic-gateYiddish:yi::1 15
306*0Sstevel@tonic-gateEOF
307*0Sstevel@tonic-gate
308*0Sstevel@tonic-gateif ($^O eq 'os390') {
309*0Sstevel@tonic-gate    # These cause heartburn.  Broken locales?
310*0Sstevel@tonic-gate    $locales =~ s/Svenska Swedish:sv:fi se:1 15\n//;
311*0Sstevel@tonic-gate    $locales =~ s/Thai:th:th:11 tis620\n//;
312*0Sstevel@tonic-gate}
313*0Sstevel@tonic-gate
314*0Sstevel@tonic-gatesub in_utf8 () { $^H & 0x08 || (${^OPEN} || "") =~ /:utf8/ }
315*0Sstevel@tonic-gate
316*0Sstevel@tonic-gateif (in_utf8) {
317*0Sstevel@tonic-gate    require "lib/locale/utf8";
318*0Sstevel@tonic-gate} else {
319*0Sstevel@tonic-gate    require "lib/locale/latin1";
320*0Sstevel@tonic-gate}
321*0Sstevel@tonic-gate
322*0Sstevel@tonic-gatemy @Locale;
323*0Sstevel@tonic-gatemy $Locale;
324*0Sstevel@tonic-gatemy @Alnum_;
325*0Sstevel@tonic-gate
326*0Sstevel@tonic-gatemy @utf8locale;
327*0Sstevel@tonic-gatemy %utf8skip;
328*0Sstevel@tonic-gate
329*0Sstevel@tonic-gatesub getalnum_ {
330*0Sstevel@tonic-gate    sort grep /\w/, map { chr } 0..255
331*0Sstevel@tonic-gate}
332*0Sstevel@tonic-gate
333*0Sstevel@tonic-gatesub trylocale {
334*0Sstevel@tonic-gate    my $locale = shift;
335*0Sstevel@tonic-gate    if (setlocale(LC_ALL, $locale)) {
336*0Sstevel@tonic-gate	push @Locale, $locale;
337*0Sstevel@tonic-gate    }
338*0Sstevel@tonic-gate}
339*0Sstevel@tonic-gate
340*0Sstevel@tonic-gatesub decode_encodings {
341*0Sstevel@tonic-gate    my @enc;
342*0Sstevel@tonic-gate
343*0Sstevel@tonic-gate    foreach (split(/ /, shift)) {
344*0Sstevel@tonic-gate	if (/^(\d+)$/) {
345*0Sstevel@tonic-gate	    push @enc, "ISO8859-$1";
346*0Sstevel@tonic-gate	    push @enc, "iso8859$1";	# HP
347*0Sstevel@tonic-gate	    if ($1 eq '1') {
348*0Sstevel@tonic-gate		 push @enc, "roman8";	# HP
349*0Sstevel@tonic-gate	    }
350*0Sstevel@tonic-gate	} else {
351*0Sstevel@tonic-gate	    push @enc, $_;
352*0Sstevel@tonic-gate   	    push @enc, "$_.UTF-8";
353*0Sstevel@tonic-gate	}
354*0Sstevel@tonic-gate    }
355*0Sstevel@tonic-gate    if ($^O eq 'os390') {
356*0Sstevel@tonic-gate	push @enc, qw(IBM-037 IBM-819 IBM-1047);
357*0Sstevel@tonic-gate    }
358*0Sstevel@tonic-gate
359*0Sstevel@tonic-gate    return @enc;
360*0Sstevel@tonic-gate}
361*0Sstevel@tonic-gate
362*0Sstevel@tonic-gatetrylocale("C");
363*0Sstevel@tonic-gatetrylocale("POSIX");
364*0Sstevel@tonic-gateforeach (0..15) {
365*0Sstevel@tonic-gate    trylocale("ISO8859-$_");
366*0Sstevel@tonic-gate    trylocale("iso8859$_");
367*0Sstevel@tonic-gate    trylocale("iso8859-$_");
368*0Sstevel@tonic-gate    trylocale("iso_8859_$_");
369*0Sstevel@tonic-gate    trylocale("isolatin$_");
370*0Sstevel@tonic-gate    trylocale("isolatin-$_");
371*0Sstevel@tonic-gate    trylocale("iso_latin_$_");
372*0Sstevel@tonic-gate}
373*0Sstevel@tonic-gate
374*0Sstevel@tonic-gate# Sanitize the environment so that we can run the external 'locale'
375*0Sstevel@tonic-gate# program without the taint mode getting grumpy.
376*0Sstevel@tonic-gate
377*0Sstevel@tonic-gate# $ENV{PATH} is special in VMS.
378*0Sstevel@tonic-gatedelete $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv};
379*0Sstevel@tonic-gate
380*0Sstevel@tonic-gate# Other subversive stuff.
381*0Sstevel@tonic-gatedelete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
382*0Sstevel@tonic-gate
383*0Sstevel@tonic-gateif (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
384*0Sstevel@tonic-gate    while (<LOCALES>) {
385*0Sstevel@tonic-gate        chomp;
386*0Sstevel@tonic-gate	trylocale($_);
387*0Sstevel@tonic-gate    }
388*0Sstevel@tonic-gate    close(LOCALES);
389*0Sstevel@tonic-gate} elsif ($^O eq 'VMS' && defined($ENV{'SYS$I18N_LOCALE'}) && -d 'SYS$I18N_LOCALE') {
390*0Sstevel@tonic-gate# The SYS$I18N_LOCALE logical name search list was not present on
391*0Sstevel@tonic-gate# VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions.
392*0Sstevel@tonic-gate    opendir(LOCALES, "SYS\$I18N_LOCALE:");
393*0Sstevel@tonic-gate    while ($_ = readdir(LOCALES)) {
394*0Sstevel@tonic-gate        chomp;
395*0Sstevel@tonic-gate        trylocale($_);
396*0Sstevel@tonic-gate    }
397*0Sstevel@tonic-gate    close(LOCALES);
398*0Sstevel@tonic-gate} else {
399*0Sstevel@tonic-gate
400*0Sstevel@tonic-gate    # This is going to be slow.
401*0Sstevel@tonic-gate
402*0Sstevel@tonic-gate    foreach my $locale (split(/\n/, $locales)) {
403*0Sstevel@tonic-gate	my ($locale_name, $language_codes, $country_codes, $encodings) =
404*0Sstevel@tonic-gate	    split(/:/, $locale);
405*0Sstevel@tonic-gate	my @enc = decode_encodings($encodings);
406*0Sstevel@tonic-gate	foreach my $loc (split(/ /, $locale_name)) {
407*0Sstevel@tonic-gate	    trylocale($loc);
408*0Sstevel@tonic-gate	    foreach my $enc (@enc) {
409*0Sstevel@tonic-gate		trylocale("$loc.$enc");
410*0Sstevel@tonic-gate	    }
411*0Sstevel@tonic-gate	    $loc = lc $loc;
412*0Sstevel@tonic-gate	    foreach my $enc (@enc) {
413*0Sstevel@tonic-gate		trylocale("$loc.$enc");
414*0Sstevel@tonic-gate	    }
415*0Sstevel@tonic-gate	}
416*0Sstevel@tonic-gate	foreach my $lang (split(/ /, $language_codes)) {
417*0Sstevel@tonic-gate	    trylocale($lang);
418*0Sstevel@tonic-gate	    foreach my $country (split(/ /, $country_codes)) {
419*0Sstevel@tonic-gate		my $lc = "${lang}_${country}";
420*0Sstevel@tonic-gate		trylocale($lc);
421*0Sstevel@tonic-gate		foreach my $enc (@enc) {
422*0Sstevel@tonic-gate		    trylocale("$lc.$enc");
423*0Sstevel@tonic-gate		}
424*0Sstevel@tonic-gate		my $lC = "${lang}_\U${country}";
425*0Sstevel@tonic-gate		trylocale($lC);
426*0Sstevel@tonic-gate		foreach my $enc (@enc) {
427*0Sstevel@tonic-gate		    trylocale("$lC.$enc");
428*0Sstevel@tonic-gate		}
429*0Sstevel@tonic-gate	    }
430*0Sstevel@tonic-gate	}
431*0Sstevel@tonic-gate    }
432*0Sstevel@tonic-gate}
433*0Sstevel@tonic-gate
434*0Sstevel@tonic-gatesetlocale(LC_ALL, "C");
435*0Sstevel@tonic-gate
436*0Sstevel@tonic-gate@Locale = sort @Locale;
437*0Sstevel@tonic-gate
438*0Sstevel@tonic-gatedebug "# Locales =\n";
439*0Sstevel@tonic-gatefor ( @Locale ) {
440*0Sstevel@tonic-gate    debug "# $_\n";
441*0Sstevel@tonic-gate}
442*0Sstevel@tonic-gate
443*0Sstevel@tonic-gatemy %Problem;
444*0Sstevel@tonic-gatemy %Okay;
445*0Sstevel@tonic-gatemy %Testing;
446*0Sstevel@tonic-gatemy @Neoalpha;
447*0Sstevel@tonic-gatemy %Neoalpha;
448*0Sstevel@tonic-gate
449*0Sstevel@tonic-gatesub tryneoalpha {
450*0Sstevel@tonic-gate    my ($Locale, $i, $test) = @_;
451*0Sstevel@tonic-gate    unless ($test) {
452*0Sstevel@tonic-gate	$Problem{$i}{$Locale} = 1;
453*0Sstevel@tonic-gate	debug "# failed $i with locale '$Locale'\n";
454*0Sstevel@tonic-gate    } else {
455*0Sstevel@tonic-gate	push @{$Okay{$i}}, $Locale;
456*0Sstevel@tonic-gate    }
457*0Sstevel@tonic-gate}
458*0Sstevel@tonic-gate
459*0Sstevel@tonic-gateforeach $Locale (@Locale) {
460*0Sstevel@tonic-gate    debug "# Locale = $Locale\n";
461*0Sstevel@tonic-gate    @Alnum_ = getalnum_();
462*0Sstevel@tonic-gate    debug "# w = ", join("",@Alnum_), "\n";
463*0Sstevel@tonic-gate
464*0Sstevel@tonic-gate    unless (setlocale(LC_ALL, $Locale)) {
465*0Sstevel@tonic-gate	foreach (99..103) {
466*0Sstevel@tonic-gate	    $Problem{$_}{$Locale} = -1;
467*0Sstevel@tonic-gate	}
468*0Sstevel@tonic-gate	next;
469*0Sstevel@tonic-gate    }
470*0Sstevel@tonic-gate
471*0Sstevel@tonic-gate    # Sieve the uppercase and the lowercase.
472*0Sstevel@tonic-gate
473*0Sstevel@tonic-gate    my %UPPER = ();
474*0Sstevel@tonic-gate    my %lower = ();
475*0Sstevel@tonic-gate    my %BoThCaSe = ();
476*0Sstevel@tonic-gate    for (@Alnum_) {
477*0Sstevel@tonic-gate	if (/[^\d_]/) { # skip digits and the _
478*0Sstevel@tonic-gate	    if (uc($_) eq $_) {
479*0Sstevel@tonic-gate		$UPPER{$_} = $_;
480*0Sstevel@tonic-gate	    }
481*0Sstevel@tonic-gate	    if (lc($_) eq $_) {
482*0Sstevel@tonic-gate		$lower{$_} = $_;
483*0Sstevel@tonic-gate	    }
484*0Sstevel@tonic-gate	}
485*0Sstevel@tonic-gate    }
486*0Sstevel@tonic-gate    foreach (keys %UPPER) {
487*0Sstevel@tonic-gate	$BoThCaSe{$_}++ if exists $lower{$_};
488*0Sstevel@tonic-gate    }
489*0Sstevel@tonic-gate    foreach (keys %lower) {
490*0Sstevel@tonic-gate	$BoThCaSe{$_}++ if exists $UPPER{$_};
491*0Sstevel@tonic-gate    }
492*0Sstevel@tonic-gate    foreach (keys %BoThCaSe) {
493*0Sstevel@tonic-gate	delete $UPPER{$_};
494*0Sstevel@tonic-gate	delete $lower{$_};
495*0Sstevel@tonic-gate    }
496*0Sstevel@tonic-gate
497*0Sstevel@tonic-gate    debug "# UPPER    = ", join("", sort keys %UPPER   ), "\n";
498*0Sstevel@tonic-gate    debug "# lower    = ", join("", sort keys %lower   ), "\n";
499*0Sstevel@tonic-gate    debug "# BoThCaSe = ", join("", sort keys %BoThCaSe), "\n";
500*0Sstevel@tonic-gate
501*0Sstevel@tonic-gate    # Find the alphabets that are not alphabets in the default locale.
502*0Sstevel@tonic-gate
503*0Sstevel@tonic-gate    {
504*0Sstevel@tonic-gate	no locale;
505*0Sstevel@tonic-gate
506*0Sstevel@tonic-gate	@Neoalpha = ();
507*0Sstevel@tonic-gate	for (keys %UPPER, keys %lower) {
508*0Sstevel@tonic-gate	    push(@Neoalpha, $_) if (/\W/);
509*0Sstevel@tonic-gate	    $Neoalpha{$_} = $_;
510*0Sstevel@tonic-gate	}
511*0Sstevel@tonic-gate    }
512*0Sstevel@tonic-gate
513*0Sstevel@tonic-gate    @Neoalpha = sort @Neoalpha;
514*0Sstevel@tonic-gate
515*0Sstevel@tonic-gate    debug "# Neoalpha = ", join("",@Neoalpha), "\n";
516*0Sstevel@tonic-gate
517*0Sstevel@tonic-gate    if (@Neoalpha == 0) {
518*0Sstevel@tonic-gate	# If we have no Neoalphas the remaining tests are no-ops.
519*0Sstevel@tonic-gate	debug "# no Neoalpha, skipping tests 99..102 for locale '$Locale'\n";
520*0Sstevel@tonic-gate	foreach (99..102) {
521*0Sstevel@tonic-gate	    push @{$Okay{$_}}, $Locale;
522*0Sstevel@tonic-gate	}
523*0Sstevel@tonic-gate    } else {
524*0Sstevel@tonic-gate
525*0Sstevel@tonic-gate	# Test \w.
526*0Sstevel@tonic-gate
527*0Sstevel@tonic-gate	my $word = join('', @Neoalpha);
528*0Sstevel@tonic-gate
529*0Sstevel@tonic-gate	my $badutf8;
530*0Sstevel@tonic-gate	{
531*0Sstevel@tonic-gate	    local $SIG{__WARN__} = sub {
532*0Sstevel@tonic-gate		$badutf8 = $_[0] =~ /Malformed UTF-8/;
533*0Sstevel@tonic-gate	    };
534*0Sstevel@tonic-gate	    $Locale =~ /utf-?8/i;
535*0Sstevel@tonic-gate	}
536*0Sstevel@tonic-gate
537*0Sstevel@tonic-gate	if ($badutf8) {
538*0Sstevel@tonic-gate	    debug "# Locale name contains bad UTF-8, skipping test 99 for locale '$Locale'\n";
539*0Sstevel@tonic-gate	} elsif ($Locale =~ /utf-?8/i) {
540*0Sstevel@tonic-gate	    debug "# unknown whether locale and Unicode have the same \\w, skipping test 99 for locale '$Locale'\n";
541*0Sstevel@tonic-gate	    push @{$Okay{99}}, $Locale;
542*0Sstevel@tonic-gate	} else {
543*0Sstevel@tonic-gate	    if ($word =~ /^(\w+)$/) {
544*0Sstevel@tonic-gate		tryneoalpha($Locale, 99, 1);
545*0Sstevel@tonic-gate	    } else {
546*0Sstevel@tonic-gate		tryneoalpha($Locale, 99, 0);
547*0Sstevel@tonic-gate	    }
548*0Sstevel@tonic-gate	}
549*0Sstevel@tonic-gate
550*0Sstevel@tonic-gate	# Cross-check the whole 8-bit character set.
551*0Sstevel@tonic-gate
552*0Sstevel@tonic-gate	for (map { chr } 0..255) {
553*0Sstevel@tonic-gate	    tryneoalpha($Locale, 100,
554*0Sstevel@tonic-gate			(/\w/ xor /\W/) ||
555*0Sstevel@tonic-gate			(/\d/ xor /\D/) ||
556*0Sstevel@tonic-gate			(/\s/ xor /\S/));
557*0Sstevel@tonic-gate	}
558*0Sstevel@tonic-gate
559*0Sstevel@tonic-gate	# Test for read-only scalars' locale vs non-locale comparisons.
560*0Sstevel@tonic-gate
561*0Sstevel@tonic-gate	{
562*0Sstevel@tonic-gate	    no locale;
563*0Sstevel@tonic-gate	    $a = "qwerty";
564*0Sstevel@tonic-gate	    {
565*0Sstevel@tonic-gate		use locale;
566*0Sstevel@tonic-gate		tryneoalpha($Locale, 101, ($a cmp "qwerty") == 0);
567*0Sstevel@tonic-gate	    }
568*0Sstevel@tonic-gate	}
569*0Sstevel@tonic-gate
570*0Sstevel@tonic-gate	{
571*0Sstevel@tonic-gate	    my ($from, $to, $lesser, $greater,
572*0Sstevel@tonic-gate		@test, %test, $test, $yes, $no, $sign);
573*0Sstevel@tonic-gate
574*0Sstevel@tonic-gate	    for (0..9) {
575*0Sstevel@tonic-gate		# Select a slice.
576*0Sstevel@tonic-gate		$from = int(($_*@Alnum_)/10);
577*0Sstevel@tonic-gate		$to = $from + int(@Alnum_/10);
578*0Sstevel@tonic-gate		$to = $#Alnum_ if ($to > $#Alnum_);
579*0Sstevel@tonic-gate		$lesser  = join('', @Alnum_[$from..$to]);
580*0Sstevel@tonic-gate		# Select a slice one character on.
581*0Sstevel@tonic-gate		$from++; $to++;
582*0Sstevel@tonic-gate		$to = $#Alnum_ if ($to > $#Alnum_);
583*0Sstevel@tonic-gate		$greater = join('', @Alnum_[$from..$to]);
584*0Sstevel@tonic-gate		($yes, $no, $sign) = ($lesser lt $greater
585*0Sstevel@tonic-gate				      ? ("    ", "not ", 1)
586*0Sstevel@tonic-gate				      : ("not ", "    ", -1));
587*0Sstevel@tonic-gate		# all these tests should FAIL (return 0).
588*0Sstevel@tonic-gate		# Exact lt or gt cannot be tested because
589*0Sstevel@tonic-gate		# in some locales, say, eacute and E may test equal.
590*0Sstevel@tonic-gate		@test =
591*0Sstevel@tonic-gate		    (
592*0Sstevel@tonic-gate		     $no.'    ($lesser  le $greater)',  # 1
593*0Sstevel@tonic-gate		     'not      ($lesser  ne $greater)', # 2
594*0Sstevel@tonic-gate		     '         ($lesser  eq $greater)', # 3
595*0Sstevel@tonic-gate		     $yes.'    ($lesser  ge $greater)', # 4
596*0Sstevel@tonic-gate		     $yes.'    ($lesser  ge $greater)', # 5
597*0Sstevel@tonic-gate		     $yes.'    ($greater le $lesser )', # 7
598*0Sstevel@tonic-gate		     'not      ($greater ne $lesser )', # 8
599*0Sstevel@tonic-gate		     '         ($greater eq $lesser )', # 9
600*0Sstevel@tonic-gate		     $no.'     ($greater ge $lesser )', # 10
601*0Sstevel@tonic-gate		     'not (($lesser cmp $greater) == -($sign))' # 11
602*0Sstevel@tonic-gate		     );
603*0Sstevel@tonic-gate		@test{@test} = 0 x @test;
604*0Sstevel@tonic-gate		$test = 0;
605*0Sstevel@tonic-gate		for my $ti (@test) {
606*0Sstevel@tonic-gate		    $test{$ti} = eval $ti;
607*0Sstevel@tonic-gate		    $test ||= $test{$ti}
608*0Sstevel@tonic-gate		}
609*0Sstevel@tonic-gate		tryneoalpha($Locale, 102, $test == 0);
610*0Sstevel@tonic-gate		if ($test) {
611*0Sstevel@tonic-gate		    debug "# lesser  = '$lesser'\n";
612*0Sstevel@tonic-gate		    debug "# greater = '$greater'\n";
613*0Sstevel@tonic-gate		    debug "# lesser cmp greater = ",
614*0Sstevel@tonic-gate		          $lesser cmp $greater, "\n";
615*0Sstevel@tonic-gate		    debug "# greater cmp lesser = ",
616*0Sstevel@tonic-gate		          $greater cmp $lesser, "\n";
617*0Sstevel@tonic-gate		    debug "# (greater) from = $from, to = $to\n";
618*0Sstevel@tonic-gate		    for my $ti (@test) {
619*0Sstevel@tonic-gate			debugf("# %-40s %-4s", $ti,
620*0Sstevel@tonic-gate			       $test{$ti} ? 'FAIL' : 'ok');
621*0Sstevel@tonic-gate			if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
622*0Sstevel@tonic-gate			    debugf("(%s == %4d)", $1, eval $1);
623*0Sstevel@tonic-gate			}
624*0Sstevel@tonic-gate			debug "\n#";
625*0Sstevel@tonic-gate		    }
626*0Sstevel@tonic-gate
627*0Sstevel@tonic-gate		    last;
628*0Sstevel@tonic-gate		}
629*0Sstevel@tonic-gate	    }
630*0Sstevel@tonic-gate	}
631*0Sstevel@tonic-gate    }
632*0Sstevel@tonic-gate
633*0Sstevel@tonic-gate    use locale;
634*0Sstevel@tonic-gate
635*0Sstevel@tonic-gate    my ($x, $y) = (1.23, 1.23);
636*0Sstevel@tonic-gate
637*0Sstevel@tonic-gate    $a = "$x";
638*0Sstevel@tonic-gate    printf ''; # printf used to reset locale to "C"
639*0Sstevel@tonic-gate    $b = "$y";
640*0Sstevel@tonic-gate
641*0Sstevel@tonic-gate    debug "# 103..107: a = $a, b = $b, Locale = $Locale\n";
642*0Sstevel@tonic-gate
643*0Sstevel@tonic-gate    tryneoalpha($Locale, 103, $a eq $b);
644*0Sstevel@tonic-gate
645*0Sstevel@tonic-gate    my $c = "$x";
646*0Sstevel@tonic-gate    my $z = sprintf ''; # sprintf used to reset locale to "C"
647*0Sstevel@tonic-gate    my $d = "$y";
648*0Sstevel@tonic-gate
649*0Sstevel@tonic-gate    debug "# 104..107: c = $c, d = $d, Locale = $Locale\n";
650*0Sstevel@tonic-gate
651*0Sstevel@tonic-gate    tryneoalpha($Locale, 104, $c eq $d);
652*0Sstevel@tonic-gate
653*0Sstevel@tonic-gate    {
654*0Sstevel@tonic-gate	use warnings;
655*0Sstevel@tonic-gate	my $w = 0;
656*0Sstevel@tonic-gate	local $SIG{__WARN__} =
657*0Sstevel@tonic-gate	    sub {
658*0Sstevel@tonic-gate		print "# @_\n";
659*0Sstevel@tonic-gate		$w++;
660*0Sstevel@tonic-gate	    };
661*0Sstevel@tonic-gate
662*0Sstevel@tonic-gate	# The == (among other ops) used to warn for locales
663*0Sstevel@tonic-gate	# that had something else than "." as the radix character.
664*0Sstevel@tonic-gate
665*0Sstevel@tonic-gate	tryneoalpha($Locale, 105, $c == 1.23);
666*0Sstevel@tonic-gate
667*0Sstevel@tonic-gate	tryneoalpha($Locale, 106, $c == $x);
668*0Sstevel@tonic-gate
669*0Sstevel@tonic-gate	tryneoalpha($Locale, 107, $c == $d);
670*0Sstevel@tonic-gate
671*0Sstevel@tonic-gate	{
672*0Sstevel@tonic-gate#	    no locale; # XXX did this ever work correctly?
673*0Sstevel@tonic-gate
674*0Sstevel@tonic-gate	    my $e = "$x";
675*0Sstevel@tonic-gate
676*0Sstevel@tonic-gate	    debug "# 108..110: e = $e, Locale = $Locale\n";
677*0Sstevel@tonic-gate
678*0Sstevel@tonic-gate	    tryneoalpha($Locale, 108, $e == 1.23);
679*0Sstevel@tonic-gate
680*0Sstevel@tonic-gate	    tryneoalpha($Locale, 109, $e == $x);
681*0Sstevel@tonic-gate
682*0Sstevel@tonic-gate	    tryneoalpha($Locale, 110, $e == $c);
683*0Sstevel@tonic-gate	}
684*0Sstevel@tonic-gate
685*0Sstevel@tonic-gate	my $f = "1.23";
686*0Sstevel@tonic-gate	my $g = 2.34;
687*0Sstevel@tonic-gate
688*0Sstevel@tonic-gate	debug "# 111..115: f = $f, g = $g, locale = $Locale\n";
689*0Sstevel@tonic-gate
690*0Sstevel@tonic-gate	tryneoalpha($Locale, 111, $f == 1.23);
691*0Sstevel@tonic-gate
692*0Sstevel@tonic-gate	tryneoalpha($Locale, 112, $f == $x);
693*0Sstevel@tonic-gate
694*0Sstevel@tonic-gate	tryneoalpha($Locale, 113, $f == $c);
695*0Sstevel@tonic-gate
696*0Sstevel@tonic-gate	tryneoalpha($Locale, 114, abs(($f + $g) - 3.57) < 0.01);
697*0Sstevel@tonic-gate
698*0Sstevel@tonic-gate	tryneoalpha($Locale, 115, $w == 0);
699*0Sstevel@tonic-gate    }
700*0Sstevel@tonic-gate
701*0Sstevel@tonic-gate    # Does taking lc separately differ from taking
702*0Sstevel@tonic-gate    # the lc "in-line"?  (This was the bug 19990704.002, change #3568.)
703*0Sstevel@tonic-gate    # The bug was in the caching of the 'o'-magic.
704*0Sstevel@tonic-gate    {
705*0Sstevel@tonic-gate	use locale;
706*0Sstevel@tonic-gate
707*0Sstevel@tonic-gate	sub lcA {
708*0Sstevel@tonic-gate	    my $lc0 = lc $_[0];
709*0Sstevel@tonic-gate	    my $lc1 = lc $_[1];
710*0Sstevel@tonic-gate	    return $lc0 cmp $lc1;
711*0Sstevel@tonic-gate	}
712*0Sstevel@tonic-gate
713*0Sstevel@tonic-gate        sub lcB {
714*0Sstevel@tonic-gate	    return lc($_[0]) cmp lc($_[1]);
715*0Sstevel@tonic-gate	}
716*0Sstevel@tonic-gate
717*0Sstevel@tonic-gate        my $x = "ab";
718*0Sstevel@tonic-gate        my $y = "aa";
719*0Sstevel@tonic-gate        my $z = "AB";
720*0Sstevel@tonic-gate
721*0Sstevel@tonic-gate        tryneoalpha($Locale, 116,
722*0Sstevel@tonic-gate		    lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
723*0Sstevel@tonic-gate		    lcA($x, $z) == 0 && lcB($x, $z) == 0);
724*0Sstevel@tonic-gate    }
725*0Sstevel@tonic-gate
726*0Sstevel@tonic-gate    # Does lc of an UPPER (if different from the UPPER) match
727*0Sstevel@tonic-gate    # case-insensitively the UPPER, and does the UPPER match
728*0Sstevel@tonic-gate    # case-insensitively the lc of the UPPER.  And vice versa.
729*0Sstevel@tonic-gate    {
730*0Sstevel@tonic-gate        use locale;
731*0Sstevel@tonic-gate        no utf8;
732*0Sstevel@tonic-gate        my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
733*0Sstevel@tonic-gate
734*0Sstevel@tonic-gate        my @f = ();
735*0Sstevel@tonic-gate        foreach my $x (keys %UPPER) {
736*0Sstevel@tonic-gate	    my $y = lc $x;
737*0Sstevel@tonic-gate	    next unless uc $y eq $x;
738*0Sstevel@tonic-gate	    print "# UPPER $x lc $y ",
739*0Sstevel@tonic-gate	    $x =~ /$y/i ? 1 : 0, " ",
740*0Sstevel@tonic-gate	    $y =~ /$x/i ? 1 : 0, "\n" if 0;
741*0Sstevel@tonic-gate	    #
742*0Sstevel@tonic-gate	    # If $x and $y contain regular expression characters
743*0Sstevel@tonic-gate	    # AND THEY lowercase (/i) to regular expression characters,
744*0Sstevel@tonic-gate	    # regcomp() will be mightily confused.  No, the \Q doesn't
745*0Sstevel@tonic-gate	    # help here (maybe regex engine internal lowercasing
746*0Sstevel@tonic-gate	    # is done after the \Q?)  An example of this happening is
747*0Sstevel@tonic-gate	    # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
748*0Sstevel@tonic-gate	    # the chr(173) (the "[") is the lowercase of the chr(235).
749*0Sstevel@tonic-gate	    #
750*0Sstevel@tonic-gate	    # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
751*0Sstevel@tonic-gate	    # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
752*0Sstevel@tonic-gate	    # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
753*0Sstevel@tonic-gate	    # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
754*0Sstevel@tonic-gate	    # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
755*0Sstevel@tonic-gate	    # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
756*0Sstevel@tonic-gate	    #
757*0Sstevel@tonic-gate	    # Similar things can happen even under (bastardised)
758*0Sstevel@tonic-gate	    # non-EBCDIC locales: in many European countries before the
759*0Sstevel@tonic-gate	    # advent of ISO 8859-x nationally customised versions of
760*0Sstevel@tonic-gate	    # ISO 646 were devised, reusing certain punctuation
761*0Sstevel@tonic-gate	    # characters for modified characters needed by the
762*0Sstevel@tonic-gate	    # country/language.  For example, the "|" might have
763*0Sstevel@tonic-gate	    # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
764*0Sstevel@tonic-gate	    #
765*0Sstevel@tonic-gate	    if ($x =~ $re || $y =~ $re) {
766*0Sstevel@tonic-gate		print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
767*0Sstevel@tonic-gate		next;
768*0Sstevel@tonic-gate	    }
769*0Sstevel@tonic-gate	    # With utf8 both will fail since the locale concept
770*0Sstevel@tonic-gate	    # of upper/lower does not work well in Unicode.
771*0Sstevel@tonic-gate	    push @f, $x unless $x =~ /$y/i == $y =~ /$x/i;
772*0Sstevel@tonic-gate
773*0Sstevel@tonic-gate	    foreach my $x (keys %lower) {
774*0Sstevel@tonic-gate		my $y = uc $x;
775*0Sstevel@tonic-gate		next unless lc $y eq $x;
776*0Sstevel@tonic-gate		print "# lower $x uc $y ",
777*0Sstevel@tonic-gate		$x =~ /$y/i ? 1 : 0, " ",
778*0Sstevel@tonic-gate		$y =~ /$x/i ? 1 : 0, "\n" if 0;
779*0Sstevel@tonic-gate		if ($x =~ $re || $y =~ $re) { # See above.
780*0Sstevel@tonic-gate		    print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
781*0Sstevel@tonic-gate		    next;
782*0Sstevel@tonic-gate		}
783*0Sstevel@tonic-gate		# With utf8 both will fail since the locale concept
784*0Sstevel@tonic-gate		# of upper/lower does not work well in Unicode.
785*0Sstevel@tonic-gate		push @f, $x unless $x =~ /$y/i == $y =~ /$x/i;
786*0Sstevel@tonic-gate	    }
787*0Sstevel@tonic-gate	    tryneoalpha($Locale, 117, @f == 0);
788*0Sstevel@tonic-gate	    if (@f) {
789*0Sstevel@tonic-gate		print "# failed 117 locale '$Locale' characters @f\n"
790*0Sstevel@tonic-gate  	    }
791*0Sstevel@tonic-gate        }
792*0Sstevel@tonic-gate    }
793*0Sstevel@tonic-gate}
794*0Sstevel@tonic-gate
795*0Sstevel@tonic-gate# Recount the errors.
796*0Sstevel@tonic-gate
797*0Sstevel@tonic-gateforeach (&last_without_setlocale()+1..$last) {
798*0Sstevel@tonic-gate    if ($Problem{$_} || !defined $Okay{$_} || !@{$Okay{$_}}) {
799*0Sstevel@tonic-gate	if ($_ == 102) {
800*0Sstevel@tonic-gate	    print "# The failure of test 102 is not necessarily fatal.\n";
801*0Sstevel@tonic-gate	    print "# It usually indicates a problem in the environment,\n";
802*0Sstevel@tonic-gate	    print "# not in Perl itself.\n";
803*0Sstevel@tonic-gate	}
804*0Sstevel@tonic-gate	print "not ";
805*0Sstevel@tonic-gate    }
806*0Sstevel@tonic-gate    print "ok $_\n";
807*0Sstevel@tonic-gate}
808*0Sstevel@tonic-gate
809*0Sstevel@tonic-gate# Give final advice.
810*0Sstevel@tonic-gate
811*0Sstevel@tonic-gatemy $didwarn = 0;
812*0Sstevel@tonic-gate
813*0Sstevel@tonic-gateforeach (99..$last) {
814*0Sstevel@tonic-gate    if ($Problem{$_}) {
815*0Sstevel@tonic-gate	my @f = sort keys %{ $Problem{$_} };
816*0Sstevel@tonic-gate	my $f = join(" ", @f);
817*0Sstevel@tonic-gate	$f =~ s/(.{50,60}) /$1\n#\t/g;
818*0Sstevel@tonic-gate	print
819*0Sstevel@tonic-gate	    "#\n",
820*0Sstevel@tonic-gate            "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
821*0Sstevel@tonic-gate	    "#\t", $f, "\n#\n",
822*0Sstevel@tonic-gate	    "# on your system may have errors because the locale test $_\n",
823*0Sstevel@tonic-gate            "# failed in ", (@f == 1 ? "that locale" : "those locales"),
824*0Sstevel@tonic-gate            ".\n";
825*0Sstevel@tonic-gate	print <<EOW;
826*0Sstevel@tonic-gate#
827*0Sstevel@tonic-gate# If your users are not using these locales you are safe for the moment,
828*0Sstevel@tonic-gate# but please report this failure first to perlbug\@perl.com using the
829*0Sstevel@tonic-gate# perlbug script (as described in the INSTALL file) so that the exact
830*0Sstevel@tonic-gate# details of the failures can be sorted out first and then your operating
831*0Sstevel@tonic-gate# system supplier can be alerted about these anomalies.
832*0Sstevel@tonic-gate#
833*0Sstevel@tonic-gateEOW
834*0Sstevel@tonic-gate	$didwarn = 1;
835*0Sstevel@tonic-gate    }
836*0Sstevel@tonic-gate}
837*0Sstevel@tonic-gate
838*0Sstevel@tonic-gate# Tell which locales were okay and which were not.
839*0Sstevel@tonic-gate
840*0Sstevel@tonic-gateif ($didwarn) {
841*0Sstevel@tonic-gate    my (@s, @F);
842*0Sstevel@tonic-gate
843*0Sstevel@tonic-gate    foreach my $l (@Locale) {
844*0Sstevel@tonic-gate	my $p = 0;
845*0Sstevel@tonic-gate	foreach my $t (102..$last) {
846*0Sstevel@tonic-gate	    $p++ if $Problem{$t}{$l};
847*0Sstevel@tonic-gate	}
848*0Sstevel@tonic-gate	push @s, $l if $p == 0;
849*0Sstevel@tonic-gate      push @F, $l unless $p == 0;
850*0Sstevel@tonic-gate    }
851*0Sstevel@tonic-gate
852*0Sstevel@tonic-gate    if (@s) {
853*0Sstevel@tonic-gate        my $s = join(" ", @s);
854*0Sstevel@tonic-gate        $s =~ s/(.{50,60}) /$1\n#\t/g;
855*0Sstevel@tonic-gate
856*0Sstevel@tonic-gate        warn
857*0Sstevel@tonic-gate    	    "# The following locales\n#\n",
858*0Sstevel@tonic-gate            "#\t", $s, "\n#\n",
859*0Sstevel@tonic-gate	    "# tested okay.\n#\n",
860*0Sstevel@tonic-gate    } else {
861*0Sstevel@tonic-gate        warn "# None of your locales were fully okay.\n";
862*0Sstevel@tonic-gate    }
863*0Sstevel@tonic-gate
864*0Sstevel@tonic-gate    if (@F) {
865*0Sstevel@tonic-gate        my $F = join(" ", @F);
866*0Sstevel@tonic-gate        $F =~ s/(.{50,60}) /$1\n#\t/g;
867*0Sstevel@tonic-gate
868*0Sstevel@tonic-gate        warn
869*0Sstevel@tonic-gate          "# The following locales\n#\n",
870*0Sstevel@tonic-gate          "#\t", $F, "\n#\n",
871*0Sstevel@tonic-gate          "# had problems.\n#\n",
872*0Sstevel@tonic-gate    } else {
873*0Sstevel@tonic-gate        warn "# None of your locales were broken.\n";
874*0Sstevel@tonic-gate    }
875*0Sstevel@tonic-gate
876*0Sstevel@tonic-gate    if (@utf8locale) {
877*0Sstevel@tonic-gate        my $S = join(" ", @utf8locale);
878*0Sstevel@tonic-gate        $S =~ s/(.{50,60}) /$1\n#\t/g;
879*0Sstevel@tonic-gate
880*0Sstevel@tonic-gate        warn "#\n# The following locales\n#\n",
881*0Sstevel@tonic-gate             "#\t", $S, "\n#\n",
882*0Sstevel@tonic-gate             "# were skipped for the tests ",
883*0Sstevel@tonic-gate             join(" ", sort {$a<=>$b} keys %utf8skip), "\n",
884*0Sstevel@tonic-gate            "# because UTF-8 and locales do not work together in Perl.\n#\n";
885*0Sstevel@tonic-gate    }
886*0Sstevel@tonic-gate}
887*0Sstevel@tonic-gate
888*0Sstevel@tonic-gatesub last { 117 }
889*0Sstevel@tonic-gate
890*0Sstevel@tonic-gate# eof
891