xref: /openbsd-src/gnu/usr.bin/perl/lib/locale.t (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
1#!./perl -wT
2
3use strict;
4use warnings;
5use Config;
6
7# This tests plain 'use locale' and adorned 'use locale ":not_characters"'
8# Because these pragmas are compile time, and I (khw) am trying to test
9# without using 'eval' as much as possible, which might cloud the issue,  the
10# crucial parts of the code are duplicated in a block for each pragma.
11
12# Unfortunately, many systems have defective locale definitions.  This test
13# file looks for both perl bugs and bugs in the system's locale definitions.
14# It can be difficult to tease apart which is which.  For the latter, there
15# are tests that are based on the POSIX standard.  A character isn't supposed
16# to be both a space and graphic, for example.  Another example is if a
17# character is the uppercase of another, that other should be the lowercase of
18# the first.  Including tests for these allows you to test for defective
19# locales, as described in perllocale.  The way this file distinguishes
20# between defective locales, and perl bugs is to see what percentage of
21# locales fail a given test.  If it's a lot, then it's more likely to be a
22# perl bug; only a few, those particular locales are likely defective.  In
23# that case the failing tests are marked TODO.  (They should be reported to
24# the vendor, however; but it's not perl's problem.)  In some cases, this
25# script has caused tickets to be filed against perl which turn out to be the
26# platform's bug, but a higher percentage of locales are failing than the
27# built-in cut-off point.  For those platforms, code has been added to
28# increase the cut-off, so those platforms don't trigger failing test reports.
29# Ideally, the platforms would get fixed and that code would be changed to
30# only kick-in when run on versions that are earlier than the fixed one.  But,
31# this rarely happens in practice.
32
33# To make a TODO test, add the string 'TODO' to its %test_names value
34
35my $is_ebcdic = ord("A") == 193;
36my $os = lc $^O;
37
38# Configure now lets you build a perl that silently ignores taint features
39my $NoTaintSupport = exists($Config{taint_support}) && !$Config{taint_support};
40
41no warnings 'locale';  # We test even weird locales; and do some scary things
42                       # in ok locales
43
44binmode STDOUT, ':utf8';
45binmode STDERR, ':utf8';
46
47BEGIN {
48    chdir 't' if -d 't';
49    @INC = '../lib';
50    unshift @INC, '.';
51    require './loc_tools.pl';
52    unless (locales_enabled('LC_CTYPE')) {
53	print "1..0\n";
54	exit;
55    }
56    $| = 1;
57    require Config; import Config;
58}
59
60use feature 'fc';
61my @langinfo;
62BEGIN {
63    @langinfo = qw(
64                    CODESET
65                    RADIXCHAR
66                    THOUSEP
67                    CRNCYSTR
68                    ALT_DIGITS
69                    YESEXPR
70                    YESSTR
71                    NOEXPR
72                    NOSTR
73                    ERA
74                    ABDAY_1
75                    DAY_1
76                    ABMON_1
77                    MON_1
78                    AM_STR
79                    PM_STR
80                    D_FMT
81                    D_T_FMT
82                    ERA_D_FMT
83                    ERA_D_T_FMT
84                    ERA_T_FMT
85                    T_FMT
86                    T_FMT_AMPM
87                    _NL_ADDRESS_POSTAL_FMT
88                    _NL_ADDRESS_COUNTRY_NAME
89                    _NL_ADDRESS_COUNTRY_POST
90                    _NL_ADDRESS_COUNTRY_AB2
91                    _NL_ADDRESS_COUNTRY_AB3
92                    _NL_ADDRESS_COUNTRY_CAR
93                    _NL_ADDRESS_COUNTRY_NUM
94                    _NL_ADDRESS_COUNTRY_ISBN
95                    _NL_ADDRESS_LANG_NAME
96                    _NL_ADDRESS_LANG_AB
97                    _NL_ADDRESS_LANG_TERM
98                    _NL_ADDRESS_LANG_LIB
99                    _NL_IDENTIFICATION_TITLE
100                    _NL_IDENTIFICATION_SOURCE
101                    _NL_IDENTIFICATION_ADDRESS
102                    _NL_IDENTIFICATION_CONTACT
103                    _NL_IDENTIFICATION_EMAIL
104                    _NL_IDENTIFICATION_TEL
105                    _NL_IDENTIFICATION_FAX
106                    _NL_IDENTIFICATION_LANGUAGE
107                    _NL_IDENTIFICATION_TERRITORY
108                    _NL_IDENTIFICATION_AUDIENCE
109                    _NL_IDENTIFICATION_APPLICATION
110                    _NL_IDENTIFICATION_ABBREVIATION
111                    _NL_IDENTIFICATION_REVISION
112                    _NL_IDENTIFICATION_DATE
113                    _NL_IDENTIFICATION_CATEGORY
114                    _NL_MEASUREMENT_MEASUREMENT
115                    _NL_NAME_NAME_FMT
116                    _NL_NAME_NAME_GEN
117                    _NL_NAME_NAME_MR
118                    _NL_NAME_NAME_MRS
119                    _NL_NAME_NAME_MISS
120                    _NL_NAME_NAME_MS
121                    _NL_PAPER_HEIGHT
122                    _NL_PAPER_WIDTH
123                    _NL_TELEPHONE_TEL_INT_FMT
124                    _NL_TELEPHONE_TEL_DOM_FMT
125                    _NL_TELEPHONE_INT_SELECT
126                    _NL_TELEPHONE_INT_PREFIX
127                  );
128}
129
130use I18N::Langinfo 'langinfo', @langinfo;
131
132# =1 adds debugging output; =2 increases the verbosity somewhat
133our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
134
135# Certain tests have been shown to be problematical for a few locales.  Don't
136# fail them unless at least this percentage of the tested locales fail.
137# EBCDIC os390 has more locales fail than normal, because it has locales that
138# move various critical characters like '['.
139my $acceptable_failure_percentage = ($os =~ / ^ ( os390 ) $ /x)
140                                    ? 10
141                                    : 5;
142
143# The list of test numbers of the problematic tests.
144my %problematical_tests;
145
146# If any %problematical_tests fails in one of these locales, it is
147# considered a TODO.
148my %known_bad_locales = (
149                          irix => qr/ ^ (?: cs | hu | sk ) $/x,
150                          darwin => qr/ ^ lt_LT.ISO8859 /ix,
151                          os390 => qr/ ^ italian /ix,
152                          netbsd => qr/\bISO8859-2\b/i,
153
154                          # This may be the same bug as the cygwin below; it's
155                          # generating malformed UTF-8 on the radix being
156                          # mulit-byte
157                          solaris => qr/ ^ ( ar_ | pa_ ) /x,
158                        );
159
160# cygwin isn't returning proper radix length in this locale, but supposedly to
161# be fixed in later versions.
162if ($os eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) {
163    $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix;
164}
165
166use Dumpvalue;
167
168my $dumper = Dumpvalue->new(
169                            tick => qq{"},
170                            quoteHighBit => 0,
171                            unctrl => "quote"
172                           );
173
174sub debug {
175  return unless $debug;
176  my($mess) = join "", '# ', @_;
177  chomp $mess;
178  print STDERR $dumper->stringify($mess,1), "\n";
179}
180
181sub note {
182    local $debug = 1;
183    debug @_;
184}
185
186sub debug_more {
187  return unless $debug > 1;
188  return debug(@_);
189}
190
191sub debugf {
192    printf STDERR @_ if $debug;
193}
194
195$a = 'abc %9';
196
197my $test_num = 0;
198
199sub ok {
200    my ($result, $message) = @_;
201    $message = "" unless defined $message;
202
203    print 'not ' unless ($result);
204    print "ok " . ++$test_num;
205    print " $message";
206    print "\n";
207    return ($result) ? 1 : 0;
208}
209
210sub skip {
211    return ok 1, "skipped: " . shift;
212}
213
214sub fail {
215    return ok 0, shift;
216}
217
218# First we'll do a lot of taint checking for locales.
219# This is the easiest to test, actually, as any locale,
220# even the default locale will taint under 'use locale'.
221
222sub is_tainted { # hello, camel two.
223    no warnings 'uninitialized' ;
224    my $dummy;
225    local $@;
226    not eval { $dummy = join("", @_), kill 0; 1 }
227}
228
229sub check_taint ($;$) {
230    my $message_tail = $_[1] // "";
231
232    # Extra blanks are so aligns with taint_not output
233    $message_tail = ":     $message_tail" if $message_tail;
234    if ($NoTaintSupport) {
235        skip("your perl was built without taint support");
236    }
237    else {
238        ok is_tainted($_[0]), "verify that is tainted$message_tail";
239    }
240}
241
242sub check_taint_not ($;$) {
243    my $message_tail = $_[1] // "";
244    $message_tail = ":  $message_tail" if $message_tail;
245    ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
246}
247
248foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
249    my $short_result = locales_enabled($category);
250    ok ($short_result == 0 || $short_result == 1,
251        "Verify locales_enabled('$category') returns 0 or 1");
252    debug("locales_enabled('$category') returned '$short_result'");
253    my $long_result = locales_enabled("LC_$category");
254    if (! ok ($long_result == $short_result,
255              "   and locales_enabled('LC_$category') returns "
256            . "the same value")
257    ) {
258        debug("locales_enabled('LC_$category') returned $long_result");
259    }
260}
261
262"\tb\t" =~ /^m?(\s)(.*)\1$/;
263check_taint_not   $&, "not tainted outside 'use locale'";
264;
265
266use locale;	# engage locale and therefore locale taint.
267
268# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
269# ":notcharacters"
270
271check_taint_not   $a, '$a';
272
273check_taint       uc($a), 'uc($a)';
274check_taint       "\U$a", '"\U$a"';
275check_taint       ucfirst($a), 'ucfirst($a)';
276check_taint       "\u$a", '"\u$a"';
277check_taint       lc($a), 'lc($a)';
278check_taint       fc($a), 'fc($a)';
279check_taint       "\L$a", '"\L$a"';
280check_taint       "\F$a", '"\F$a"';
281check_taint       lcfirst($a), 'lcfirst($a)';
282check_taint       "\l$a", '"\l$a"';
283
284check_taint_not  sprintf('%e', 123.456), "sprintf('%e', 123.456)";
285check_taint_not  sprintf('%f', 123.456), "sprintf('%f', 123.456)";
286check_taint_not  sprintf('%g', 123.456), "sprintf('%g', 123.456)";
287check_taint_not  sprintf('%d', 123.456), "sprintf('%d', 123.456)";
288check_taint_not  sprintf('%x', 123.456), "sprintf('%x', 123.456)";
289
290$_ = $a;	# untaint $_
291
292$_ = uc($a);	# taint $_
293
294check_taint      $_, '$_ = uc($a)';
295
296/(\w)/;	# taint $&, $`, $', $+, $1.
297check_taint      $&, "\$& from /(\\w)/";
298check_taint      $`, "\t\$`";
299check_taint      $', "\t\$'";
300check_taint      $+, "\t\$+";
301check_taint      $1, "\t\$1";
302check_taint_not  $2, "\t\$2";
303
304/(.)/;	# untaint $&, $`, $', $+, $1.
305check_taint_not  $&, "\$& from /(.)/";
306check_taint_not  $`, "\t\$`";
307check_taint_not  $', "\t\$'";
308check_taint_not  $+, "\t\$+";
309check_taint_not  $1, "\t\$1";
310check_taint_not  $2, "\t\$2";
311
312/(\W)/;	# taint $&, $`, $', $+, $1.
313check_taint      $&, "\$& from /(\\W)/";
314check_taint      $`, "\t\$`";
315check_taint      $', "\t\$'";
316check_taint      $+, "\t\$+";
317check_taint      $1, "\t\$1";
318check_taint_not  $2, "\t\$2";
319
320/(.)/;	# untaint $&, $`, $', $+, $1.
321check_taint_not  $&, "\$& from /(.)/";
322check_taint_not  $`, "\t\$`";
323check_taint_not  $', "\t\$'";
324check_taint_not  $+, "\t\$+";
325check_taint_not  $1, "\t\$1";
326check_taint_not  $2, "\t\$2";
327
328/(\s)/;	# taint $&, $`, $', $+, $1.
329check_taint      $&, "\$& from /(\\s)/";
330check_taint      $`, "\t\$`";
331check_taint      $', "\t\$'";
332check_taint      $+, "\t\$+";
333check_taint      $1, "\t\$1";
334check_taint_not  $2, "\t\$2";
335
336/(.)/;	# untaint $&, $`, $', $+, $1.
337check_taint_not  $&, "\$& from /(.)/";
338
339/(\S)/;	# taint $&, $`, $', $+, $1.
340check_taint      $&, "\$& from /(\\S)/";
341check_taint      $`, "\t\$`";
342check_taint      $', "\t\$'";
343check_taint      $+, "\t\$+";
344check_taint      $1, "\t\$1";
345check_taint_not  $2, "\t\$2";
346
347/(.)/;	# untaint $&, $`, $', $+, $1.
348check_taint_not  $&, "\$& from /(.)/";
349
350"0" =~ /(\d)/;	# taint $&, $`, $', $+, $1.
351check_taint      $&, "\$& from /(\\d)/";
352check_taint      $`, "\t\$`";
353check_taint      $', "\t\$'";
354check_taint      $+, "\t\$+";
355check_taint      $1, "\t\$1";
356check_taint_not  $2, "\t\$2";
357
358/(.)/;	# untaint $&, $`, $', $+, $1.
359check_taint_not  $&, "\$& from /(.)/";
360
361/(\D)/;	# taint $&, $`, $', $+, $1.
362check_taint      $&, "\$& from /(\\D)/";
363check_taint      $`, "\t\$`";
364check_taint      $', "\t\$'";
365check_taint      $+, "\t\$+";
366check_taint      $1, "\t\$1";
367check_taint_not  $2, "\t\$2";
368
369/(.)/;	# untaint $&, $`, $', $+, $1.
370check_taint_not  $&, "\$& from /(.)/";
371
372/([[:alnum:]])/;	# taint $&, $`, $', $+, $1.
373check_taint      $&, "\$& from /([[:alnum:]])/";
374check_taint      $`, "\t\$`";
375check_taint      $', "\t\$'";
376check_taint      $+, "\t\$+";
377check_taint      $1, "\t\$1";
378check_taint_not  $2, "\t\$2";
379
380/(.)/;	# untaint $&, $`, $', $+, $1.
381check_taint_not  $&, "\$& from /(.)/";
382
383/([[:^alnum:]])/;	# taint $&, $`, $', $+, $1.
384check_taint      $&, "\$& from /([[:^alnum:]])/";
385check_taint      $`, "\t\$`";
386check_taint      $', "\t\$'";
387check_taint      $+, "\t\$+";
388check_taint      $1, "\t\$1";
389check_taint_not  $2, "\t\$2";
390
391"a" =~ /(a)|(\w)/;	# taint $&, $`, $', $+, $1.
392check_taint      $&, "\$& from /(a)|(\\w)/";
393check_taint      $`, "\t\$`";
394check_taint      $', "\t\$'";
395check_taint      $+, "\t\$+";
396check_taint      $1, "\t\$1";
397ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
398ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
399check_taint_not  $2, "\t\$2";
400check_taint_not  $3, "\t\$3";
401
402/(.)/;	# untaint $&, $`, $', $+, $1.
403check_taint_not  $&, "\$& from /(.)/";
404
405"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;	# no tainting because no locale dependence
406check_taint_not      $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
407check_taint_not      $`, "\t\$`";
408check_taint_not      $', "\t\$'";
409check_taint_not      $+, "\t\$+";
410check_taint_not      $1, "\t\$1";
411ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
412check_taint_not      $2, "\t\$2";
413
414/(.)/;	# untaint $&, $`, $', $+, $1.
415check_taint_not  $&, "\$& from /./";
416
417"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;	# taints because depends on locale
418check_taint      $&, "\$& from /(\\N{KELVIN SIGN})/i";
419check_taint      $`, "\t\$`";
420check_taint      $', "\t\$'";
421check_taint      $+, "\t\$+";
422check_taint      $1, "\t\$1";
423check_taint_not      $2, "\t\$2";
424
425/(.)/;	# untaint $&, $`, $', $+, $1.
426check_taint_not  $&, "\$& from /(.)/";
427
428"a:" =~ /(.)\b(.)/;	# taint $&, $`, $', $+, $1.
429check_taint      $&, "\$& from /(.)\\b(.)/";
430check_taint      $`, "\t\$`";
431check_taint      $', "\t\$'";
432check_taint      $+, "\t\$+";
433check_taint      $1, "\t\$1";
434check_taint      $2, "\t\$2";
435check_taint_not  $3, "\t\$3";
436
437/(.)/;	# untaint $&, $`, $', $+, $1.
438check_taint_not  $&, "\$& from /./";
439
440"aa" =~ /(.)\B(.)/;	# taint $&, $`, $', $+, $1.
441check_taint      $&, "\$& from /(.)\\B(.)/";
442check_taint      $`, "\t\$`";
443check_taint      $', "\t\$'";
444check_taint      $+, "\t\$+";
445check_taint      $1, "\t\$1";
446check_taint      $2, "\t\$2";
447check_taint_not  $3, "\t\$3";
448
449/(.)/;	# untaint $&, $`, $', $+, $1.
450check_taint_not  $&, "\$& from /./";
451
452"aaa" =~ /(.).(\1)/i;	# notaint because not locale dependent
453check_taint_not      $&, "\$ & from /(.).(\\1)/";
454check_taint_not      $`, "\t\$`";
455check_taint_not      $', "\t\$'";
456check_taint_not      $+, "\t\$+";
457check_taint_not      $1, "\t\$1";
458check_taint_not      $2, "\t\$2";
459check_taint_not      $3, "\t\$3";
460
461/(.)/;	# untaint $&, $`, $', $+, $1.
462check_taint_not  $&, "\$ & from /./";
463
464$_ = $a;	# untaint $_
465
466check_taint_not  $_, 'untainting $_ works';
467
468/(b)/;		# this must not taint
469check_taint_not  $&, "\$ & from /(b)/";
470check_taint_not  $`, "\t\$`";
471check_taint_not  $', "\t\$'";
472check_taint_not  $+, "\t\$+";
473check_taint_not  $1, "\t\$1";
474check_taint_not  $2, "\t\$2";
475
476$_ = $a;	# untaint $_
477
478check_taint_not  $_, 'untainting $_ works';
479
480$b = uc($a);	# taint $b
481s/(.+)/$b/;	# this must taint only the $_
482
483check_taint      $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
484check_taint_not  $&, "\t\$&";
485check_taint_not  $`, "\t\$`";
486check_taint_not  $', "\t\$'";
487check_taint_not  $+, "\t\$+";
488check_taint_not  $1, "\t\$1";
489check_taint_not  $2, "\t\$2";
490
491$_ = $a;	# untaint $_
492
493s/(.+)/b/;	# this must not taint
494check_taint_not  $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
495check_taint_not  $&, "\t\$&";
496check_taint_not  $`, "\t\$`";
497check_taint_not  $', "\t\$'";
498check_taint_not  $+, "\t\$+";
499check_taint_not  $1, "\t\$1";
500check_taint_not  $2, "\t\$2";
501
502$b = $a;	# untaint $b
503
504($b = $a) =~ s/\w/$&/;
505check_taint      $b, '$b from ($b = $a) =~ s/\w/$&/';	# $b should be tainted.
506check_taint_not  $a, '$a from ($b = $a) =~ s/\w/$&/';	# $a should be not.
507
508$_ = $a;	# untaint $_
509
510s/(\w)/\l$1/;	# this must taint
511check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,';	# this must taint
512check_taint      $&, "\t\$&";
513check_taint      $`, "\t\$`";
514check_taint      $', "\t\$'";
515check_taint      $+, "\t\$+";
516check_taint      $1, "\t\$1";
517check_taint_not  $2, "\t\$2";
518
519$_ = $a;	# untaint $_
520
521s/(\w)/\L$1/;	# this must taint
522check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
523check_taint      $&, "\t\$&";
524check_taint      $`, "\t\$`";
525check_taint      $', "\t\$'";
526check_taint      $+, "\t\$+";
527check_taint      $1, "\t\$1";
528check_taint_not  $2, "\t\$2";
529
530$_ = $a;	# untaint $_
531
532s/(\w)/\u$1/;	# this must taint
533check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
534check_taint      $&, "\t\$&";
535check_taint      $`, "\t\$`";
536check_taint      $', "\t\$'";
537check_taint      $+, "\t\$+";
538check_taint      $1, "\t\$1";
539check_taint_not  $2, "\t\$2";
540
541$_ = $a;	# untaint $_
542
543s/(\w)/\U$1/;	# this must taint
544check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
545check_taint      $&, "\t\$&";
546check_taint      $`, "\t\$`";
547check_taint      $', "\t\$'";
548check_taint      $+, "\t\$+";
549check_taint      $1, "\t\$1";
550check_taint_not  $2, "\t\$2";
551
552# After all this tainting $a should be cool.
553
554check_taint_not  $a, '$a still not tainted';
555
556"a" =~ /([a-z])/;
557check_taint_not $1, '"a" =~ /([a-z])/';
558"foo.bar_baz" =~ /^(.*)[._](.*?)$/;  # Bug 120675
559check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
560
561# BE SURE TO COPY ANYTHING YOU ADD to the block below
562
563{   # This is just the previous tests copied here with a different
564    # compile-time pragma.
565
566    use locale ':not_characters'; # engage restricted locale with different
567                                  # tainting rules
568    check_taint_not   $a, '$a';
569
570    check_taint_not   uc($a), 'uc($a)';
571    check_taint_not   "\U$a", '"\U$a"';
572    check_taint_not   ucfirst($a), 'ucfirst($a)';
573    check_taint_not   "\u$a", '"\u$a"';
574    check_taint_not   lc($a), 'lc($a)';
575    check_taint_not   fc($a), 'fc($a)';
576    check_taint_not   "\L$a", '"\L$a"';
577    check_taint_not   "\F$a", '"\F$a"';
578    check_taint_not   lcfirst($a), 'lcfirst($a)';
579    check_taint_not   "\l$a", '"\l$a"';
580
581    check_taint_not  sprintf('%e', 123.456), "sprintf('%e', 123.456)";
582    check_taint_not  sprintf('%f', 123.456), "sprintf('%f', 123.456)";
583    check_taint_not  sprintf('%g', 123.456), "sprintf('%g', 123.456)";
584    check_taint_not  sprintf('%d', 123.456), "sprintf('%d', 123.456)";
585    check_taint_not  sprintf('%x', 123.456), "sprintf('%x', 123.456)";
586
587    $_ = $a;	# untaint $_
588
589    $_ = uc($a);
590
591    check_taint_not  $_, '$_ = uc($a)';
592
593    /(\w)/;
594    check_taint_not  $&, "\$& from /(\\w)/";
595    check_taint_not  $`, "\t\$`";
596    check_taint_not  $', "\t\$'";
597    check_taint_not  $+, "\t\$+";
598    check_taint_not  $1, "\t\$1";
599    check_taint_not  $2, "\t\$2";
600
601    /(.)/;	# untaint $&, $`, $', $+, $1.
602    check_taint_not  $&, "\$& from /(.)/";
603    check_taint_not  $`, "\t\$`";
604    check_taint_not  $', "\t\$'";
605    check_taint_not  $+, "\t\$+";
606    check_taint_not  $1, "\t\$1";
607    check_taint_not  $2, "\t\$2";
608
609    /(\W)/;
610    check_taint_not  $&, "\$& from /(\\W)/";
611    check_taint_not  $`, "\t\$`";
612    check_taint_not  $', "\t\$'";
613    check_taint_not  $+, "\t\$+";
614    check_taint_not  $1, "\t\$1";
615    check_taint_not  $2, "\t\$2";
616
617    /(.)/;	# untaint $&, $`, $', $+, $1.
618    check_taint_not  $&, "\$& from /(.)/";
619    check_taint_not  $`, "\t\$`";
620    check_taint_not  $', "\t\$'";
621    check_taint_not  $+, "\t\$+";
622    check_taint_not  $1, "\t\$1";
623    check_taint_not  $2, "\t\$2";
624
625    /(\s)/;
626    check_taint_not  $&, "\$& from /(\\s)/";
627    check_taint_not  $`, "\t\$`";
628    check_taint_not  $', "\t\$'";
629    check_taint_not  $+, "\t\$+";
630    check_taint_not  $1, "\t\$1";
631    check_taint_not  $2, "\t\$2";
632
633    /(.)/;	# untaint $&, $`, $', $+, $1.
634    check_taint_not  $&, "\$& from /(.)/";
635
636    /(\S)/;
637    check_taint_not  $&, "\$& from /(\\S)/";
638    check_taint_not  $`, "\t\$`";
639    check_taint_not  $', "\t\$'";
640    check_taint_not  $+, "\t\$+";
641    check_taint_not  $1, "\t\$1";
642    check_taint_not  $2, "\t\$2";
643
644    /(.)/;	# untaint $&, $`, $', $+, $1.
645    check_taint_not  $&, "\$& from /(.)/";
646
647    "0" =~ /(\d)/;
648    check_taint_not  $&, "\$& from /(\\d)/";
649    check_taint_not  $`, "\t\$`";
650    check_taint_not  $', "\t\$'";
651    check_taint_not  $+, "\t\$+";
652    check_taint_not  $1, "\t\$1";
653    check_taint_not  $2, "\t\$2";
654
655    /(.)/;	# untaint $&, $`, $', $+, $1.
656    check_taint_not  $&, "\$& from /(.)/";
657
658    /(\D)/;
659    check_taint_not  $&, "\$& from /(\\D)/";
660    check_taint_not  $`, "\t\$`";
661    check_taint_not  $', "\t\$'";
662    check_taint_not  $+, "\t\$+";
663    check_taint_not  $1, "\t\$1";
664    check_taint_not  $2, "\t\$2";
665
666    /(.)/;	# untaint $&, $`, $', $+, $1.
667    check_taint_not  $&, "\$& from /(.)/";
668
669    /([[:alnum:]])/;
670    check_taint_not  $&, "\$& from /([[:alnum:]])/";
671    check_taint_not  $`, "\t\$`";
672    check_taint_not  $', "\t\$'";
673    check_taint_not  $+, "\t\$+";
674    check_taint_not  $1, "\t\$1";
675    check_taint_not  $2, "\t\$2";
676
677    /(.)/;	# untaint $&, $`, $', $+, $1.
678    check_taint_not  $&, "\$& from /(.)/";
679
680    /([[:^alnum:]])/;
681    check_taint_not  $&, "\$& from /([[:^alnum:]])/";
682    check_taint_not  $`, "\t\$`";
683    check_taint_not  $', "\t\$'";
684    check_taint_not  $+, "\t\$+";
685    check_taint_not  $1, "\t\$1";
686    check_taint_not  $2, "\t\$2";
687
688    "a" =~ /(a)|(\w)/;
689    check_taint_not  $&, "\$& from /(a)|(\\w)/";
690    check_taint_not  $`, "\t\$`";
691    check_taint_not  $', "\t\$'";
692    check_taint_not  $+, "\t\$+";
693    check_taint_not  $1, "\t\$1";
694    ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
695    ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
696    check_taint_not  $2, "\t\$2";
697    check_taint_not  $3, "\t\$3";
698
699    /(.)/;	# untaint $&, $`, $', $+, $1.
700    check_taint_not  $&, "\$& from /(.)/";
701
702    "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
703    check_taint_not      $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
704    check_taint_not      $`, "\t\$`";
705    check_taint_not      $', "\t\$'";
706    check_taint_not      $+, "\t\$+";
707    check_taint_not      $1, "\t\$1";
708    ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
709    check_taint_not      $2, "\t\$2";
710
711    /(.)/;	# untaint $&, $`, $', $+, $1.
712    check_taint_not  $&, "\$& from /./";
713
714    "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
715    check_taint_not  $&, "\$& from /(\\N{KELVIN SIGN})/i";
716    check_taint_not  $`, "\t\$`";
717    check_taint_not  $', "\t\$'";
718    check_taint_not  $+, "\t\$+";
719    check_taint_not  $1, "\t\$1";
720    check_taint_not      $2, "\t\$2";
721
722    /(.)/;	# untaint $&, $`, $', $+, $1.
723    check_taint_not  $&, "\$& from /(.)/";
724
725    "a:" =~ /(.)\b(.)/;
726    check_taint_not  $&, "\$& from /(.)\\b(.)/";
727    check_taint_not  $`, "\t\$`";
728    check_taint_not  $', "\t\$'";
729    check_taint_not  $+, "\t\$+";
730    check_taint_not  $1, "\t\$1";
731    check_taint_not  $2, "\t\$2";
732    check_taint_not  $3, "\t\$3";
733
734    /(.)/;	# untaint $&, $`, $', $+, $1.
735    check_taint_not  $&, "\$& from /./";
736
737    "aa" =~ /(.)\B(.)/;
738    check_taint_not  $&, "\$& from /(.)\\B(.)/";
739    check_taint_not  $`, "\t\$`";
740    check_taint_not  $', "\t\$'";
741    check_taint_not  $+, "\t\$+";
742    check_taint_not  $1, "\t\$1";
743    check_taint_not  $2, "\t\$2";
744    check_taint_not  $3, "\t\$3";
745
746    /(.)/;	# untaint $&, $`, $', $+, $1.
747    check_taint_not  $&, "\$& from /./";
748
749    "aaa" =~ /(.).(\1)/i;	# notaint because not locale dependent
750    check_taint_not      $&, "\$ & from /(.).(\\1)/";
751    check_taint_not      $`, "\t\$`";
752    check_taint_not      $', "\t\$'";
753    check_taint_not      $+, "\t\$+";
754    check_taint_not      $1, "\t\$1";
755    check_taint_not      $2, "\t\$2";
756    check_taint_not      $3, "\t\$3";
757
758    /(.)/;	# untaint $&, $`, $', $+, $1.
759    check_taint_not  $&, "\$ & from /./";
760
761    $_ = $a;	# untaint $_
762
763    check_taint_not  $_, 'untainting $_ works';
764
765    /(b)/;
766    check_taint_not  $&, "\$ & from /(b)/";
767    check_taint_not  $`, "\t\$`";
768    check_taint_not  $', "\t\$'";
769    check_taint_not  $+, "\t\$+";
770    check_taint_not  $1, "\t\$1";
771    check_taint_not  $2, "\t\$2";
772
773    $_ = $a;	# untaint $_
774
775    check_taint_not  $_, 'untainting $_ works';
776
777    s/(.+)/b/;
778    check_taint_not  $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
779    check_taint_not  $&, "\t\$&";
780    check_taint_not  $`, "\t\$`";
781    check_taint_not  $', "\t\$'";
782    check_taint_not  $+, "\t\$+";
783    check_taint_not  $1, "\t\$1";
784    check_taint_not  $2, "\t\$2";
785
786    $b = $a;	# untaint $b
787
788    ($b = $a) =~ s/\w/$&/;
789    check_taint_not     $b, '$b from ($b = $a) =~ s/\w/$&/';
790    check_taint_not  $a, '$a from ($b = $a) =~ s/\w/$&/';
791
792    $_ = $a;	# untaint $_
793
794    s/(\w)/\l$1/;
795    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,';	# this must taint
796    check_taint_not     $&, "\t\$&";
797    check_taint_not     $`, "\t\$`";
798    check_taint_not     $', "\t\$'";
799    check_taint_not     $+, "\t\$+";
800    check_taint_not     $1, "\t\$1";
801    check_taint_not  $2, "\t\$2";
802
803    $_ = $a;	# untaint $_
804
805    s/(\w)/\L$1/;
806    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
807    check_taint_not     $&, "\t\$&";
808    check_taint_not     $`, "\t\$`";
809    check_taint_not     $', "\t\$'";
810    check_taint_not     $+, "\t\$+";
811    check_taint_not     $1, "\t\$1";
812    check_taint_not  $2, "\t\$2";
813
814    $_ = $a;	# untaint $_
815
816    s/(\w)/\u$1/;
817    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
818    check_taint_not     $&, "\t\$&";
819    check_taint_not     $`, "\t\$`";
820    check_taint_not     $', "\t\$'";
821    check_taint_not     $+, "\t\$+";
822    check_taint_not     $1, "\t\$1";
823    check_taint_not  $2, "\t\$2";
824
825    $_ = $a;	# untaint $_
826
827    s/(\w)/\U$1/;
828    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
829    check_taint_not     $&, "\t\$&";
830    check_taint_not     $`, "\t\$`";
831    check_taint_not     $', "\t\$'";
832    check_taint_not     $+, "\t\$+";
833    check_taint_not     $1, "\t\$1";
834    check_taint_not  $2, "\t\$2";
835
836    # After all this tainting $a should be cool.
837
838    check_taint_not  $a, '$a still not tainted';
839
840    "a" =~ /([a-z])/;
841    check_taint_not $1, '"a" =~ /([a-z])/';
842    "foo.bar_baz" =~ /^(.*)[._](.*?)$/;  # Bug 120675
843    check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
844
845}
846
847# Here are in scope of 'use locale'
848
849# I think we've seen quite enough of taint.
850# Let us do some *real* locale work now,
851# unless setlocale() is missing (i.e. minitest).
852
853# The test number before our first setlocale()
854my $final_without_setlocale = $test_num;
855
856# Find locales.
857
858debug "Scanning for locales...\n";
859
860require POSIX; import POSIX ':locale_h';
861
862debug "Scanning for just perl-compatible locales";
863my $category = 'LC_CTYPE';
864my @Locale = find_locales($category);
865if (! @Locale) {
866    $category = 'LC_ALL';
867    @Locale = find_locales($category);
868}
869debug "Scanning for even incompatible locales";
870my @include_incompatible_locales = find_locales($category,
871                                                'even incompatible locales');
872
873# The locales included in the incompatible list that aren't in the compatible
874# one.
875my @incompatible_locales;
876
877if (@Locale < @include_incompatible_locales) {
878    my %seen;
879    @seen{@Locale} = ();
880
881    foreach my $item (@include_incompatible_locales) {
882        push @incompatible_locales, $item unless exists $seen{$item};
883    }
884
885    # For each bad locale, switch into it to find out why it's incompatible
886    for my $bad_locale (@incompatible_locales) {
887        my @warnings;
888
889        use warnings 'locale';
890
891        local $SIG{__WARN__} = sub {
892            my $warning = $_[0];
893            chomp $warning;
894            push @warnings, ($warning =~ s/\n/\n# /sgr);
895        };
896
897        debug "Trying incompatible $bad_locale";
898        my $ret = setlocale(&POSIX::LC_CTYPE, $bad_locale);
899
900        my $message = "testing of locale '$bad_locale' is skipped";
901        if (@warnings) {
902            skip $message . ":\n# " . join "\n# ", @warnings;
903        }
904        elsif (! $ret) {
905            skip("$message:\n#"
906               . " setlocale(&POSIX::LC_CTYPE, '$bad_locale') failed");
907        }
908        else {
909            fail $message . ", because it is was found to be incompatible with"
910                          . " Perl, but could not discern reason";
911        }
912    }
913}
914
915debug "Locales =\n";
916for ( @Locale ) {
917    debug "$_\n";
918}
919
920unless (@Locale) {
921    print "1..$test_num\n";
922    exit;
923}
924
925
926setlocale(&POSIX::LC_ALL, "C");
927
928my %posixes;
929
930my %Problem;
931my %Okay;
932my %Known_bad_locale;   # Failed test for a locale known to be bad
933my %Testing;
934my @Added_alpha;   # Alphas that aren't in the C locale.
935my %test_names;
936
937sub disp_chars {
938    # This returns a display string denoting the input parameter @_, each
939    # entry of which is a single character in the range 0-255.  The first part
940    # of the output is a string of the characters in @_ that are ASCII
941    # graphics, and hence unambiguously displayable.  They are given by code
942    # point order.  The second part is the remaining code points, the ordinals
943    # of which are each displayed as 2-digit hex.  Blanks are inserted so as
944    # to keep anything from the first part looking like a 2-digit hex number.
945
946    no locale;
947    my @chars = sort { ord $a <=> ord $b } @_;
948    my $output = "";
949    my $range_start;
950    my $start_class;
951    push @chars, chr(258);  # This sentinel simplifies the loop termination
952                            # logic
953    foreach my $i (0 .. @chars - 1) {
954        my $char = $chars[$i];
955        my $range_end;
956        my $class;
957
958        # We avoid using [:posix:] classes, as these are being tested in this
959        # file.  Each equivalence class below is for things that can appear in
960        # a range; those that can't be in a range have class -1.  0 for those
961        # which should be output in hex; and >0 for the other ranges
962        if ($char =~ /[A-Z]/) {
963            $class = 2;
964        }
965        elsif ($char =~ /[a-z]/) {
966            $class = 3;
967        }
968        elsif ($char =~ /[0-9]/) {
969            $class = 4;
970        }
971        # Uncomment to get literal punctuation displayed instead of hex
972        #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
973        #    $class = -1;    # Punct never appears in a range
974        #}
975        else {
976            $class = 0;     # Output in hex
977        }
978
979        if (! defined $range_start) {
980            if ($class < 0) {
981                $output .= " " . $char;
982            }
983            else {
984                $range_start = ord $char;
985                $start_class = $class;
986            }
987        } # A range ends if not consecutive, or the class-type changes
988        elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
989              || $class != $start_class)
990        {
991
992            # Here, the current character is not in the range.  This means the
993            # previous character must have been.  Output the range up through
994            # that one.
995            my $range_length = $range_end - $range_start + 1;
996            if ($start_class > 0) {
997                $output .= " " . chr($range_start);
998                $output .= "-" . chr($range_end) if $range_length > 1;
999            }
1000            else {
1001                $output .= sprintf(" %02X", $range_start);
1002                $output .= sprintf("-%02X", $range_end) if $range_length > 1;
1003            }
1004
1005            # Handle the new current character, as potentially beginning a new
1006            # range
1007            undef $range_start;
1008            redo;
1009        }
1010    }
1011
1012    $output =~ s/^ //;
1013    return $output;
1014}
1015
1016sub disp_str ($) {
1017    my $string = shift;
1018
1019    # Displays the string unambiguously.  ASCII printables are always output
1020    # as-is, though perhaps separated by blanks from other characters.  If
1021    # entirely printable ASCII, just returns the string.  Otherwise if valid
1022    # UTF-8 it uses the character names for non-printable-ASCII.  Otherwise it
1023    # outputs hex for each non-ASCII-printable byte.
1024
1025    return $string if $string =~ / ^ [[:print:]]* $/xa;
1026
1027    my $result = "";
1028    my $prev_was_punct = 1; # Beginning is considered punct
1029    if (utf8::valid($string) && utf8::is_utf8($string)) {
1030        use charnames ();
1031        foreach my $char (split "", $string) {
1032
1033            # Keep punctuation adjacent to other characters; otherwise
1034            # separate them with a blank
1035            if ($char =~ /[[:punct:]]/a) {
1036                $result .= $char;
1037                $prev_was_punct = 1;
1038            }
1039            elsif ($char =~ /[[:print:]]/a) {
1040                $result .= "  " unless $prev_was_punct;
1041                $result .= $char;
1042                $prev_was_punct = 0;
1043            }
1044            else {
1045                $result .= "  " unless $prev_was_punct;
1046                my $name = charnames::viacode(ord $char);
1047                $result .= (defined $name) ? $name : ':unknown:';
1048                $prev_was_punct = 0;
1049            }
1050        }
1051    }
1052    else {
1053        use bytes;
1054        foreach my $char (split "", $string) {
1055            if ($char =~ /[[:punct:]]/a) {
1056                $result .= $char;
1057                $prev_was_punct = 1;
1058            }
1059            elsif ($char =~ /[[:print:]]/a) {
1060                $result .= " " unless $prev_was_punct;
1061                $result .= $char;
1062                $prev_was_punct = 0;
1063            }
1064            else {
1065                $result .= " " unless $prev_was_punct;
1066                $result .= sprintf("%02X", ord $char);
1067                $prev_was_punct = 0;
1068            }
1069        }
1070    }
1071
1072    return $result;
1073}
1074
1075sub report_result {
1076    my ($Locale, $i, $pass_fail, $message) = @_;
1077    if ($pass_fail) {
1078	push @{$Okay{$i}}, $Locale;
1079    }
1080    else {
1081        $message //= "";
1082        $message = "  ($message)" if $message;
1083	$Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$os}
1084                                         && $Locale =~ $known_bad_locales{$os};
1085	$Problem{$i}{$Locale} = 1;
1086	debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
1087    }
1088}
1089
1090sub report_multi_result {
1091    my ($Locale, $i, $results_ref) = @_;
1092
1093    # $results_ref points to an array, each element of which is a character that was
1094    # in error for this test numbered '$i'.  If empty, the test passed
1095
1096    my $message = "";
1097    if (@$results_ref) {
1098        $message = join " ", "for", disp_chars(@$results_ref);
1099    }
1100    report_result($Locale, $i, @$results_ref == 0, $message);
1101}
1102
1103my $first_locales_test_number = $final_without_setlocale
1104                              + 1 + @incompatible_locales;
1105my $locales_test_number;
1106my $not_necessarily_a_problem_test_number;
1107my $first_casing_test_number;
1108my %setlocale_failed;   # List of locales that setlocale() didn't work on
1109my $has_glibc_extra_categories = grep { $_ =~ /^ _NL /x }
1110                                                    valid_locale_categories();
1111
1112foreach my $Locale (@Locale) {
1113    $locales_test_number = $first_locales_test_number - 1;
1114    debug "\n";
1115    debug "Locale = $Locale\n";
1116
1117    unless (setlocale(&POSIX::LC_ALL, $Locale)) {
1118        $setlocale_failed{$Locale} = $Locale;
1119	next;
1120    }
1121
1122    # We test UTF-8 locales only under ':not_characters';  It is easier to
1123    # test them in other test files than here.  Non- UTF-8 locales are tested
1124    # only under plain 'use locale', as otherwise we would have to convert
1125    # everything in them to Unicode.
1126
1127    my %UPPER = ();     # All alpha X for which uc(X) == X and lc(X) != X
1128    my %lower = ();     # All alpha X for which lc(X) == X and uc(X) != X
1129    my %BoThCaSe = ();  # All alpha X for which uc(X) == lc(X) == X
1130
1131    my $is_utf8_locale = is_locale_utf8($Locale);
1132
1133    if ($debug) {
1134        debug "is utf8 locale? = $is_utf8_locale\n";
1135        for my $item (@langinfo) {
1136            my $numeric_item = eval $item;
1137            my $value = langinfo($numeric_item);
1138
1139            # All items should return a value; if not, this will warn.  But on
1140            # platforms without the extra categories, almost all items will be
1141            # empty.  Skip reporting such.
1142            next if $value eq ""
1143                 && $item =~ / ^ _NL_ / && ! $has_glibc_extra_categories;
1144
1145            debug "$item = " . disp_str($value);
1146        }
1147    }
1148
1149    if (! $is_utf8_locale) {
1150        use locale;
1151        @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
1152        @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
1153        @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
1154        @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
1155        @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
1156        @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
1157        @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
1158        @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
1159        @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
1160        @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
1161        @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
1162        @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
1163        @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1164        @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
1165        @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
1166
1167        # Sieve the uppercase and the lowercase.
1168
1169        for (@{$posixes{'word'}}) {
1170            if (/[^\d_]/) { # skip digits and the _
1171                if (uc($_) eq $_) {
1172                    $UPPER{$_} = $_;
1173                }
1174                if (lc($_) eq $_) {
1175                    $lower{$_} = $_;
1176                }
1177            }
1178        }
1179    }
1180    else {
1181        use locale ':not_characters';
1182        @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
1183        @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
1184        @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
1185        @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
1186        @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
1187        @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
1188        @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
1189        @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
1190        @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
1191        @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
1192        @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
1193        @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
1194        @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1195        @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
1196        @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
1197        for (@{$posixes{'word'}}) {
1198            if (/[^\d_]/) { # skip digits and the _
1199                if (uc($_) eq $_) {
1200                    $UPPER{$_} = $_;
1201                }
1202                if (lc($_) eq $_) {
1203                    $lower{$_} = $_;
1204                }
1205            }
1206        }
1207    }
1208
1209    # Ordered, where possible,  in groups of "this is a subset of the next
1210    # one"
1211    debug ":upper:  = ", disp_chars(@{$posixes{'upper'}}), "\n";
1212    debug ":lower:  = ", disp_chars(@{$posixes{'lower'}}), "\n";
1213    debug ":cased:  = ", disp_chars(@{$posixes{'cased'}}), "\n";
1214    debug ":alpha:  = ", disp_chars(@{$posixes{'alpha'}}), "\n";
1215    debug ":alnum:  = ", disp_chars(@{$posixes{'alnum'}}), "\n";
1216    debug ' \w      = ', disp_chars(@{$posixes{'word'}}), "\n";
1217    debug ":graph:  = ", disp_chars(@{$posixes{'graph'}}), "\n";
1218    debug ":print:  = ", disp_chars(@{$posixes{'print'}}), "\n";
1219    debug ' \d      = ', disp_chars(@{$posixes{'digit'}}), "\n";
1220    debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
1221    debug ":blank:  = ", disp_chars(@{$posixes{'blank'}}), "\n";
1222    debug ' \s      = ', disp_chars(@{$posixes{'space'}}), "\n";
1223    debug ":punct:  = ", disp_chars(@{$posixes{'punct'}}), "\n";
1224    debug ":cntrl:  = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
1225    debug ":ascii:  = ", disp_chars(@{$posixes{'ascii'}}), "\n";
1226
1227    foreach (keys %UPPER) {
1228
1229	$BoThCaSe{$_}++ if exists $lower{$_};
1230    }
1231    foreach (keys %lower) {
1232	$BoThCaSe{$_}++ if exists $UPPER{$_};
1233    }
1234    foreach (keys %BoThCaSe) {
1235	delete $UPPER{$_};
1236	delete $lower{$_};
1237    }
1238
1239    my %Unassigned;
1240    foreach my $ord ( 0 .. 255 ) {
1241        $Unassigned{chr $ord} = 1;
1242    }
1243    foreach my $class (keys %posixes) {
1244        foreach my $char (@{$posixes{$class}}) {
1245            delete $Unassigned{$char};
1246        }
1247    }
1248
1249    debug "UPPER    = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
1250    debug "lower    = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
1251    debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
1252    debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
1253
1254    my @failures;
1255    my @fold_failures;
1256    foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
1257        my $ok;
1258        my $fold_ok;
1259        if ($is_utf8_locale) {
1260            use locale ':not_characters';
1261            $ok = $x =~ /[[:upper:]]/;
1262            $fold_ok = $x =~ /[[:lower:]]/i;
1263        }
1264        else {
1265            use locale;
1266            $ok = $x =~ /[[:upper:]]/;
1267            $fold_ok = $x =~ /[[:lower:]]/i;
1268        }
1269        push @failures, $x unless $ok;
1270        push @fold_failures, $x unless $fold_ok;
1271    }
1272    $locales_test_number++;
1273    $first_casing_test_number = $locales_test_number;
1274    $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
1275    report_multi_result($Locale, $locales_test_number, \@failures);
1276
1277    $locales_test_number++;
1278
1279    $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
1280    report_multi_result($Locale, $locales_test_number, \@fold_failures);
1281
1282    undef @failures;
1283    undef @fold_failures;
1284
1285    foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
1286        my $ok;
1287        my $fold_ok;
1288        if ($is_utf8_locale) {
1289            use locale ':not_characters';
1290            $ok = $x =~ /[[:lower:]]/;
1291            $fold_ok = $x =~ /[[:upper:]]/i;
1292        }
1293        else {
1294            use locale;
1295            $ok = $x =~ /[[:lower:]]/;
1296            $fold_ok = $x =~ /[[:upper:]]/i;
1297        }
1298        push @failures, $x unless $ok;
1299        push @fold_failures, $x unless $fold_ok;
1300    }
1301
1302    $locales_test_number++;
1303    $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
1304    report_multi_result($Locale, $locales_test_number, \@failures);
1305
1306    $locales_test_number++;
1307    $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
1308    report_multi_result($Locale, $locales_test_number, \@fold_failures);
1309
1310    {   # Find the alphabetic characters that are not considered alphabetics
1311        # in the default (C) locale.
1312
1313	no locale;
1314
1315	@Added_alpha = ();
1316	for (keys %UPPER, keys %lower, keys %BoThCaSe) {
1317	    push(@Added_alpha, $_) if (/\W/);
1318	}
1319    }
1320
1321    @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
1322
1323    debug "Added_alpha = ", disp_chars(@Added_alpha), "\n";
1324
1325    # Cross-check the whole 8-bit character set.
1326
1327    ++$locales_test_number;
1328    my @f;
1329    $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
1330    for (map { chr } 0..255) {
1331        if ($is_utf8_locale) {
1332            use locale ':not_characters';
1333            push @f, $_ unless /[[:word:]]/ == /\w/;
1334        }
1335        else {
1336            push @f, $_ unless /[[:word:]]/ == /\w/;
1337        }
1338    }
1339    report_multi_result($Locale, $locales_test_number, \@f);
1340
1341    ++$locales_test_number;
1342    undef @f;
1343    $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
1344    for (map { chr } 0..255) {
1345        if ($is_utf8_locale) {
1346            use locale ':not_characters';
1347            push @f, $_ unless /[[:digit:]]/ == /\d/;
1348        }
1349        else {
1350            push @f, $_ unless /[[:digit:]]/ == /\d/;
1351        }
1352    }
1353    report_multi_result($Locale, $locales_test_number, \@f);
1354
1355    ++$locales_test_number;
1356    undef @f;
1357    $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
1358    for (map { chr } 0..255) {
1359        if ($is_utf8_locale) {
1360            use locale ':not_characters';
1361            push @f, $_ unless /[[:space:]]/ == /\s/;
1362        }
1363        else {
1364            push @f, $_ unless /[[:space:]]/ == /\s/;
1365        }
1366    }
1367    report_multi_result($Locale, $locales_test_number, \@f);
1368
1369    ++$locales_test_number;
1370    undef @f;
1371    $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
1372    for (map { chr } 0..255) {
1373        if ($is_utf8_locale) {
1374            use locale ':not_characters';
1375            push @f, $_ unless   (/[[:alpha:]]/ xor /[[:^alpha:]]/)   ||
1376                    (/[[:alnum:]]/ xor /[[:^alnum:]]/)   ||
1377                    (/[[:ascii:]]/ xor /[[:^ascii:]]/)   ||
1378                    (/[[:blank:]]/ xor /[[:^blank:]]/)   ||
1379                    (/[[:cntrl:]]/ xor /[[:^cntrl:]]/)   ||
1380                    (/[[:digit:]]/ xor /[[:^digit:]]/)   ||
1381                    (/[[:graph:]]/ xor /[[:^graph:]]/)   ||
1382                    (/[[:lower:]]/ xor /[[:^lower:]]/)   ||
1383                    (/[[:print:]]/ xor /[[:^print:]]/)   ||
1384                    (/[[:space:]]/ xor /[[:^space:]]/)   ||
1385                    (/[[:upper:]]/ xor /[[:^upper:]]/)   ||
1386                    (/[[:word:]]/  xor /[[:^word:]]/)    ||
1387                    (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1388
1389                    # effectively is what [:cased:] would be if it existed.
1390                    (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
1391        }
1392        else {
1393            push @f, $_ unless   (/[[:alpha:]]/ xor /[[:^alpha:]]/)   ||
1394                    (/[[:alnum:]]/ xor /[[:^alnum:]]/)   ||
1395                    (/[[:ascii:]]/ xor /[[:^ascii:]]/)   ||
1396                    (/[[:blank:]]/ xor /[[:^blank:]]/)   ||
1397                    (/[[:cntrl:]]/ xor /[[:^cntrl:]]/)   ||
1398                    (/[[:digit:]]/ xor /[[:^digit:]]/)   ||
1399                    (/[[:graph:]]/ xor /[[:^graph:]]/)   ||
1400                    (/[[:lower:]]/ xor /[[:^lower:]]/)   ||
1401                    (/[[:print:]]/ xor /[[:^print:]]/)   ||
1402                    (/[[:space:]]/ xor /[[:^space:]]/)   ||
1403                    (/[[:upper:]]/ xor /[[:^upper:]]/)   ||
1404                    (/[[:word:]]/  xor /[[:^word:]]/)    ||
1405                    (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1406                    (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
1407        }
1408    }
1409    report_multi_result($Locale, $locales_test_number, \@f);
1410
1411    # The rules for the relationships are given in:
1412    # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
1413
1414
1415    ++$locales_test_number;
1416    undef @f;
1417    $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
1418    for ('a' .. 'z') {
1419        if ($is_utf8_locale) {
1420            use locale ':not_characters';
1421            push @f, $_  unless /[[:lower:]]/;
1422        }
1423        else {
1424            push @f, $_  unless /[[:lower:]]/;
1425        }
1426    }
1427    report_multi_result($Locale, $locales_test_number, \@f);
1428
1429    ++$locales_test_number;
1430    undef @f;
1431    $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
1432    for (map { chr } 0..255) {
1433        if ($is_utf8_locale) {
1434            use locale ':not_characters';
1435            push @f, $_  if /[[:lower:]]/ and ! /[[:alpha:]]/;
1436        }
1437        else {
1438            push @f, $_  if /[[:lower:]]/ and ! /[[:alpha:]]/;
1439        }
1440    }
1441    report_multi_result($Locale, $locales_test_number, \@f);
1442
1443    ++$locales_test_number;
1444    undef @f;
1445    $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
1446    for ('A' .. 'Z') {
1447        if ($is_utf8_locale) {
1448            use locale ':not_characters';
1449            push @f, $_  unless /[[:upper:]]/;
1450        }
1451        else {
1452            push @f, $_  unless /[[:upper:]]/;
1453        }
1454    }
1455    report_multi_result($Locale, $locales_test_number, \@f);
1456
1457    ++$locales_test_number;
1458    undef @f;
1459    $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
1460    for (map { chr } 0..255) {
1461        if ($is_utf8_locale) {
1462            use locale ':not_characters';
1463            push @f, $_  if /[[:upper:]]/ and ! /[[:alpha:]]/;
1464        }
1465        else {
1466            push @f, $_ if /[[:upper:]]/  and ! /[[:alpha:]]/;
1467        }
1468    }
1469    report_multi_result($Locale, $locales_test_number, \@f);
1470
1471    ++$locales_test_number;
1472    undef @f;
1473    $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
1474    for (map { chr } 0..255) {
1475        if ($is_utf8_locale) {
1476            use locale ':not_characters';
1477            push @f, $_ if /[[:lower:]]/i  and ! /[[:alpha:]]/;
1478        }
1479        else {
1480            push @f, $_ if /[[:lower:]]/i  and ! /[[:alpha:]]/;
1481        }
1482    }
1483    report_multi_result($Locale, $locales_test_number, \@f);
1484
1485    ++$locales_test_number;
1486    undef @f;
1487    $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
1488    for (map { chr } 0..255) {
1489        if ($is_utf8_locale) {
1490            use locale ':not_characters';
1491            push @f, $_ if /[[:alpha:]]/  and ! /[[:alnum:]]/;
1492        }
1493        else {
1494            push @f, $_ if /[[:alpha:]]/  and ! /[[:alnum:]]/;
1495        }
1496    }
1497    report_multi_result($Locale, $locales_test_number, \@f);
1498
1499    ++$locales_test_number;
1500    undef @f;
1501    $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
1502    for ('0' .. '9') {
1503        if ($is_utf8_locale) {
1504            use locale ':not_characters';
1505            push @f, $_  unless /[[:digit:]]/;
1506        }
1507        else {
1508            push @f, $_  unless /[[:digit:]]/;
1509        }
1510    }
1511    report_multi_result($Locale, $locales_test_number, \@f);
1512
1513    ++$locales_test_number;
1514    undef @f;
1515    $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1516    for (map { chr } 0..255) {
1517        if ($is_utf8_locale) {
1518            use locale ':not_characters';
1519            push @f, $_ if /[[:digit:]]/  and ! /[[:alnum:]]/;
1520        }
1521        else {
1522            push @f, $_ if /[[:digit:]]/  and ! /[[:alnum:]]/;
1523        }
1524    }
1525    report_multi_result($Locale, $locales_test_number, \@f);
1526
1527    ++$locales_test_number;
1528    undef @f;
1529    $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
1530    report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
1531
1532    ++$locales_test_number;
1533    undef @f;
1534    $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1535    if (@{$posixes{'digit'}} == 20) {
1536        my $previous_ord;
1537        for (map { chr } 0..255) {
1538            next unless /[[:digit:]]/;
1539            next if /[0-9]/;
1540            if (defined $previous_ord) {
1541                if ($is_utf8_locale) {
1542                    use locale ':not_characters';
1543                    push @f, $_ if ord $_ != $previous_ord + 1;
1544                }
1545                else {
1546                    push @f, $_ if ord $_ != $previous_ord + 1;
1547                }
1548            }
1549            $previous_ord = ord $_;
1550        }
1551    }
1552    report_multi_result($Locale, $locales_test_number, \@f);
1553
1554    ++$locales_test_number;
1555    undef @f;
1556    my @xdigit_digits;  # :digit: & :xdigit:
1557    $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
1558    for (map { chr } 0..255) {
1559        if ($is_utf8_locale) {
1560            use locale ':not_characters';
1561            # For utf8 locales, we actually use a stricter test: that :digit:
1562            # is a subset of :xdigit:, as we know that only 0-9 should match
1563            push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1564        }
1565        else {
1566            push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
1567        }
1568    }
1569    if (! $is_utf8_locale) {
1570
1571        # For non-utf8 locales, @xdigit_digits is a list of the characters
1572        # that are both :xdigit: and :digit:.  Because :digit: is stored in
1573        # increasing code point order (unless the tests above failed),
1574        # @xdigit_digits is as well.  There should be exactly 10 or
1575        # 20 of these.
1576        if (@xdigit_digits != 10 && @xdigit_digits != 20) {
1577            @f = @xdigit_digits;
1578        }
1579        else {
1580
1581            # Look for contiguity in the series, adding any wrong ones to @f
1582            my @temp = @xdigit_digits;
1583            while (@temp > 1) {
1584                push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
1585
1586                                     # Skip this test for the 0th character of
1587                                     # the second block of 10, as it won't be
1588                                     # contiguous with the previous block
1589                                     && (! defined $xdigit_digits[10]
1590                                         || $temp[1] != $xdigit_digits[10]);
1591                shift @temp;
1592            }
1593        }
1594    }
1595
1596    report_multi_result($Locale, $locales_test_number, \@f);
1597
1598    ++$locales_test_number;
1599    undef @f;
1600    $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1601    for ('A' .. 'F', 'a' .. 'f') {
1602        if ($is_utf8_locale) {
1603            use locale ':not_characters';
1604            push @f, $_  unless /[[:xdigit:]]/;
1605        }
1606        else {
1607            push @f, $_  unless /[[:xdigit:]]/;
1608        }
1609    }
1610    report_multi_result($Locale, $locales_test_number, \@f);
1611
1612    ++$locales_test_number;
1613    undef @f;
1614    $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1615    my $previous_ord;
1616    my $count = 0;
1617    for my $chr (map { chr } 0..255) {
1618        next unless $chr =~ /[[:xdigit:]]/;
1619        if ($is_utf8_locale) {
1620            next if $chr =~ /[[:digit:]]/;
1621        }
1622        else {
1623            next if grep { $chr eq $_ } @xdigit_digits;
1624        }
1625        next if $chr =~ /[A-Fa-f]/;
1626        if (defined $previous_ord) {
1627            if ($is_utf8_locale) {
1628                use locale ':not_characters';
1629                push @f, $chr if ord $chr != $previous_ord + 1;
1630            }
1631            else {
1632                push @f, $chr if ord $chr != $previous_ord + 1;
1633            }
1634        }
1635        $count++;
1636        if ($count == 6) {
1637            undef $previous_ord;
1638        }
1639        else {
1640            $previous_ord = ord $chr;
1641        }
1642    }
1643    report_multi_result($Locale, $locales_test_number, \@f);
1644
1645    ++$locales_test_number;
1646    undef @f;
1647    $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1648    for (map { chr } 0..255) {
1649        if ($is_utf8_locale) {
1650            use locale ':not_characters';
1651            push @f, $_ if /[[:xdigit:]]/  and ! /[[:graph:]]/;
1652        }
1653        else {
1654            push @f, $_ if /[[:xdigit:]]/  and ! /[[:graph:]]/;
1655        }
1656    }
1657    report_multi_result($Locale, $locales_test_number, \@f);
1658
1659    # Note that xdigit doesn't have to be a subset of alnum
1660
1661    ++$locales_test_number;
1662    undef @f;
1663    $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1664    for (map { chr } 0..255) {
1665        if ($is_utf8_locale) {
1666            use locale ':not_characters';
1667            push @f, $_ if /[[:punct:]]/  and ! /[[:graph:]]/;
1668        }
1669        else {
1670            push @f, $_ if /[[:punct:]]/  and ! /[[:graph:]]/;
1671        }
1672    }
1673    report_multi_result($Locale, $locales_test_number, \@f);
1674
1675    ++$locales_test_number;
1676    undef @f;
1677    $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1678    if ($is_utf8_locale) {
1679        use locale ':not_characters';
1680        push @f, " " if " " =~ /[[:graph:]]/;
1681    }
1682    else {
1683        push @f, " " if " " =~ /[[:graph:]]/;
1684    }
1685    report_multi_result($Locale, $locales_test_number, \@f);
1686
1687    ++$locales_test_number;
1688    undef @f;
1689    $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1690    for (' ', "\f", "\n", "\r", "\t", "\cK") {
1691        if ($is_utf8_locale) {
1692            use locale ':not_characters';
1693            push @f, $_  unless /[[:space:]]/;
1694        }
1695        else {
1696            push @f, $_  unless /[[:space:]]/;
1697        }
1698    }
1699    report_multi_result($Locale, $locales_test_number, \@f);
1700
1701    ++$locales_test_number;
1702    undef @f;
1703    $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1704    for (' ', "\t") {
1705        if ($is_utf8_locale) {
1706            use locale ':not_characters';
1707            push @f, $_  unless /[[:blank:]]/;
1708        }
1709        else {
1710            push @f, $_  unless /[[:blank:]]/;
1711        }
1712    }
1713    report_multi_result($Locale, $locales_test_number, \@f);
1714
1715    ++$locales_test_number;
1716    undef @f;
1717    $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1718    for (map { chr } 0..255) {
1719        if ($is_utf8_locale) {
1720            use locale ':not_characters';
1721            push @f, $_ if /[[:blank:]]/  and ! /[[:space:]]/;
1722        }
1723        else {
1724            push @f, $_ if /[[:blank:]]/  and ! /[[:space:]]/;
1725        }
1726    }
1727    report_multi_result($Locale, $locales_test_number, \@f);
1728
1729    ++$locales_test_number;
1730    undef @f;
1731    $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1732    for (map { chr } 0..255) {
1733        if ($is_utf8_locale) {
1734            use locale ':not_characters';
1735            push @f, $_ if /[[:graph:]]/  and ! /[[:print:]]/;
1736        }
1737        else {
1738            push @f, $_ if /[[:graph:]]/  and ! /[[:print:]]/;
1739        }
1740    }
1741    report_multi_result($Locale, $locales_test_number, \@f);
1742
1743    ++$locales_test_number;
1744    undef @f;
1745    $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1746    if ($is_utf8_locale) {
1747        use locale ':not_characters';
1748        push @f, " " if " " !~ /[[:print:]]/;
1749    }
1750    else {
1751        push @f, " " if " " !~ /[[:print:]]/;
1752    }
1753    report_multi_result($Locale, $locales_test_number, \@f);
1754
1755    ++$locales_test_number;
1756    undef @f;
1757    $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1758    for (map { chr } 0..255) {
1759        if ($is_utf8_locale) {
1760            use locale ':not_characters';
1761            push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1762        }
1763        else {
1764            push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1765        }
1766    }
1767    report_multi_result($Locale, $locales_test_number, \@f);
1768
1769    ++$locales_test_number;
1770    undef @f;
1771    $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1772    for (map { chr } 0..255) {
1773        if ($is_utf8_locale) {
1774            use locale ':not_characters';
1775            push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1776        }
1777        else {
1778            push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1779        }
1780    }
1781    report_multi_result($Locale, $locales_test_number, \@f);
1782
1783    ++$locales_test_number;
1784    undef @f;
1785    $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1786    for (map { chr } 0..255) {
1787        if ($is_utf8_locale) {
1788            use locale ':not_characters';
1789            push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1790        }
1791        else {
1792            push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1793        }
1794    }
1795    report_multi_result($Locale, $locales_test_number, \@f);
1796
1797    ++$locales_test_number;
1798    undef @f;
1799    $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1800    for (map { chr } 0..255) {
1801        if ($is_utf8_locale) {
1802            use locale ':not_characters';
1803            push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1804        }
1805        else {
1806            push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1807        }
1808    }
1809    report_multi_result($Locale, $locales_test_number, \@f);
1810
1811    ++$locales_test_number;
1812    undef @f;
1813    $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1814    for (map { chr } 0..255) {
1815        if ($is_utf8_locale) {
1816            use locale ':not_characters';
1817            push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1818        }
1819        else {
1820            push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1821        }
1822    }
1823    report_multi_result($Locale, $locales_test_number, \@f);
1824
1825    foreach ($first_casing_test_number..$locales_test_number) {
1826        $problematical_tests{$_} = 1;
1827    }
1828
1829
1830    # Test for read-only scalars' locale vs non-locale comparisons.
1831
1832    {
1833        no locale;
1834        my $ok;
1835        $a = "qwerty";
1836        if ($is_utf8_locale) {
1837            use locale ':not_characters';
1838            $ok = ($a cmp "qwerty") == 0;
1839        }
1840        else {
1841            use locale;
1842            $ok = ($a cmp "qwerty") == 0;
1843        }
1844        report_result($Locale, ++$locales_test_number, $ok);
1845        $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1846    }
1847
1848    {
1849        my ($from, $to, $lesser, $greater,
1850            @test, %test, $test, $yes, $no, $sign);
1851
1852        ++$locales_test_number;
1853        $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1854        $not_necessarily_a_problem_test_number = $locales_test_number;
1855        for (0..9) {
1856            # Select a slice.
1857            $from = int(($_*@{$posixes{'word'}})/10);
1858            $to = $from + int(@{$posixes{'word'}}/10);
1859            $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1860            $lesser  = join('', @{$posixes{'word'}}[$from..$to]);
1861            # Select a slice one character on.
1862            $from++; $to++;
1863            $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1864            $greater = join('', @{$posixes{'word'}}[$from..$to]);
1865            if ($is_utf8_locale) {
1866                use locale ':not_characters';
1867                ($yes, $no, $sign) = ($lesser lt $greater
1868                                    ? ("    ", "not ", 1)
1869                                    : ("not ", "    ", -1));
1870            }
1871            else {
1872                use locale;
1873                ($yes, $no, $sign) = ($lesser lt $greater
1874                                    ? ("    ", "not ", 1)
1875                                    : ("not ", "    ", -1));
1876            }
1877            # all these tests should FAIL (return 0).  Exact lt or gt cannot
1878            # be tested because in some locales, say, eacute and E may test
1879            # equal.
1880            @test =
1881                (
1882                    $no.'    ($lesser  le $greater)',  # 1
1883                    'not      ($lesser  ne $greater)', # 2
1884                    '         ($lesser  eq $greater)', # 3
1885                    $yes.'    ($lesser  ge $greater)', # 4
1886                    $yes.'    ($lesser  ge $greater)', # 5
1887                    $yes.'    ($greater le $lesser )', # 7
1888                    'not      ($greater ne $lesser )', # 8
1889                    '         ($greater eq $lesser )', # 9
1890                    $no.'     ($greater ge $lesser )', # 10
1891                    'not (($lesser cmp $greater) == -($sign))' # 11
1892                    );
1893            @test{@test} = 0 x @test;
1894            $test = 0;
1895            for my $ti (@test) {
1896                if ($is_utf8_locale) {
1897                    use locale ':not_characters';
1898                    $test{$ti} = eval $ti;
1899                }
1900                else {
1901                    # Already in 'use locale';
1902                    $test{$ti} = eval $ti;
1903                }
1904                $test ||= $test{$ti}
1905            }
1906            report_result($Locale, $locales_test_number, $test == 0);
1907            if ($test) {
1908                debug "lesser  = '$lesser'\n";
1909                debug "greater = '$greater'\n";
1910                debug "lesser cmp greater = ",
1911                        $lesser cmp $greater, "\n";
1912                debug "greater cmp lesser = ",
1913                        $greater cmp $lesser, "\n";
1914                debug "(greater) from = $from, to = $to\n";
1915                for my $ti (@test) {
1916                    debugf("# %-40s %-4s", $ti,
1917                            $test{$ti} ? 'FAIL' : 'ok');
1918                    if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1919                        debugf("(%s == %4d)", $1, eval $1);
1920                    }
1921                    debugf("\n#");
1922                }
1923
1924                last;
1925            }
1926        }
1927
1928        use locale;
1929
1930        my @sorted_controls;
1931
1932        ++$locales_test_number;
1933        $test_names{$locales_test_number}
1934                = 'Skip in locales where there are no controls;'
1935                . ' otherwise verify that \0 sorts before any (other) control';
1936        if (! $posixes{'cntrl'}) {
1937            report_result($Locale, $locales_test_number, 1);
1938
1939            # We use all code points for the tests below since there aren't
1940            # any controls
1941            push @sorted_controls, chr $_ for 1..255;
1942            @sorted_controls = sort @sorted_controls;
1943        }
1944        else {
1945            @sorted_controls = @{$posixes{'cntrl'}};
1946            push @sorted_controls, "\0",
1947                                unless grep { $_ eq "\0" } @sorted_controls;
1948            @sorted_controls = sort @sorted_controls;
1949            my $output = "";
1950            for my $control (@sorted_controls) {
1951                $output .= " " . disp_chars($control);
1952            }
1953            debug "sorted :cntrl: (plus NUL) = $output\n";
1954            my $ok = $sorted_controls[0] eq "\0";
1955            report_result($Locale, $locales_test_number, $ok);
1956
1957            shift @sorted_controls if $ok;
1958        }
1959
1960        my $lowest_control = $sorted_controls[0];
1961
1962        ++$locales_test_number;
1963        $test_names{$locales_test_number}
1964            = 'Skip in locales where all controls have primary sorting weight; '
1965            . 'otherwise verify that \0 doesn\'t have primary sorting weight';
1966        if ("a${lowest_control}c" lt "ab") {
1967            report_result($Locale, $locales_test_number, 1);
1968        }
1969        else {
1970            my $ok = "ab" lt "a\0c";
1971            report_result($Locale, $locales_test_number, $ok);
1972        }
1973
1974        ++$locales_test_number;
1975        $test_names{$locales_test_number}
1976                            = 'Verify that strings with embedded NUL collate';
1977        my $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a";
1978        report_result($Locale, $locales_test_number, $ok);
1979
1980        ++$locales_test_number;
1981        $test_names{$locales_test_number}
1982                            = 'Verify that strings with embedded NUL and '
1983                            . 'extra trailing NUL collate';
1984        $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}";
1985        report_result($Locale, $locales_test_number, $ok);
1986
1987        ++$locales_test_number;
1988        $test_names{$locales_test_number}
1989                            = 'Verify that empty strings collate';
1990        $ok = "" le "";
1991        report_result($Locale, $locales_test_number, $ok);
1992
1993        ++$locales_test_number;
1994        $test_names{$locales_test_number}
1995            = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness "
1996            . "doesn't matter with collation";
1997        if (! $is_utf8_locale) {
1998            report_result($Locale, $locales_test_number, 1);
1999        }
2000        else {
2001
2002            # khw can't think of anything better.  Start with a string that is
2003            # higher than its UTF-8 representation in both EBCDIC and ASCII
2004            my $string = chr utf8::unicode_to_native(0xff);
2005            my $utf8_string = $string;
2006            utf8::upgrade($utf8_string);
2007
2008            # 8 should be lt 9 in all locales (except ones that aren't
2009            # ASCII-based, which might fail this)
2010            $ok = ("a${string}8") lt ("a${utf8_string}9");
2011            report_result($Locale, $locales_test_number, $ok);
2012        }
2013
2014        ++$locales_test_number;
2015        $test_names{$locales_test_number}
2016            = "Skip in UTF-8 locales; otherwise verify that single byte "
2017            . "collates before 0x100 and above";
2018        if ($is_utf8_locale) {
2019            report_result($Locale, $locales_test_number, 1);
2020        }
2021        else {
2022            my $max_collating = chr 0;  # Find byte that collates highest
2023            for my $i (0 .. 255) {
2024                my $char = chr $i;
2025                $max_collating = $char if $char gt $max_collating;
2026            }
2027            $ok = $max_collating lt chr 0x100;
2028            report_result($Locale, $locales_test_number, $ok);
2029        }
2030
2031        ++$locales_test_number;
2032        $test_names{$locales_test_number}
2033            = "Skip in UTF-8 locales; otherwise verify that 0x100 and "
2034            . "above collate in code point order";
2035        if ($is_utf8_locale) {
2036            report_result($Locale, $locales_test_number, 1);
2037        }
2038        else {
2039            $ok = chr 0x100 lt chr 0x101;
2040            report_result($Locale, $locales_test_number, $ok);
2041        }
2042    }
2043
2044    my $ok1;
2045    my $ok2;
2046    my $ok3;
2047    my $ok4;
2048    my $ok5;
2049    my $ok6;
2050    my $ok7;
2051    my $ok8;
2052    my $ok9;
2053    my $ok10;
2054    my $ok11;
2055    my $ok12;
2056    my $ok13;
2057    my $ok14;
2058    my $ok14_5;
2059    my $ok15;
2060    my $ok16;
2061    my $ok17;
2062    my $ok18;
2063    my $ok19;
2064    my $ok20;
2065    my $ok21;
2066
2067    my $c;
2068    my $d;
2069    my $e;
2070    my $f;
2071    my $g;
2072    my $h;
2073    my $i;
2074    my $j;
2075
2076    if (! $is_utf8_locale) {
2077        use locale;
2078
2079        my ($x, $y) = (1.23, 1.23);
2080
2081        $a = "$x";
2082        printf ''; # printf used to reset locale to "C"
2083        $b = "$y";
2084        $ok1 = $a eq $b;
2085
2086        $c = "$x";
2087        my $z = sprintf ''; # sprintf used to reset locale to "C"
2088        $d = "$y";
2089        $ok2 = $c eq $d;
2090        {
2091
2092            use warnings;
2093            my $w = 0;
2094            local $SIG{__WARN__} =
2095                sub {
2096                    print "# @_\n";
2097                    $w++;
2098                };
2099
2100            # The == (among other ops) used to warn for locales
2101            # that had something else than "." as the radix character.
2102
2103            $ok3 = $c == 1.23;
2104            $ok4 = $c == $x;
2105            $ok5 = $c == $d;
2106            {
2107                no locale;
2108
2109                $e = "$x";
2110
2111                $ok6 = $e == 1.23;
2112                $ok7 = $e == $x;
2113                $ok8 = $e == $c;
2114            }
2115
2116            $f = "1.23";
2117            $g = 2.34;
2118            $h = 1.5;
2119            $i = 1.25;
2120            $j = "$h:$i";
2121
2122            $ok9 = $f == 1.23;
2123            $ok10 = $f == $x;
2124            $ok11 = $f == $c;
2125            $ok12 = abs(($f + $g) - 3.57) < 0.01;
2126            $ok13 = $w == 0;
2127            $ok14 = $ok14_5 = $ok15 = $ok16 = 1;  # Skip for non-utf8 locales
2128        }
2129        {
2130            no locale;
2131            $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
2132        }
2133        $ok18 = $j eq sprintf("%g:%g", $h, $i);
2134    }
2135    else {
2136        use locale ':not_characters';
2137
2138        my ($x, $y) = (1.23, 1.23);
2139        $a = "$x";
2140        printf ''; # printf used to reset locale to "C"
2141        $b = "$y";
2142        $ok1 = $a eq $b;
2143
2144        $c = "$x";
2145        my $z = sprintf ''; # sprintf used to reset locale to "C"
2146        $d = "$y";
2147        $ok2 = $c eq $d;
2148        {
2149            use warnings;
2150            my $w = 0;
2151            local $SIG{__WARN__} =
2152                sub {
2153                    print "# @_\n";
2154                    $w++;
2155                };
2156            $ok3 = $c == 1.23;
2157            $ok4 = $c == $x;
2158            $ok5 = $c == $d;
2159            {
2160                no locale;
2161                $e = "$x";
2162
2163                $ok6 = $e == 1.23;
2164                $ok7 = $e == $x;
2165                $ok8 = $e == $c;
2166            }
2167
2168            $f = "1.23";
2169            $g = 2.34;
2170            $h = 1.5;
2171            $i = 1.25;
2172            $j = "$h:$i";
2173
2174            $ok9 = $f == 1.23;
2175            $ok10 = $f == $x;
2176            $ok11 = $f == $c;
2177            $ok12 = abs(($f + $g) - 3.57) < 0.01;
2178            $ok13 = $w == 0;
2179
2180            # Look for non-ASCII error messages, and verify that the first
2181            # such is in UTF-8 (the others almost certainly will be like the
2182            # first).  This is only done if the current locale has LC_MESSAGES
2183            $ok14 = 1;
2184            $ok14_5 = 1;
2185            if (   locales_enabled('LC_MESSAGES')
2186                && setlocale(&POSIX::LC_MESSAGES, $Locale))
2187            {
2188                foreach my $err (keys %!) {
2189                    use Errno;
2190                    $! = eval "&Errno::$err";   # Convert to strerror() output
2191                    my $errnum = 0+$!;
2192                    my $strerror = "$!";
2193                    if ("$strerror" =~ /\P{ASCII}/) {
2194                        $ok14 = utf8::is_utf8($strerror);
2195                        no locale;
2196                        $ok14_5 = "$!" !~ /\P{ASCII}/;
2197                        debug( disp_str(
2198                        "non-ASCII \$! for error $errnum='$strerror'"))
2199                                                                   if ! $ok14_5;
2200                        last;
2201                    }
2202                }
2203            }
2204
2205            # Similarly, we verify that a non-ASCII radix is in UTF-8.  This
2206            # also catches if there is a disparity between sprintf and
2207            # stringification.
2208
2209            my $string_g = "$g";
2210            my $sprintf_g = sprintf("%g", $g);
2211
2212            $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
2213            $ok16 = $sprintf_g eq $string_g;
2214        }
2215        {
2216            no locale;
2217            $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
2218        }
2219        $ok18 = $j eq sprintf("%g:%g", $h, $i);
2220    }
2221
2222    $ok19 = $ok20 = 1;
2223    if (locales_enabled('LC_TIME')) {
2224        if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't
2225                                                   # affected by
2226                                                   # :not_characters
2227            my @times = CORE::localtime();
2228
2229            use locale;
2230            $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425]
2231            my $date = POSIX::strftime("'%A'  '%B'  '%Z'  '%p'", @times);
2232            debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date));
2233
2234            # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale,
2235            # and not UTF-8 if the locale isn't UTF-8.
2236            $ok20 = $date =~ / ^ \p{ASCII}+ $ /x
2237                    || $is_utf8_locale == utf8::is_utf8($date);
2238        }
2239    }
2240
2241    $ok21 = 1;
2242    if (locales_enabled('LC_MESSAGES')) {
2243        foreach my $err (keys %!) {
2244            no locale;
2245            use Errno;
2246            $! = eval "&Errno::$err";   # Convert to strerror() output
2247            my $strerror = "$!";
2248            if ($strerror =~ /\P{ASCII}/) {
2249                $ok21 = 0;
2250                debug(disp_str("non-ASCII strerror=$strerror"));
2251                last;
2252            }
2253        }
2254    }
2255
2256    report_result($Locale, ++$locales_test_number, $ok1);
2257    $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
2258    my $first_a_test = $locales_test_number;
2259
2260    debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
2261
2262    report_result($Locale, ++$locales_test_number, $ok2);
2263    $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
2264
2265    my $first_c_test = $locales_test_number;
2266
2267    $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
2268    report_result($Locale, $locales_test_number, $ok3);
2269    $problematical_tests{$locales_test_number} = 1;
2270
2271    $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
2272    report_result($Locale, $locales_test_number, $ok4);
2273    $problematical_tests{$locales_test_number} = 1;
2274
2275    report_result($Locale, ++$locales_test_number, $ok5);
2276    $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
2277    $problematical_tests{$locales_test_number} = 1;
2278
2279    debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
2280
2281    report_result($Locale, ++$locales_test_number, $ok6);
2282    $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
2283    my $first_e_test = $locales_test_number;
2284
2285    report_result($Locale, ++$locales_test_number, $ok7);
2286    $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
2287
2288    $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
2289    report_result($Locale, $locales_test_number, $ok8);
2290    $problematical_tests{$locales_test_number} = 1;
2291
2292    debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
2293
2294    report_result($Locale, ++$locales_test_number, $ok9);
2295    $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
2296    $problematical_tests{$locales_test_number} = 1;
2297    my $first_f_test = $locales_test_number;
2298
2299    report_result($Locale, ++$locales_test_number, $ok10);
2300    $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
2301    $problematical_tests{$locales_test_number} = 1;
2302
2303    $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
2304    report_result($Locale, $locales_test_number, $ok11);
2305    $problematical_tests{$locales_test_number} = 1;
2306
2307    report_result($Locale, ++$locales_test_number, $ok12);
2308    $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
2309    $problematical_tests{$locales_test_number} = 1;
2310
2311    report_result($Locale, ++$locales_test_number, $ok13);
2312    $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
2313    $problematical_tests{$locales_test_number} = 1;
2314
2315    report_result($Locale, ++$locales_test_number, $ok14);
2316    $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
2317
2318    report_result($Locale, ++$locales_test_number, $ok14_5);
2319    $test_names{$locales_test_number} = '... and are ASCII outside "use locale"';
2320
2321    report_result($Locale, ++$locales_test_number, $ok15);
2322    $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
2323    $problematical_tests{$locales_test_number} = 1;
2324
2325    report_result($Locale, ++$locales_test_number, $ok16);
2326    $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
2327    $problematical_tests{$locales_test_number} = 1;
2328
2329    report_result($Locale, ++$locales_test_number, $ok17);
2330    $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
2331
2332    report_result($Locale, ++$locales_test_number, $ok18);
2333    $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
2334    $problematical_tests{$locales_test_number} = 1;
2335
2336    report_result($Locale, ++$locales_test_number, $ok19);
2337    $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
2338
2339    report_result($Locale, ++$locales_test_number, $ok20);
2340    $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set';
2341    $problematical_tests{$locales_test_number} = 1;   # This is broken in
2342                                                      # OS X 10.9.3
2343
2344    report_result($Locale, ++$locales_test_number, $ok21);
2345    $test_names{$locales_test_number} = '"$!" is ASCII-only outside of locale scope';
2346
2347    debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
2348
2349    # Does taking lc separately differ from taking
2350    # the lc "in-line"?  (This was the bug 19990704.002 (#965), change #3568.)
2351    # The bug was in the caching of the 'o'-magic.
2352    if (! $is_utf8_locale) {
2353	use locale;
2354
2355	sub lcA {
2356	    my $lc0 = lc $_[0];
2357	    my $lc1 = lc $_[1];
2358	    return $lc0 cmp $lc1;
2359	}
2360
2361        sub lcB {
2362	    return lc($_[0]) cmp lc($_[1]);
2363	}
2364
2365        my $x = "ab";
2366        my $y = "aa";
2367        my $z = "AB";
2368
2369        report_result($Locale, ++$locales_test_number,
2370		    lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
2371		    lcA($x, $z) == 0 && lcB($x, $z) == 0);
2372    }
2373    else {
2374	use locale ':not_characters';
2375
2376	sub lcC {
2377	    my $lc0 = lc $_[0];
2378	    my $lc1 = lc $_[1];
2379	    return $lc0 cmp $lc1;
2380	}
2381
2382        sub lcD {
2383	    return lc($_[0]) cmp lc($_[1]);
2384	}
2385
2386        my $x = "ab";
2387        my $y = "aa";
2388        my $z = "AB";
2389
2390        report_result($Locale, ++$locales_test_number,
2391		    lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
2392		    lcC($x, $z) == 0 && lcD($x, $z) == 0);
2393    }
2394    $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
2395
2396    # Does lc of an UPPER (if different from the UPPER) match
2397    # case-insensitively the UPPER, and does the UPPER match
2398    # case-insensitively the lc of the UPPER.  And vice versa.
2399    {
2400        use locale;
2401        no utf8;
2402        my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
2403
2404        my @f = ();
2405        ++$locales_test_number;
2406        $test_names{$locales_test_number} = 'Verify case insensitive matching works';
2407        foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
2408            if (! $is_utf8_locale) {
2409                my $y = lc $x;
2410                next unless uc $y eq $x;
2411                debug_more( "UPPER=", disp_chars(($x)),
2412                            "; lc=", disp_chars(($y)), "; ",
2413                            "; fc=", disp_chars((fc $x)), "; ",
2414                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2415                            $x =~ /\Q$y/i ? 1 : 0,
2416                            "; ",
2417                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2418                            $y =~ /\Q$x/i ? 1 : 0,
2419                            "\n");
2420                #
2421                # If $x and $y contain regular expression characters
2422                # AND THEY lowercase (/i) to regular expression characters,
2423                # regcomp() will be mightily confused.  No, the \Q doesn't
2424                # help here (maybe regex engine internal lowercasing
2425                # is done after the \Q?)  An example of this happening is
2426                # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
2427                # the chr(173) (the "[") is the lowercase of the chr(235).
2428                #
2429                # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
2430                # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
2431                # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
2432                # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
2433                # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
2434                # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
2435                #
2436                # Similar things can happen even under (bastardised)
2437                # non-EBCDIC locales: in many European countries before the
2438                # advent of ISO 8859-x nationally customised versions of
2439                # ISO 646 were devised, reusing certain punctuation
2440                # characters for modified characters needed by the
2441                # country/language.  For example, the "|" might have
2442                # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
2443                #
2444                if ($x =~ $re || $y =~ $re) {
2445                    print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2446                    next;
2447                }
2448                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2449
2450                # fc is not a locale concept, so Perl uses lc for it.
2451                push @f, $x unless lc $x eq fc $x;
2452            }
2453            else {
2454                use locale ':not_characters';
2455                my $y = lc $x;
2456                next unless uc $y eq $x;
2457                debug_more( "UPPER=", disp_chars(($x)),
2458                            "; lc=", disp_chars(($y)), "; ",
2459                            "; fc=", disp_chars((fc $x)), "; ",
2460                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2461                            $x =~ /\Q$y/i ? 1 : 0,
2462                            "; ",
2463                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2464                            $y =~ /\Q$x/i ? 1 : 0,
2465                            "\n");
2466
2467                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2468
2469                # The places where Unicode's lc is different from fc are
2470                # skipped here by virtue of the 'next unless uc...' line above
2471                push @f, $x unless lc $x eq fc $x;
2472            }
2473        }
2474
2475	foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
2476            if (! $is_utf8_locale) {
2477                my $y = uc $x;
2478                next unless lc $y eq $x;
2479                debug_more( "lower=", disp_chars(($x)),
2480                            "; uc=", disp_chars(($y)), "; ",
2481                            "; fc=", disp_chars((fc $x)), "; ",
2482                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2483                            $x =~ /\Q$y/i ? 1 : 0,
2484                            "; ",
2485                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2486                            $y =~ /\Q$x/i ? 1 : 0,
2487                            "\n");
2488                if ($x =~ $re || $y =~ $re) { # See above.
2489                    print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2490                    next;
2491                }
2492                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2493
2494                push @f, $x unless lc $x eq fc $x;
2495            }
2496            else {
2497                use locale ':not_characters';
2498                my $y = uc $x;
2499                next unless lc $y eq $x;
2500                debug_more( "lower=", disp_chars(($x)),
2501                            "; uc=", disp_chars(($y)), "; ",
2502                            "; fc=", disp_chars((fc $x)), "; ",
2503                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2504                            $x =~ /\Q$y/i ? 1 : 0,
2505                            "; ",
2506                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2507                            $y =~ /\Q$x/i ? 1 : 0,
2508                            "\n");
2509                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2510
2511                push @f, $x unless lc $x eq fc $x;
2512            }
2513	}
2514	report_multi_result($Locale, $locales_test_number, \@f);
2515        $problematical_tests{$locales_test_number} = 1;
2516    }
2517
2518    # [perl #109318]
2519    {
2520        my @f = ();
2521        ++$locales_test_number;
2522        $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
2523        $problematical_tests{$locales_test_number} = 1;
2524
2525        my $radix = langinfo(RADIXCHAR);
2526        my @nums = (
2527             "3.14e+9",  "3${radix}14e+9",  "3.14e-9",  "3${radix}14e-9",
2528            "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
2529        );
2530
2531        if (! $is_utf8_locale) {
2532            use locale;
2533            for my $num (@nums) {
2534                push @f, $num
2535                    unless sprintf("%g", $num) =~ /3.+14/;
2536            }
2537        }
2538        else {
2539            use locale ':not_characters';
2540            for my $num (@nums) {
2541                push @f, $num
2542                    unless sprintf("%g", $num) =~ /3.+14/;
2543            }
2544        }
2545
2546        report_result($Locale, $locales_test_number, @f == 0);
2547        if (@f) {
2548            print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2549	}
2550    }
2551
2552    {
2553        my @f = ();
2554        ++$locales_test_number;
2555        $test_names{$locales_test_number} =
2556                 'Verify ALT_DIGITS returns nothing, or else non-ASCII and'
2557               . ' the single char digits evaluate to consecutive integers'
2558               . ' starting at 0; 0 is accepted for alt-0 for locales without'
2559               . ' a zero';
2560
2561        my $alts = langinfo(ALT_DIGITS);
2562        if ($alts) {
2563            my @alts = split ';', $alts;
2564            my $prev = -1;
2565            foreach my $num (@alts) {
2566                if ($num =~ /[[:ascii:]]/) {
2567                    if ($prev != -1 || $num != 0) {
2568                        push @f, disp_str($num);
2569                        last;
2570                    }
2571                }
2572
2573                # We only look at single character strings; likely locales
2574                # that have alternate digits have a different mechanism for
2575                # representing larger numbers.  Japanese for example, has a
2576                # single character for the number 10, which is prefixed to the
2577                # '1' symbol for '11', etc.  And 21 is represented by 3
2578                # characters, the '2' symbol, followed by the '10' symbol,
2579                # then the '1' symbol.  (There is nothing to say that a locale
2580                # even has to use base 10.)
2581                last if length $num > 1;
2582
2583                use Unicode::UCD 'num';
2584                my $value = num($num);
2585                if ($value != $prev + 1) {
2586                    push @f, disp_str($num);
2587                    last;
2588                }
2589
2590                $prev = $value;
2591            }
2592        }
2593
2594        report_result($Locale, $locales_test_number, @f == 0);
2595        if (@f) {
2596            print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2597	}
2598    }
2599}
2600
2601my $final_locales_test_number = $locales_test_number;
2602
2603# Recount the errors.
2604
2605TEST_NUM:
2606foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
2607    my $has_non_global_failure = $Problem{$test_num}
2608                            || ! defined $Okay{$test_num}
2609                            || ! @{$Okay{$test_num}};
2610    print "not " if $has_non_global_failure;
2611    print "ok $test_num";
2612    $test_names{$test_num} = "" unless defined $test_names{$test_num};
2613
2614    # If TODO is in the test name, make it thus
2615    my $todo = $test_names{$test_num} =~ s/\s*TODO\s*//;
2616    print " $test_names{$test_num}";
2617    if ($todo) {
2618        print " # TODO\n";
2619    }
2620    elsif (! $has_non_global_failure) {
2621        print "\n";
2622    }
2623    elsif ($has_non_global_failure) {
2624
2625        # If there are any locales that pass this test, or are known-bad, it
2626        # may be that there are enough passes that we TODO the failure, but
2627        # only for tests that we have decided can be problematical.
2628        if (  ($Okay{$test_num} || $Known_bad_locale{$test_num})
2629            && grep { $_ == $test_num } keys %problematical_tests)
2630        {
2631            # Don't count the known-bad failures when calculating the
2632            # percentage that fail.
2633            my $known_failures = (exists $Known_bad_locale{$test_num})
2634                                  ? scalar(keys $Known_bad_locale{$test_num}->%*)
2635                                  : 0;
2636            my $adjusted_failures = scalar(keys $Problem{$test_num}->%*)
2637                                    - $known_failures;
2638
2639            # Specially handle failures where only known-bad locales fail.
2640            # This makes the diagnositics clearer.
2641            if ($adjusted_failures <= 0) {
2642                print " # TODO fails only on known bad locales: ",
2643                      join " ", keys $Known_bad_locale{$test_num}->%*, "\n";
2644                next TEST_NUM;
2645            }
2646
2647            # Round to nearest .1%
2648            my $percent_fail = (int(.5 + (1000 * $adjusted_failures
2649                                          / scalar(@Locale))))
2650                               / 10;
2651            $todo = $percent_fail < $acceptable_failure_percentage;
2652            print " # TODO" if $todo;
2653            print "\n";
2654
2655            if ($debug) {
2656                print "# $percent_fail% of locales (",
2657                      scalar(keys $Problem{$test_num}->%*),
2658                      " of ",
2659                      scalar(@Locale),
2660                      ") fail the above test (TODO cut-off is ",
2661                      $acceptable_failure_percentage,
2662                      "%)\n";
2663            }
2664            elsif ($todo) {
2665                print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n";
2666                print "# pass the above test, so it is likely that the failures\n";
2667                print "# are errors in the locale definitions.  The test is marked TODO, as the\n";
2668                print "# problem is not likely to be Perl's\n";
2669            }
2670        }
2671
2672        if ($debug) {
2673            print "# The code points that had this failure are given above.  Look for lines\n";
2674            print "# that match 'failed $test_num'\n";
2675        }
2676        else {
2677            print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2678            print "# Then look at that output for lines that match 'failed $test_num'\n";
2679        }
2680	if (defined $not_necessarily_a_problem_test_number
2681            && $test_num == $not_necessarily_a_problem_test_number)
2682        {
2683	    print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
2684	    print "# It usually indicates a problem in the environment,\n";
2685	    print "# not in Perl itself.\n";
2686	}
2687    }
2688}
2689
2690$test_num = $final_locales_test_number;
2691
2692if ( ! defined $Config{d_setlocale_accepts_any_locale_name}) {
2693    # perl #115808
2694    use warnings;
2695    my $warned = 0;
2696    local $SIG{__WARN__} = sub {
2697        $warned = $_[0] =~ /uninitialized/;
2698    };
2699    my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
2700    ok($warned, "variable set to setlocale(\"invalid locale name\") is considered uninitialized");
2701}
2702
2703# Test that tainting and case changing works on utf8 strings.  These tests are
2704# placed last to avoid disturbing the hard-coded test numbers that existed at
2705# the time these were added above this in this file.
2706# This also tests that locale overrides unicode_strings in the same scope for
2707# non-utf8 strings.
2708setlocale(&POSIX::LC_ALL, "C");
2709{
2710    use locale;
2711    use feature 'unicode_strings';
2712
2713    foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
2714        my @list;   # List of code points to test for $function
2715
2716        # Used to calculate the changed case for ASCII characters by using the
2717        # ord, instead of using one of the functions under test.
2718        my $ascii_case_change_delta;
2719        my $above_latin1_case_change_delta; # Same for the specific ords > 255
2720                                            # that we use
2721
2722        # We test an ASCII character, which should change case;
2723        # a Latin1 character, which shouldn't change case under this C locale,
2724        # an above-Latin1 character that when the case is changed would cross
2725        #   the 255/256 boundary, so doesn't change case
2726        #   (the \x{149} is one of these, but changes into 2 characters, the
2727        #   first one of which doesn't cross the boundary.
2728        # the final one in each list is an above-Latin1 character whose case
2729        #   does change.  The code below uses its position in its list as a
2730        #   marker to indicate that it, unlike the other code points above
2731        #   ASCII, has a successful case change
2732        #
2733        # All casing operations under locale (but not :not_characters) should
2734        # taint
2735        if ($function =~ /^u/) {
2736            @list = ("", "a",
2737                     chr(utf8::unicode_to_native(0xe0)),
2738                     chr(utf8::unicode_to_native(0xff)),
2739                     "\x{fb00}", "\x{149}", "\x{101}");
2740            $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32;
2741            $above_latin1_case_change_delta = -1;
2742        }
2743        else {
2744            @list = ("", "A",
2745                     chr(utf8::unicode_to_native(0xC0)),
2746                     "\x{17F}", "\x{100}");
2747            $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32;
2748            $above_latin1_case_change_delta = +1;
2749        }
2750        foreach my $is_utf8_locale (0 .. 1) {
2751            foreach my $j (0 .. $#list) {
2752                my $char = $list[$j];
2753
2754                for my $encoded_in_utf8 (0 .. 1) {
2755                    my $should_be;
2756                    my $changed;
2757                    if (! $is_utf8_locale) {
2758                        no warnings 'locale';
2759                        $should_be = ($j == $#list)
2760                            ? chr(ord($char) + $above_latin1_case_change_delta)
2761                            : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127)
2762                              ? $char
2763                              : chr(ord($char) + $ascii_case_change_delta);
2764
2765                        # This monstrosity is in order to avoid using an eval,
2766                        # which might perturb the results
2767                        $changed = ($function eq "uc")
2768                                    ? uc($char)
2769                                    : ($function eq "ucfirst")
2770                                      ? ucfirst($char)
2771                                      : ($function eq "lc")
2772                                        ? lc($char)
2773                                        : ($function eq "lcfirst")
2774                                          ? lcfirst($char)
2775                                          : ($function eq "fc")
2776                                            ? fc($char)
2777                                            : die("Unexpected function \"$function\"");
2778                    }
2779                    else {
2780                        {
2781                            no locale;
2782
2783                            # For utf8-locales the case changing functions
2784                            # should work just like they do outside of locale.
2785                            # Can use eval here because not testing it when
2786                            # not in locale.
2787                            $should_be = eval "$function('$char')";
2788                            die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if  $@;
2789
2790                        }
2791                        use locale ':not_characters';
2792                        $changed = ($function eq "uc")
2793                                    ? uc($char)
2794                                    : ($function eq "ucfirst")
2795                                      ? ucfirst($char)
2796                                      : ($function eq "lc")
2797                                        ? lc($char)
2798                                        : ($function eq "lcfirst")
2799                                          ? lcfirst($char)
2800                                          : ($function eq "fc")
2801                                            ? fc($char)
2802                                            : die("Unexpected function \"$function\"");
2803                    }
2804                    ok($changed eq $should_be,
2805                        "$function(\"$char\") in C locale "
2806                        . (($is_utf8_locale)
2807                            ? "(use locale ':not_characters'"
2808                            : "(use locale")
2809                        . (($encoded_in_utf8)
2810                            ? "; encoded in utf8)"
2811                            : "; not encoded in utf8)")
2812                        . " should be \"$should_be\", got \"$changed\"");
2813
2814                    # Tainting shouldn't happen for use locale :not_character
2815                    # (a utf8 locale)
2816                    (! $is_utf8_locale)
2817                    ? check_taint($changed)
2818                    : check_taint_not($changed);
2819
2820                    # Use UTF-8 next time through the loop
2821                    utf8::upgrade($char);
2822                }
2823            }
2824        }
2825    }
2826}
2827
2828# Give final advice.
2829
2830my $didwarn = 0;
2831
2832if (%setlocale_failed) {
2833    print "#\nsetlocale() failed for these locales:\n";
2834    for my $locale (keys %setlocale_failed) {
2835        print "#\t$locale\n";
2836    }
2837    print "#\n";
2838    $didwarn = 1;
2839}
2840
2841foreach ($first_locales_test_number..$final_locales_test_number) {
2842    if ($Problem{$_}) {
2843	my @f = sort keys %{ $Problem{$_} };
2844
2845        # Don't list the failures caused by known-bad locales.
2846        if (exists $known_bad_locales{$os}) {
2847            @f = grep { $_ !~ $known_bad_locales{$os} } @f;
2848            next unless @f;
2849        }
2850	my $f = join(" ", @f);
2851	$f =~ s/(.{50,60}) /$1\n#\t/g;
2852	print
2853	    "#\n",
2854            "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2855	    "#\t", $f, "\n#\n",
2856	    "# on your system may have errors because the locale test $_\n",
2857	    "# \"$test_names{$_}\"\n",
2858            "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2859            ".\n";
2860	print <<EOW;
2861#
2862# If your users are not using these locales you are safe for the moment,
2863# but please report this failure first to perlbug\@perl.org using the
2864# perlbug script (as described in the INSTALL file) so that the exact
2865# details of the failures can be sorted out first and then your operating
2866# system supplier can be alerted about these anomalies.
2867#
2868EOW
2869	$didwarn = 1;
2870    }
2871}
2872
2873# Tell which locales were okay and which were not.
2874
2875if ($didwarn) {
2876    my (@s, @F);
2877
2878    foreach my $l (@Locale) {
2879	my $p = 0;
2880        if ($setlocale_failed{$l}) {
2881            $p++;
2882        }
2883        else {
2884            foreach my $t
2885                        ($first_locales_test_number..$final_locales_test_number)
2886            {
2887                $p++ if $Problem{$t}{$l};
2888            }
2889	}
2890	push @s, $l if $p == 0;
2891        push @F, $l unless $p == 0;
2892    }
2893
2894    if (@s) {
2895        my $s = join(" ", @s);
2896        $s =~ s/(.{50,60}) /$1\n#\t/g;
2897
2898        print
2899            "# The following locales\n#\n",
2900            "#\t", $s, "\n#\n",
2901	    "# tested okay.\n#\n",
2902    } else {
2903        print "# None of your locales were fully okay.\n";
2904    }
2905
2906    if (@F) {
2907        my $F = join(" ", @F);
2908        $F =~ s/(.{50,60}) /$1\n#\t/g;
2909
2910        my $details = "";
2911        unless ($debug) {
2912            $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2913        }
2914        elsif ($debug == 1) {
2915            $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2916        }
2917
2918        print
2919          "# The following locales\n#\n",
2920          "#\t", $F, "\n#\n",
2921          "# had problems.\n#\n",
2922          $details;
2923    } else {
2924        print "# None of your locales were broken.\n";
2925    }
2926}
2927
2928if (exists $known_bad_locales{$os} && ! %Known_bad_locale) {
2929    $test_num++;
2930    print "ok $test_num $^O no longer has known bad locales # TODO\n";
2931}
2932
2933print "1..$test_num\n";
2934
2935# eof
2936