1#!./perl -wT 2 3use strict; 4use warnings; 5use Config; 6 7# This tests plain 'use locale' and adorned 'use locale ":not_characters"' 8# Because these pragmas are compile time, and I (khw) am trying to test 9# without using 'eval' as much as possible, which might cloud the issue, the 10# crucial parts of the code are duplicated in a block for each pragma. 11 12# Unfortunately, many systems have defective locale definitions. This test 13# file looks for both perl bugs and bugs in the system's locale definitions. 14# It can be difficult to tease apart which is which. For the latter, there 15# are tests that are based on the POSIX standard. A character isn't supposed 16# to be both a space and graphic, for example. Another example is if a 17# character is the uppercase of another, that other should be the lowercase of 18# the first. Including tests for these allows you to test for defective 19# locales, as described in perllocale. The way this file distinguishes 20# between defective locales, and perl bugs is to see what percentage of 21# locales fail a given test. If it's a lot, then it's more likely to be a 22# perl bug; only a few, those particular locales are likely defective. In 23# that case the failing tests are marked TODO. (They should be reported to 24# the vendor, however; but it's not perl's problem.) In some cases, this 25# script has caused tickets to be filed against perl which turn out to be the 26# platform's bug, but a higher percentage of locales are failing than the 27# built-in cut-off point. For those platforms, code has been added to 28# increase the cut-off, so those platforms don't trigger failing test reports. 29# Ideally, the platforms would get fixed and that code would be changed to 30# only kick-in when run on versions that are earlier than the fixed one. But, 31# this rarely happens in practice. 32 33# To make a TODO test, add the string 'TODO' to its %test_names value 34 35my $is_ebcdic = ord("A") == 193; 36my $os = lc $^O; 37 38# Configure now lets you build a perl that silently ignores taint features 39my $NoTaintSupport = exists($Config{taint_support}) && !$Config{taint_support}; 40 41no warnings 'locale'; # We test even weird locales; and do some scary things 42 # in ok locales 43 44binmode STDOUT, ':utf8'; 45binmode STDERR, ':utf8'; 46 47BEGIN { 48 chdir 't' if -d 't'; 49 @INC = '../lib'; 50 unshift @INC, '.'; 51 require './loc_tools.pl'; 52 unless (locales_enabled('LC_CTYPE')) { 53 print "1..0\n"; 54 exit; 55 } 56 $| = 1; 57 require Config; import Config; 58} 59 60use feature 'fc'; 61my @langinfo; 62BEGIN { 63 @langinfo = qw( 64 CODESET 65 RADIXCHAR 66 THOUSEP 67 CRNCYSTR 68 ALT_DIGITS 69 YESEXPR 70 YESSTR 71 NOEXPR 72 NOSTR 73 ERA 74 ABDAY_1 75 DAY_1 76 ABMON_1 77 MON_1 78 AM_STR 79 PM_STR 80 D_FMT 81 D_T_FMT 82 ERA_D_FMT 83 ERA_D_T_FMT 84 ERA_T_FMT 85 T_FMT 86 T_FMT_AMPM 87 _NL_ADDRESS_POSTAL_FMT 88 _NL_ADDRESS_COUNTRY_NAME 89 _NL_ADDRESS_COUNTRY_POST 90 _NL_ADDRESS_COUNTRY_AB2 91 _NL_ADDRESS_COUNTRY_AB3 92 _NL_ADDRESS_COUNTRY_CAR 93 _NL_ADDRESS_COUNTRY_NUM 94 _NL_ADDRESS_COUNTRY_ISBN 95 _NL_ADDRESS_LANG_NAME 96 _NL_ADDRESS_LANG_AB 97 _NL_ADDRESS_LANG_TERM 98 _NL_ADDRESS_LANG_LIB 99 _NL_IDENTIFICATION_TITLE 100 _NL_IDENTIFICATION_SOURCE 101 _NL_IDENTIFICATION_ADDRESS 102 _NL_IDENTIFICATION_CONTACT 103 _NL_IDENTIFICATION_EMAIL 104 _NL_IDENTIFICATION_TEL 105 _NL_IDENTIFICATION_FAX 106 _NL_IDENTIFICATION_LANGUAGE 107 _NL_IDENTIFICATION_TERRITORY 108 _NL_IDENTIFICATION_AUDIENCE 109 _NL_IDENTIFICATION_APPLICATION 110 _NL_IDENTIFICATION_ABBREVIATION 111 _NL_IDENTIFICATION_REVISION 112 _NL_IDENTIFICATION_DATE 113 _NL_IDENTIFICATION_CATEGORY 114 _NL_MEASUREMENT_MEASUREMENT 115 _NL_NAME_NAME_FMT 116 _NL_NAME_NAME_GEN 117 _NL_NAME_NAME_MR 118 _NL_NAME_NAME_MRS 119 _NL_NAME_NAME_MISS 120 _NL_NAME_NAME_MS 121 _NL_PAPER_HEIGHT 122 _NL_PAPER_WIDTH 123 _NL_TELEPHONE_TEL_INT_FMT 124 _NL_TELEPHONE_TEL_DOM_FMT 125 _NL_TELEPHONE_INT_SELECT 126 _NL_TELEPHONE_INT_PREFIX 127 ); 128} 129 130use I18N::Langinfo 'langinfo', @langinfo; 131 132# =1 adds debugging output; =2 increases the verbosity somewhat 133our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; 134 135# Certain tests have been shown to be problematical for a few locales. Don't 136# fail them unless at least this percentage of the tested locales fail. 137# EBCDIC os390 has more locales fail than normal, because it has locales that 138# move various critical characters like '['. 139my $acceptable_failure_percentage = ($os =~ / ^ ( os390 ) $ /x) 140 ? 10 141 : 5; 142 143# The list of test numbers of the problematic tests. 144my %problematical_tests; 145 146# If any %problematical_tests fails in one of these locales, it is 147# considered a TODO. 148my %known_bad_locales = ( 149 irix => qr/ ^ (?: cs | hu | sk ) $/x, 150 darwin => qr/ ^ lt_LT.ISO8859 /ix, 151 os390 => qr/ ^ italian /ix, 152 netbsd => qr/\bISO8859-2\b/i, 153 154 # This may be the same bug as the cygwin below; it's 155 # generating malformed UTF-8 on the radix being 156 # mulit-byte 157 solaris => qr/ ^ ( ar_ | pa_ ) /x, 158 ); 159 160# cygwin isn't returning proper radix length in this locale, but supposedly to 161# be fixed in later versions. 162if ($os eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) { 163 $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix; 164} 165 166use Dumpvalue; 167 168my $dumper = Dumpvalue->new( 169 tick => qq{"}, 170 quoteHighBit => 0, 171 unctrl => "quote" 172 ); 173 174sub debug { 175 return unless $debug; 176 my($mess) = join "", '# ', @_; 177 chomp $mess; 178 print STDERR $dumper->stringify($mess,1), "\n"; 179} 180 181sub note { 182 local $debug = 1; 183 debug @_; 184} 185 186sub debug_more { 187 return unless $debug > 1; 188 return debug(@_); 189} 190 191sub debugf { 192 printf STDERR @_ if $debug; 193} 194 195$a = 'abc %9'; 196 197my $test_num = 0; 198 199sub ok { 200 my ($result, $message) = @_; 201 $message = "" unless defined $message; 202 203 print 'not ' unless ($result); 204 print "ok " . ++$test_num; 205 print " $message"; 206 print "\n"; 207 return ($result) ? 1 : 0; 208} 209 210sub skip { 211 return ok 1, "skipped: " . shift; 212} 213 214sub fail { 215 return ok 0, shift; 216} 217 218# First we'll do a lot of taint checking for locales. 219# This is the easiest to test, actually, as any locale, 220# even the default locale will taint under 'use locale'. 221 222sub is_tainted { # hello, camel two. 223 no warnings 'uninitialized' ; 224 my $dummy; 225 local $@; 226 not eval { $dummy = join("", @_), kill 0; 1 } 227} 228 229sub check_taint ($;$) { 230 my $message_tail = $_[1] // ""; 231 232 # Extra blanks are so aligns with taint_not output 233 $message_tail = ": $message_tail" if $message_tail; 234 if ($NoTaintSupport) { 235 skip("your perl was built without taint support"); 236 } 237 else { 238 ok is_tainted($_[0]), "verify that is tainted$message_tail"; 239 } 240} 241 242sub check_taint_not ($;$) { 243 my $message_tail = $_[1] // ""; 244 $message_tail = ": $message_tail" if $message_tail; 245 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); 246} 247 248foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { 249 my $short_result = locales_enabled($category); 250 ok ($short_result == 0 || $short_result == 1, 251 "Verify locales_enabled('$category') returns 0 or 1"); 252 debug("locales_enabled('$category') returned '$short_result'"); 253 my $long_result = locales_enabled("LC_$category"); 254 if (! ok ($long_result == $short_result, 255 " and locales_enabled('LC_$category') returns " 256 . "the same value") 257 ) { 258 debug("locales_enabled('LC_$category') returned $long_result"); 259 } 260} 261 262"\tb\t" =~ /^m?(\s)(.*)\1$/; 263check_taint_not $&, "not tainted outside 'use locale'"; 264; 265 266use locale; # engage locale and therefore locale taint. 267 268# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for 269# ":notcharacters" 270 271check_taint_not $a, '$a'; 272 273check_taint uc($a), 'uc($a)'; 274check_taint "\U$a", '"\U$a"'; 275check_taint ucfirst($a), 'ucfirst($a)'; 276check_taint "\u$a", '"\u$a"'; 277check_taint lc($a), 'lc($a)'; 278check_taint fc($a), 'fc($a)'; 279check_taint "\L$a", '"\L$a"'; 280check_taint "\F$a", '"\F$a"'; 281check_taint lcfirst($a), 'lcfirst($a)'; 282check_taint "\l$a", '"\l$a"'; 283 284check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; 285check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; 286check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; 287check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; 288check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; 289 290$_ = $a; # untaint $_ 291 292$_ = uc($a); # taint $_ 293 294check_taint $_, '$_ = uc($a)'; 295 296/(\w)/; # taint $&, $`, $', $+, $1. 297check_taint $&, "\$& from /(\\w)/"; 298check_taint $`, "\t\$`"; 299check_taint $', "\t\$'"; 300check_taint $+, "\t\$+"; 301check_taint $1, "\t\$1"; 302check_taint_not $2, "\t\$2"; 303 304/(.)/; # untaint $&, $`, $', $+, $1. 305check_taint_not $&, "\$& from /(.)/"; 306check_taint_not $`, "\t\$`"; 307check_taint_not $', "\t\$'"; 308check_taint_not $+, "\t\$+"; 309check_taint_not $1, "\t\$1"; 310check_taint_not $2, "\t\$2"; 311 312/(\W)/; # taint $&, $`, $', $+, $1. 313check_taint $&, "\$& from /(\\W)/"; 314check_taint $`, "\t\$`"; 315check_taint $', "\t\$'"; 316check_taint $+, "\t\$+"; 317check_taint $1, "\t\$1"; 318check_taint_not $2, "\t\$2"; 319 320/(.)/; # untaint $&, $`, $', $+, $1. 321check_taint_not $&, "\$& from /(.)/"; 322check_taint_not $`, "\t\$`"; 323check_taint_not $', "\t\$'"; 324check_taint_not $+, "\t\$+"; 325check_taint_not $1, "\t\$1"; 326check_taint_not $2, "\t\$2"; 327 328/(\s)/; # taint $&, $`, $', $+, $1. 329check_taint $&, "\$& from /(\\s)/"; 330check_taint $`, "\t\$`"; 331check_taint $', "\t\$'"; 332check_taint $+, "\t\$+"; 333check_taint $1, "\t\$1"; 334check_taint_not $2, "\t\$2"; 335 336/(.)/; # untaint $&, $`, $', $+, $1. 337check_taint_not $&, "\$& from /(.)/"; 338 339/(\S)/; # taint $&, $`, $', $+, $1. 340check_taint $&, "\$& from /(\\S)/"; 341check_taint $`, "\t\$`"; 342check_taint $', "\t\$'"; 343check_taint $+, "\t\$+"; 344check_taint $1, "\t\$1"; 345check_taint_not $2, "\t\$2"; 346 347/(.)/; # untaint $&, $`, $', $+, $1. 348check_taint_not $&, "\$& from /(.)/"; 349 350"0" =~ /(\d)/; # taint $&, $`, $', $+, $1. 351check_taint $&, "\$& from /(\\d)/"; 352check_taint $`, "\t\$`"; 353check_taint $', "\t\$'"; 354check_taint $+, "\t\$+"; 355check_taint $1, "\t\$1"; 356check_taint_not $2, "\t\$2"; 357 358/(.)/; # untaint $&, $`, $', $+, $1. 359check_taint_not $&, "\$& from /(.)/"; 360 361/(\D)/; # taint $&, $`, $', $+, $1. 362check_taint $&, "\$& from /(\\D)/"; 363check_taint $`, "\t\$`"; 364check_taint $', "\t\$'"; 365check_taint $+, "\t\$+"; 366check_taint $1, "\t\$1"; 367check_taint_not $2, "\t\$2"; 368 369/(.)/; # untaint $&, $`, $', $+, $1. 370check_taint_not $&, "\$& from /(.)/"; 371 372/([[:alnum:]])/; # taint $&, $`, $', $+, $1. 373check_taint $&, "\$& from /([[:alnum:]])/"; 374check_taint $`, "\t\$`"; 375check_taint $', "\t\$'"; 376check_taint $+, "\t\$+"; 377check_taint $1, "\t\$1"; 378check_taint_not $2, "\t\$2"; 379 380/(.)/; # untaint $&, $`, $', $+, $1. 381check_taint_not $&, "\$& from /(.)/"; 382 383/([[:^alnum:]])/; # taint $&, $`, $', $+, $1. 384check_taint $&, "\$& from /([[:^alnum:]])/"; 385check_taint $`, "\t\$`"; 386check_taint $', "\t\$'"; 387check_taint $+, "\t\$+"; 388check_taint $1, "\t\$1"; 389check_taint_not $2, "\t\$2"; 390 391"a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1. 392check_taint $&, "\$& from /(a)|(\\w)/"; 393check_taint $`, "\t\$`"; 394check_taint $', "\t\$'"; 395check_taint $+, "\t\$+"; 396check_taint $1, "\t\$1"; 397ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); 398ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); 399check_taint_not $2, "\t\$2"; 400check_taint_not $3, "\t\$3"; 401 402/(.)/; # untaint $&, $`, $', $+, $1. 403check_taint_not $&, "\$& from /(.)/"; 404 405"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence 406check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; 407check_taint_not $`, "\t\$`"; 408check_taint_not $', "\t\$'"; 409check_taint_not $+, "\t\$+"; 410check_taint_not $1, "\t\$1"; 411ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); 412check_taint_not $2, "\t\$2"; 413 414/(.)/; # untaint $&, $`, $', $+, $1. 415check_taint_not $&, "\$& from /./"; 416 417"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale 418check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i"; 419check_taint $`, "\t\$`"; 420check_taint $', "\t\$'"; 421check_taint $+, "\t\$+"; 422check_taint $1, "\t\$1"; 423check_taint_not $2, "\t\$2"; 424 425/(.)/; # untaint $&, $`, $', $+, $1. 426check_taint_not $&, "\$& from /(.)/"; 427 428"a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1. 429check_taint $&, "\$& from /(.)\\b(.)/"; 430check_taint $`, "\t\$`"; 431check_taint $', "\t\$'"; 432check_taint $+, "\t\$+"; 433check_taint $1, "\t\$1"; 434check_taint $2, "\t\$2"; 435check_taint_not $3, "\t\$3"; 436 437/(.)/; # untaint $&, $`, $', $+, $1. 438check_taint_not $&, "\$& from /./"; 439 440"aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1. 441check_taint $&, "\$& from /(.)\\B(.)/"; 442check_taint $`, "\t\$`"; 443check_taint $', "\t\$'"; 444check_taint $+, "\t\$+"; 445check_taint $1, "\t\$1"; 446check_taint $2, "\t\$2"; 447check_taint_not $3, "\t\$3"; 448 449/(.)/; # untaint $&, $`, $', $+, $1. 450check_taint_not $&, "\$& from /./"; 451 452"aaa" =~ /(.).(\1)/i; # notaint because not locale dependent 453check_taint_not $&, "\$ & from /(.).(\\1)/"; 454check_taint_not $`, "\t\$`"; 455check_taint_not $', "\t\$'"; 456check_taint_not $+, "\t\$+"; 457check_taint_not $1, "\t\$1"; 458check_taint_not $2, "\t\$2"; 459check_taint_not $3, "\t\$3"; 460 461/(.)/; # untaint $&, $`, $', $+, $1. 462check_taint_not $&, "\$ & from /./"; 463 464$_ = $a; # untaint $_ 465 466check_taint_not $_, 'untainting $_ works'; 467 468/(b)/; # this must not taint 469check_taint_not $&, "\$ & from /(b)/"; 470check_taint_not $`, "\t\$`"; 471check_taint_not $', "\t\$'"; 472check_taint_not $+, "\t\$+"; 473check_taint_not $1, "\t\$1"; 474check_taint_not $2, "\t\$2"; 475 476$_ = $a; # untaint $_ 477 478check_taint_not $_, 'untainting $_ works'; 479 480$b = uc($a); # taint $b 481s/(.+)/$b/; # this must taint only the $_ 482 483check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted'; 484check_taint_not $&, "\t\$&"; 485check_taint_not $`, "\t\$`"; 486check_taint_not $', "\t\$'"; 487check_taint_not $+, "\t\$+"; 488check_taint_not $1, "\t\$1"; 489check_taint_not $2, "\t\$2"; 490 491$_ = $a; # untaint $_ 492 493s/(.+)/b/; # this must not taint 494check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; 495check_taint_not $&, "\t\$&"; 496check_taint_not $`, "\t\$`"; 497check_taint_not $', "\t\$'"; 498check_taint_not $+, "\t\$+"; 499check_taint_not $1, "\t\$1"; 500check_taint_not $2, "\t\$2"; 501 502$b = $a; # untaint $b 503 504($b = $a) =~ s/\w/$&/; 505check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted. 506check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not. 507 508$_ = $a; # untaint $_ 509 510s/(\w)/\l$1/; # this must taint 511check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint 512check_taint $&, "\t\$&"; 513check_taint $`, "\t\$`"; 514check_taint $', "\t\$'"; 515check_taint $+, "\t\$+"; 516check_taint $1, "\t\$1"; 517check_taint_not $2, "\t\$2"; 518 519$_ = $a; # untaint $_ 520 521s/(\w)/\L$1/; # this must taint 522check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; 523check_taint $&, "\t\$&"; 524check_taint $`, "\t\$`"; 525check_taint $', "\t\$'"; 526check_taint $+, "\t\$+"; 527check_taint $1, "\t\$1"; 528check_taint_not $2, "\t\$2"; 529 530$_ = $a; # untaint $_ 531 532s/(\w)/\u$1/; # this must taint 533check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; 534check_taint $&, "\t\$&"; 535check_taint $`, "\t\$`"; 536check_taint $', "\t\$'"; 537check_taint $+, "\t\$+"; 538check_taint $1, "\t\$1"; 539check_taint_not $2, "\t\$2"; 540 541$_ = $a; # untaint $_ 542 543s/(\w)/\U$1/; # this must taint 544check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; 545check_taint $&, "\t\$&"; 546check_taint $`, "\t\$`"; 547check_taint $', "\t\$'"; 548check_taint $+, "\t\$+"; 549check_taint $1, "\t\$1"; 550check_taint_not $2, "\t\$2"; 551 552# After all this tainting $a should be cool. 553 554check_taint_not $a, '$a still not tainted'; 555 556"a" =~ /([a-z])/; 557check_taint_not $1, '"a" =~ /([a-z])/'; 558"foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 559check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; 560 561# BE SURE TO COPY ANYTHING YOU ADD to the block below 562 563{ # This is just the previous tests copied here with a different 564 # compile-time pragma. 565 566 use locale ':not_characters'; # engage restricted locale with different 567 # tainting rules 568 check_taint_not $a, '$a'; 569 570 check_taint_not uc($a), 'uc($a)'; 571 check_taint_not "\U$a", '"\U$a"'; 572 check_taint_not ucfirst($a), 'ucfirst($a)'; 573 check_taint_not "\u$a", '"\u$a"'; 574 check_taint_not lc($a), 'lc($a)'; 575 check_taint_not fc($a), 'fc($a)'; 576 check_taint_not "\L$a", '"\L$a"'; 577 check_taint_not "\F$a", '"\F$a"'; 578 check_taint_not lcfirst($a), 'lcfirst($a)'; 579 check_taint_not "\l$a", '"\l$a"'; 580 581 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; 582 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; 583 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; 584 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; 585 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; 586 587 $_ = $a; # untaint $_ 588 589 $_ = uc($a); 590 591 check_taint_not $_, '$_ = uc($a)'; 592 593 /(\w)/; 594 check_taint_not $&, "\$& from /(\\w)/"; 595 check_taint_not $`, "\t\$`"; 596 check_taint_not $', "\t\$'"; 597 check_taint_not $+, "\t\$+"; 598 check_taint_not $1, "\t\$1"; 599 check_taint_not $2, "\t\$2"; 600 601 /(.)/; # untaint $&, $`, $', $+, $1. 602 check_taint_not $&, "\$& from /(.)/"; 603 check_taint_not $`, "\t\$`"; 604 check_taint_not $', "\t\$'"; 605 check_taint_not $+, "\t\$+"; 606 check_taint_not $1, "\t\$1"; 607 check_taint_not $2, "\t\$2"; 608 609 /(\W)/; 610 check_taint_not $&, "\$& from /(\\W)/"; 611 check_taint_not $`, "\t\$`"; 612 check_taint_not $', "\t\$'"; 613 check_taint_not $+, "\t\$+"; 614 check_taint_not $1, "\t\$1"; 615 check_taint_not $2, "\t\$2"; 616 617 /(.)/; # untaint $&, $`, $', $+, $1. 618 check_taint_not $&, "\$& from /(.)/"; 619 check_taint_not $`, "\t\$`"; 620 check_taint_not $', "\t\$'"; 621 check_taint_not $+, "\t\$+"; 622 check_taint_not $1, "\t\$1"; 623 check_taint_not $2, "\t\$2"; 624 625 /(\s)/; 626 check_taint_not $&, "\$& from /(\\s)/"; 627 check_taint_not $`, "\t\$`"; 628 check_taint_not $', "\t\$'"; 629 check_taint_not $+, "\t\$+"; 630 check_taint_not $1, "\t\$1"; 631 check_taint_not $2, "\t\$2"; 632 633 /(.)/; # untaint $&, $`, $', $+, $1. 634 check_taint_not $&, "\$& from /(.)/"; 635 636 /(\S)/; 637 check_taint_not $&, "\$& from /(\\S)/"; 638 check_taint_not $`, "\t\$`"; 639 check_taint_not $', "\t\$'"; 640 check_taint_not $+, "\t\$+"; 641 check_taint_not $1, "\t\$1"; 642 check_taint_not $2, "\t\$2"; 643 644 /(.)/; # untaint $&, $`, $', $+, $1. 645 check_taint_not $&, "\$& from /(.)/"; 646 647 "0" =~ /(\d)/; 648 check_taint_not $&, "\$& from /(\\d)/"; 649 check_taint_not $`, "\t\$`"; 650 check_taint_not $', "\t\$'"; 651 check_taint_not $+, "\t\$+"; 652 check_taint_not $1, "\t\$1"; 653 check_taint_not $2, "\t\$2"; 654 655 /(.)/; # untaint $&, $`, $', $+, $1. 656 check_taint_not $&, "\$& from /(.)/"; 657 658 /(\D)/; 659 check_taint_not $&, "\$& from /(\\D)/"; 660 check_taint_not $`, "\t\$`"; 661 check_taint_not $', "\t\$'"; 662 check_taint_not $+, "\t\$+"; 663 check_taint_not $1, "\t\$1"; 664 check_taint_not $2, "\t\$2"; 665 666 /(.)/; # untaint $&, $`, $', $+, $1. 667 check_taint_not $&, "\$& from /(.)/"; 668 669 /([[:alnum:]])/; 670 check_taint_not $&, "\$& from /([[:alnum:]])/"; 671 check_taint_not $`, "\t\$`"; 672 check_taint_not $', "\t\$'"; 673 check_taint_not $+, "\t\$+"; 674 check_taint_not $1, "\t\$1"; 675 check_taint_not $2, "\t\$2"; 676 677 /(.)/; # untaint $&, $`, $', $+, $1. 678 check_taint_not $&, "\$& from /(.)/"; 679 680 /([[:^alnum:]])/; 681 check_taint_not $&, "\$& from /([[:^alnum:]])/"; 682 check_taint_not $`, "\t\$`"; 683 check_taint_not $', "\t\$'"; 684 check_taint_not $+, "\t\$+"; 685 check_taint_not $1, "\t\$1"; 686 check_taint_not $2, "\t\$2"; 687 688 "a" =~ /(a)|(\w)/; 689 check_taint_not $&, "\$& from /(a)|(\\w)/"; 690 check_taint_not $`, "\t\$`"; 691 check_taint_not $', "\t\$'"; 692 check_taint_not $+, "\t\$+"; 693 check_taint_not $1, "\t\$1"; 694 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); 695 ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); 696 check_taint_not $2, "\t\$2"; 697 check_taint_not $3, "\t\$3"; 698 699 /(.)/; # untaint $&, $`, $', $+, $1. 700 check_taint_not $&, "\$& from /(.)/"; 701 702 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; 703 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; 704 check_taint_not $`, "\t\$`"; 705 check_taint_not $', "\t\$'"; 706 check_taint_not $+, "\t\$+"; 707 check_taint_not $1, "\t\$1"; 708 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); 709 check_taint_not $2, "\t\$2"; 710 711 /(.)/; # untaint $&, $`, $', $+, $1. 712 check_taint_not $&, "\$& from /./"; 713 714 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; 715 check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i"; 716 check_taint_not $`, "\t\$`"; 717 check_taint_not $', "\t\$'"; 718 check_taint_not $+, "\t\$+"; 719 check_taint_not $1, "\t\$1"; 720 check_taint_not $2, "\t\$2"; 721 722 /(.)/; # untaint $&, $`, $', $+, $1. 723 check_taint_not $&, "\$& from /(.)/"; 724 725 "a:" =~ /(.)\b(.)/; 726 check_taint_not $&, "\$& from /(.)\\b(.)/"; 727 check_taint_not $`, "\t\$`"; 728 check_taint_not $', "\t\$'"; 729 check_taint_not $+, "\t\$+"; 730 check_taint_not $1, "\t\$1"; 731 check_taint_not $2, "\t\$2"; 732 check_taint_not $3, "\t\$3"; 733 734 /(.)/; # untaint $&, $`, $', $+, $1. 735 check_taint_not $&, "\$& from /./"; 736 737 "aa" =~ /(.)\B(.)/; 738 check_taint_not $&, "\$& from /(.)\\B(.)/"; 739 check_taint_not $`, "\t\$`"; 740 check_taint_not $', "\t\$'"; 741 check_taint_not $+, "\t\$+"; 742 check_taint_not $1, "\t\$1"; 743 check_taint_not $2, "\t\$2"; 744 check_taint_not $3, "\t\$3"; 745 746 /(.)/; # untaint $&, $`, $', $+, $1. 747 check_taint_not $&, "\$& from /./"; 748 749 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent 750 check_taint_not $&, "\$ & from /(.).(\\1)/"; 751 check_taint_not $`, "\t\$`"; 752 check_taint_not $', "\t\$'"; 753 check_taint_not $+, "\t\$+"; 754 check_taint_not $1, "\t\$1"; 755 check_taint_not $2, "\t\$2"; 756 check_taint_not $3, "\t\$3"; 757 758 /(.)/; # untaint $&, $`, $', $+, $1. 759 check_taint_not $&, "\$ & from /./"; 760 761 $_ = $a; # untaint $_ 762 763 check_taint_not $_, 'untainting $_ works'; 764 765 /(b)/; 766 check_taint_not $&, "\$ & from /(b)/"; 767 check_taint_not $`, "\t\$`"; 768 check_taint_not $', "\t\$'"; 769 check_taint_not $+, "\t\$+"; 770 check_taint_not $1, "\t\$1"; 771 check_taint_not $2, "\t\$2"; 772 773 $_ = $a; # untaint $_ 774 775 check_taint_not $_, 'untainting $_ works'; 776 777 s/(.+)/b/; 778 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; 779 check_taint_not $&, "\t\$&"; 780 check_taint_not $`, "\t\$`"; 781 check_taint_not $', "\t\$'"; 782 check_taint_not $+, "\t\$+"; 783 check_taint_not $1, "\t\$1"; 784 check_taint_not $2, "\t\$2"; 785 786 $b = $a; # untaint $b 787 788 ($b = $a) =~ s/\w/$&/; 789 check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/'; 790 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; 791 792 $_ = $a; # untaint $_ 793 794 s/(\w)/\l$1/; 795 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint 796 check_taint_not $&, "\t\$&"; 797 check_taint_not $`, "\t\$`"; 798 check_taint_not $', "\t\$'"; 799 check_taint_not $+, "\t\$+"; 800 check_taint_not $1, "\t\$1"; 801 check_taint_not $2, "\t\$2"; 802 803 $_ = $a; # untaint $_ 804 805 s/(\w)/\L$1/; 806 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; 807 check_taint_not $&, "\t\$&"; 808 check_taint_not $`, "\t\$`"; 809 check_taint_not $', "\t\$'"; 810 check_taint_not $+, "\t\$+"; 811 check_taint_not $1, "\t\$1"; 812 check_taint_not $2, "\t\$2"; 813 814 $_ = $a; # untaint $_ 815 816 s/(\w)/\u$1/; 817 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; 818 check_taint_not $&, "\t\$&"; 819 check_taint_not $`, "\t\$`"; 820 check_taint_not $', "\t\$'"; 821 check_taint_not $+, "\t\$+"; 822 check_taint_not $1, "\t\$1"; 823 check_taint_not $2, "\t\$2"; 824 825 $_ = $a; # untaint $_ 826 827 s/(\w)/\U$1/; 828 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; 829 check_taint_not $&, "\t\$&"; 830 check_taint_not $`, "\t\$`"; 831 check_taint_not $', "\t\$'"; 832 check_taint_not $+, "\t\$+"; 833 check_taint_not $1, "\t\$1"; 834 check_taint_not $2, "\t\$2"; 835 836 # After all this tainting $a should be cool. 837 838 check_taint_not $a, '$a still not tainted'; 839 840 "a" =~ /([a-z])/; 841 check_taint_not $1, '"a" =~ /([a-z])/'; 842 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 843 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; 844 845} 846 847# Here are in scope of 'use locale' 848 849# I think we've seen quite enough of taint. 850# Let us do some *real* locale work now, 851# unless setlocale() is missing (i.e. minitest). 852 853# The test number before our first setlocale() 854my $final_without_setlocale = $test_num; 855 856# Find locales. 857 858debug "Scanning for locales...\n"; 859 860require POSIX; import POSIX ':locale_h'; 861 862debug "Scanning for just perl-compatible locales"; 863my $category = 'LC_CTYPE'; 864my @Locale = find_locales($category); 865if (! @Locale) { 866 $category = 'LC_ALL'; 867 @Locale = find_locales($category); 868} 869debug "Scanning for even incompatible locales"; 870my @include_incompatible_locales = find_locales($category, 871 'even incompatible locales'); 872 873# The locales included in the incompatible list that aren't in the compatible 874# one. 875my @incompatible_locales; 876 877if (@Locale < @include_incompatible_locales) { 878 my %seen; 879 @seen{@Locale} = (); 880 881 foreach my $item (@include_incompatible_locales) { 882 push @incompatible_locales, $item unless exists $seen{$item}; 883 } 884 885 # For each bad locale, switch into it to find out why it's incompatible 886 for my $bad_locale (@incompatible_locales) { 887 my @warnings; 888 889 use warnings 'locale'; 890 891 local $SIG{__WARN__} = sub { 892 my $warning = $_[0]; 893 chomp $warning; 894 push @warnings, ($warning =~ s/\n/\n# /sgr); 895 }; 896 897 debug "Trying incompatible $bad_locale"; 898 my $ret = setlocale(&POSIX::LC_CTYPE, $bad_locale); 899 900 my $message = "testing of locale '$bad_locale' is skipped"; 901 if (@warnings) { 902 skip $message . ":\n# " . join "\n# ", @warnings; 903 } 904 elsif (! $ret) { 905 skip("$message:\n#" 906 . " setlocale(&POSIX::LC_CTYPE, '$bad_locale') failed"); 907 } 908 else { 909 fail $message . ", because it is was found to be incompatible with" 910 . " Perl, but could not discern reason"; 911 } 912 } 913} 914 915debug "Locales =\n"; 916for ( @Locale ) { 917 debug "$_\n"; 918} 919 920unless (@Locale) { 921 print "1..$test_num\n"; 922 exit; 923} 924 925 926setlocale(&POSIX::LC_ALL, "C"); 927 928my %posixes; 929 930my %Problem; 931my %Okay; 932my %Known_bad_locale; # Failed test for a locale known to be bad 933my %Testing; 934my @Added_alpha; # Alphas that aren't in the C locale. 935my %test_names; 936 937sub disp_chars { 938 # This returns a display string denoting the input parameter @_, each 939 # entry of which is a single character in the range 0-255. The first part 940 # of the output is a string of the characters in @_ that are ASCII 941 # graphics, and hence unambiguously displayable. They are given by code 942 # point order. The second part is the remaining code points, the ordinals 943 # of which are each displayed as 2-digit hex. Blanks are inserted so as 944 # to keep anything from the first part looking like a 2-digit hex number. 945 946 no locale; 947 my @chars = sort { ord $a <=> ord $b } @_; 948 my $output = ""; 949 my $range_start; 950 my $start_class; 951 push @chars, chr(258); # This sentinel simplifies the loop termination 952 # logic 953 foreach my $i (0 .. @chars - 1) { 954 my $char = $chars[$i]; 955 my $range_end; 956 my $class; 957 958 # We avoid using [:posix:] classes, as these are being tested in this 959 # file. Each equivalence class below is for things that can appear in 960 # a range; those that can't be in a range have class -1. 0 for those 961 # which should be output in hex; and >0 for the other ranges 962 if ($char =~ /[A-Z]/) { 963 $class = 2; 964 } 965 elsif ($char =~ /[a-z]/) { 966 $class = 3; 967 } 968 elsif ($char =~ /[0-9]/) { 969 $class = 4; 970 } 971 # Uncomment to get literal punctuation displayed instead of hex 972 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) { 973 # $class = -1; # Punct never appears in a range 974 #} 975 else { 976 $class = 0; # Output in hex 977 } 978 979 if (! defined $range_start) { 980 if ($class < 0) { 981 $output .= " " . $char; 982 } 983 else { 984 $range_start = ord $char; 985 $start_class = $class; 986 } 987 } # A range ends if not consecutive, or the class-type changes 988 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1 989 || $class != $start_class) 990 { 991 992 # Here, the current character is not in the range. This means the 993 # previous character must have been. Output the range up through 994 # that one. 995 my $range_length = $range_end - $range_start + 1; 996 if ($start_class > 0) { 997 $output .= " " . chr($range_start); 998 $output .= "-" . chr($range_end) if $range_length > 1; 999 } 1000 else { 1001 $output .= sprintf(" %02X", $range_start); 1002 $output .= sprintf("-%02X", $range_end) if $range_length > 1; 1003 } 1004 1005 # Handle the new current character, as potentially beginning a new 1006 # range 1007 undef $range_start; 1008 redo; 1009 } 1010 } 1011 1012 $output =~ s/^ //; 1013 return $output; 1014} 1015 1016sub disp_str ($) { 1017 my $string = shift; 1018 1019 # Displays the string unambiguously. ASCII printables are always output 1020 # as-is, though perhaps separated by blanks from other characters. If 1021 # entirely printable ASCII, just returns the string. Otherwise if valid 1022 # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it 1023 # outputs hex for each non-ASCII-printable byte. 1024 1025 return $string if $string =~ / ^ [[:print:]]* $/xa; 1026 1027 my $result = ""; 1028 my $prev_was_punct = 1; # Beginning is considered punct 1029 if (utf8::valid($string) && utf8::is_utf8($string)) { 1030 use charnames (); 1031 foreach my $char (split "", $string) { 1032 1033 # Keep punctuation adjacent to other characters; otherwise 1034 # separate them with a blank 1035 if ($char =~ /[[:punct:]]/a) { 1036 $result .= $char; 1037 $prev_was_punct = 1; 1038 } 1039 elsif ($char =~ /[[:print:]]/a) { 1040 $result .= " " unless $prev_was_punct; 1041 $result .= $char; 1042 $prev_was_punct = 0; 1043 } 1044 else { 1045 $result .= " " unless $prev_was_punct; 1046 my $name = charnames::viacode(ord $char); 1047 $result .= (defined $name) ? $name : ':unknown:'; 1048 $prev_was_punct = 0; 1049 } 1050 } 1051 } 1052 else { 1053 use bytes; 1054 foreach my $char (split "", $string) { 1055 if ($char =~ /[[:punct:]]/a) { 1056 $result .= $char; 1057 $prev_was_punct = 1; 1058 } 1059 elsif ($char =~ /[[:print:]]/a) { 1060 $result .= " " unless $prev_was_punct; 1061 $result .= $char; 1062 $prev_was_punct = 0; 1063 } 1064 else { 1065 $result .= " " unless $prev_was_punct; 1066 $result .= sprintf("%02X", ord $char); 1067 $prev_was_punct = 0; 1068 } 1069 } 1070 } 1071 1072 return $result; 1073} 1074 1075sub report_result { 1076 my ($Locale, $i, $pass_fail, $message) = @_; 1077 if ($pass_fail) { 1078 push @{$Okay{$i}}, $Locale; 1079 } 1080 else { 1081 $message //= ""; 1082 $message = " ($message)" if $message; 1083 $Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$os} 1084 && $Locale =~ $known_bad_locales{$os}; 1085 $Problem{$i}{$Locale} = 1; 1086 debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n"; 1087 } 1088} 1089 1090sub report_multi_result { 1091 my ($Locale, $i, $results_ref) = @_; 1092 1093 # $results_ref points to an array, each element of which is a character that was 1094 # in error for this test numbered '$i'. If empty, the test passed 1095 1096 my $message = ""; 1097 if (@$results_ref) { 1098 $message = join " ", "for", disp_chars(@$results_ref); 1099 } 1100 report_result($Locale, $i, @$results_ref == 0, $message); 1101} 1102 1103my $first_locales_test_number = $final_without_setlocale 1104 + 1 + @incompatible_locales; 1105my $locales_test_number; 1106my $not_necessarily_a_problem_test_number; 1107my $first_casing_test_number; 1108my %setlocale_failed; # List of locales that setlocale() didn't work on 1109my $has_glibc_extra_categories = grep { $_ =~ /^ _NL /x } 1110 valid_locale_categories(); 1111 1112foreach my $Locale (@Locale) { 1113 $locales_test_number = $first_locales_test_number - 1; 1114 debug "\n"; 1115 debug "Locale = $Locale\n"; 1116 1117 unless (setlocale(&POSIX::LC_ALL, $Locale)) { 1118 $setlocale_failed{$Locale} = $Locale; 1119 next; 1120 } 1121 1122 # We test UTF-8 locales only under ':not_characters'; It is easier to 1123 # test them in other test files than here. Non- UTF-8 locales are tested 1124 # only under plain 'use locale', as otherwise we would have to convert 1125 # everything in them to Unicode. 1126 1127 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X 1128 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X 1129 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X 1130 1131 my $is_utf8_locale = is_locale_utf8($Locale); 1132 1133 if ($debug) { 1134 debug "is utf8 locale? = $is_utf8_locale\n"; 1135 for my $item (@langinfo) { 1136 my $numeric_item = eval $item; 1137 my $value = langinfo($numeric_item); 1138 1139 # All items should return a value; if not, this will warn. But on 1140 # platforms without the extra categories, almost all items will be 1141 # empty. Skip reporting such. 1142 next if $value eq "" 1143 && $item =~ / ^ _NL_ / && ! $has_glibc_extra_categories; 1144 1145 debug "$item = " . disp_str($value); 1146 } 1147 } 1148 1149 if (! $is_utf8_locale) { 1150 use locale; 1151 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; 1152 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; 1153 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; 1154 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; 1155 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; 1156 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; 1157 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; 1158 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; 1159 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; 1160 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; 1161 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; 1162 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; 1163 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; 1164 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; 1165 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255; 1166 1167 # Sieve the uppercase and the lowercase. 1168 1169 for (@{$posixes{'word'}}) { 1170 if (/[^\d_]/) { # skip digits and the _ 1171 if (uc($_) eq $_) { 1172 $UPPER{$_} = $_; 1173 } 1174 if (lc($_) eq $_) { 1175 $lower{$_} = $_; 1176 } 1177 } 1178 } 1179 } 1180 else { 1181 use locale ':not_characters'; 1182 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; 1183 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; 1184 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; 1185 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; 1186 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; 1187 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; 1188 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; 1189 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; 1190 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; 1191 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; 1192 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; 1193 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; 1194 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; 1195 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; 1196 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255; 1197 for (@{$posixes{'word'}}) { 1198 if (/[^\d_]/) { # skip digits and the _ 1199 if (uc($_) eq $_) { 1200 $UPPER{$_} = $_; 1201 } 1202 if (lc($_) eq $_) { 1203 $lower{$_} = $_; 1204 } 1205 } 1206 } 1207 } 1208 1209 # Ordered, where possible, in groups of "this is a subset of the next 1210 # one" 1211 debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n"; 1212 debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n"; 1213 debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n"; 1214 debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n"; 1215 debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n"; 1216 debug ' \w = ', disp_chars(@{$posixes{'word'}}), "\n"; 1217 debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n"; 1218 debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n"; 1219 debug ' \d = ', disp_chars(@{$posixes{'digit'}}), "\n"; 1220 debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n"; 1221 debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n"; 1222 debug ' \s = ', disp_chars(@{$posixes{'space'}}), "\n"; 1223 debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n"; 1224 debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n"; 1225 debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n"; 1226 1227 foreach (keys %UPPER) { 1228 1229 $BoThCaSe{$_}++ if exists $lower{$_}; 1230 } 1231 foreach (keys %lower) { 1232 $BoThCaSe{$_}++ if exists $UPPER{$_}; 1233 } 1234 foreach (keys %BoThCaSe) { 1235 delete $UPPER{$_}; 1236 delete $lower{$_}; 1237 } 1238 1239 my %Unassigned; 1240 foreach my $ord ( 0 .. 255 ) { 1241 $Unassigned{chr $ord} = 1; 1242 } 1243 foreach my $class (keys %posixes) { 1244 foreach my $char (@{$posixes{$class}}) { 1245 delete $Unassigned{$char}; 1246 } 1247 } 1248 1249 debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n"; 1250 debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n"; 1251 debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n"; 1252 debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n"; 1253 1254 my @failures; 1255 my @fold_failures; 1256 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { 1257 my $ok; 1258 my $fold_ok; 1259 if ($is_utf8_locale) { 1260 use locale ':not_characters'; 1261 $ok = $x =~ /[[:upper:]]/; 1262 $fold_ok = $x =~ /[[:lower:]]/i; 1263 } 1264 else { 1265 use locale; 1266 $ok = $x =~ /[[:upper:]]/; 1267 $fold_ok = $x =~ /[[:lower:]]/i; 1268 } 1269 push @failures, $x unless $ok; 1270 push @fold_failures, $x unless $fold_ok; 1271 } 1272 $locales_test_number++; 1273 $first_casing_test_number = $locales_test_number; 1274 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X'; 1275 report_multi_result($Locale, $locales_test_number, \@failures); 1276 1277 $locales_test_number++; 1278 1279 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X'; 1280 report_multi_result($Locale, $locales_test_number, \@fold_failures); 1281 1282 undef @failures; 1283 undef @fold_failures; 1284 1285 foreach my $x (sort { ord $a <=> ord $b } keys %lower) { 1286 my $ok; 1287 my $fold_ok; 1288 if ($is_utf8_locale) { 1289 use locale ':not_characters'; 1290 $ok = $x =~ /[[:lower:]]/; 1291 $fold_ok = $x =~ /[[:upper:]]/i; 1292 } 1293 else { 1294 use locale; 1295 $ok = $x =~ /[[:lower:]]/; 1296 $fold_ok = $x =~ /[[:upper:]]/i; 1297 } 1298 push @failures, $x unless $ok; 1299 push @fold_failures, $x unless $fold_ok; 1300 } 1301 1302 $locales_test_number++; 1303 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X'; 1304 report_multi_result($Locale, $locales_test_number, \@failures); 1305 1306 $locales_test_number++; 1307 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X'; 1308 report_multi_result($Locale, $locales_test_number, \@fold_failures); 1309 1310 { # Find the alphabetic characters that are not considered alphabetics 1311 # in the default (C) locale. 1312 1313 no locale; 1314 1315 @Added_alpha = (); 1316 for (keys %UPPER, keys %lower, keys %BoThCaSe) { 1317 push(@Added_alpha, $_) if (/\W/); 1318 } 1319 } 1320 1321 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha; 1322 1323 debug "Added_alpha = ", disp_chars(@Added_alpha), "\n"; 1324 1325 # Cross-check the whole 8-bit character set. 1326 1327 ++$locales_test_number; 1328 my @f; 1329 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical'; 1330 for (map { chr } 0..255) { 1331 if ($is_utf8_locale) { 1332 use locale ':not_characters'; 1333 push @f, $_ unless /[[:word:]]/ == /\w/; 1334 } 1335 else { 1336 push @f, $_ unless /[[:word:]]/ == /\w/; 1337 } 1338 } 1339 report_multi_result($Locale, $locales_test_number, \@f); 1340 1341 ++$locales_test_number; 1342 undef @f; 1343 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical'; 1344 for (map { chr } 0..255) { 1345 if ($is_utf8_locale) { 1346 use locale ':not_characters'; 1347 push @f, $_ unless /[[:digit:]]/ == /\d/; 1348 } 1349 else { 1350 push @f, $_ unless /[[:digit:]]/ == /\d/; 1351 } 1352 } 1353 report_multi_result($Locale, $locales_test_number, \@f); 1354 1355 ++$locales_test_number; 1356 undef @f; 1357 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical'; 1358 for (map { chr } 0..255) { 1359 if ($is_utf8_locale) { 1360 use locale ':not_characters'; 1361 push @f, $_ unless /[[:space:]]/ == /\s/; 1362 } 1363 else { 1364 push @f, $_ unless /[[:space:]]/ == /\s/; 1365 } 1366 } 1367 report_multi_result($Locale, $locales_test_number, \@f); 1368 1369 ++$locales_test_number; 1370 undef @f; 1371 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive'; 1372 for (map { chr } 0..255) { 1373 if ($is_utf8_locale) { 1374 use locale ':not_characters'; 1375 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || 1376 (/[[:alnum:]]/ xor /[[:^alnum:]]/) || 1377 (/[[:ascii:]]/ xor /[[:^ascii:]]/) || 1378 (/[[:blank:]]/ xor /[[:^blank:]]/) || 1379 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || 1380 (/[[:digit:]]/ xor /[[:^digit:]]/) || 1381 (/[[:graph:]]/ xor /[[:^graph:]]/) || 1382 (/[[:lower:]]/ xor /[[:^lower:]]/) || 1383 (/[[:print:]]/ xor /[[:^print:]]/) || 1384 (/[[:space:]]/ xor /[[:^space:]]/) || 1385 (/[[:upper:]]/ xor /[[:^upper:]]/) || 1386 (/[[:word:]]/ xor /[[:^word:]]/) || 1387 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || 1388 1389 # effectively is what [:cased:] would be if it existed. 1390 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i); 1391 } 1392 else { 1393 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || 1394 (/[[:alnum:]]/ xor /[[:^alnum:]]/) || 1395 (/[[:ascii:]]/ xor /[[:^ascii:]]/) || 1396 (/[[:blank:]]/ xor /[[:^blank:]]/) || 1397 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || 1398 (/[[:digit:]]/ xor /[[:^digit:]]/) || 1399 (/[[:graph:]]/ xor /[[:^graph:]]/) || 1400 (/[[:lower:]]/ xor /[[:^lower:]]/) || 1401 (/[[:print:]]/ xor /[[:^print:]]/) || 1402 (/[[:space:]]/ xor /[[:^space:]]/) || 1403 (/[[:upper:]]/ xor /[[:^upper:]]/) || 1404 (/[[:word:]]/ xor /[[:^word:]]/) || 1405 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || 1406 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i); 1407 } 1408 } 1409 report_multi_result($Locale, $locales_test_number, \@f); 1410 1411 # The rules for the relationships are given in: 1412 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html 1413 1414 1415 ++$locales_test_number; 1416 undef @f; 1417 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z'; 1418 for ('a' .. 'z') { 1419 if ($is_utf8_locale) { 1420 use locale ':not_characters'; 1421 push @f, $_ unless /[[:lower:]]/; 1422 } 1423 else { 1424 push @f, $_ unless /[[:lower:]]/; 1425 } 1426 } 1427 report_multi_result($Locale, $locales_test_number, \@f); 1428 1429 ++$locales_test_number; 1430 undef @f; 1431 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]'; 1432 for (map { chr } 0..255) { 1433 if ($is_utf8_locale) { 1434 use locale ':not_characters'; 1435 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; 1436 } 1437 else { 1438 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; 1439 } 1440 } 1441 report_multi_result($Locale, $locales_test_number, \@f); 1442 1443 ++$locales_test_number; 1444 undef @f; 1445 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z'; 1446 for ('A' .. 'Z') { 1447 if ($is_utf8_locale) { 1448 use locale ':not_characters'; 1449 push @f, $_ unless /[[:upper:]]/; 1450 } 1451 else { 1452 push @f, $_ unless /[[:upper:]]/; 1453 } 1454 } 1455 report_multi_result($Locale, $locales_test_number, \@f); 1456 1457 ++$locales_test_number; 1458 undef @f; 1459 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]'; 1460 for (map { chr } 0..255) { 1461 if ($is_utf8_locale) { 1462 use locale ':not_characters'; 1463 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; 1464 } 1465 else { 1466 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; 1467 } 1468 } 1469 report_multi_result($Locale, $locales_test_number, \@f); 1470 1471 ++$locales_test_number; 1472 undef @f; 1473 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]'; 1474 for (map { chr } 0..255) { 1475 if ($is_utf8_locale) { 1476 use locale ':not_characters'; 1477 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; 1478 } 1479 else { 1480 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; 1481 } 1482 } 1483 report_multi_result($Locale, $locales_test_number, \@f); 1484 1485 ++$locales_test_number; 1486 undef @f; 1487 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]'; 1488 for (map { chr } 0..255) { 1489 if ($is_utf8_locale) { 1490 use locale ':not_characters'; 1491 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; 1492 } 1493 else { 1494 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; 1495 } 1496 } 1497 report_multi_result($Locale, $locales_test_number, \@f); 1498 1499 ++$locales_test_number; 1500 undef @f; 1501 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9'; 1502 for ('0' .. '9') { 1503 if ($is_utf8_locale) { 1504 use locale ':not_characters'; 1505 push @f, $_ unless /[[:digit:]]/; 1506 } 1507 else { 1508 push @f, $_ unless /[[:digit:]]/; 1509 } 1510 } 1511 report_multi_result($Locale, $locales_test_number, \@f); 1512 1513 ++$locales_test_number; 1514 undef @f; 1515 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]'; 1516 for (map { chr } 0..255) { 1517 if ($is_utf8_locale) { 1518 use locale ':not_characters'; 1519 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; 1520 } 1521 else { 1522 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; 1523 } 1524 } 1525 report_multi_result($Locale, $locales_test_number, \@f); 1526 1527 ++$locales_test_number; 1528 undef @f; 1529 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points'; 1530 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20); 1531 1532 ++$locales_test_number; 1533 undef @f; 1534 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive'; 1535 if (@{$posixes{'digit'}} == 20) { 1536 my $previous_ord; 1537 for (map { chr } 0..255) { 1538 next unless /[[:digit:]]/; 1539 next if /[0-9]/; 1540 if (defined $previous_ord) { 1541 if ($is_utf8_locale) { 1542 use locale ':not_characters'; 1543 push @f, $_ if ord $_ != $previous_ord + 1; 1544 } 1545 else { 1546 push @f, $_ if ord $_ != $previous_ord + 1; 1547 } 1548 } 1549 $previous_ord = ord $_; 1550 } 1551 } 1552 report_multi_result($Locale, $locales_test_number, \@f); 1553 1554 ++$locales_test_number; 1555 undef @f; 1556 my @xdigit_digits; # :digit: & :xdigit: 1557 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars'; 1558 for (map { chr } 0..255) { 1559 if ($is_utf8_locale) { 1560 use locale ':not_characters'; 1561 # For utf8 locales, we actually use a stricter test: that :digit: 1562 # is a subset of :xdigit:, as we know that only 0-9 should match 1563 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; 1564 } 1565 else { 1566 push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/; 1567 } 1568 } 1569 if (! $is_utf8_locale) { 1570 1571 # For non-utf8 locales, @xdigit_digits is a list of the characters 1572 # that are both :xdigit: and :digit:. Because :digit: is stored in 1573 # increasing code point order (unless the tests above failed), 1574 # @xdigit_digits is as well. There should be exactly 10 or 1575 # 20 of these. 1576 if (@xdigit_digits != 10 && @xdigit_digits != 20) { 1577 @f = @xdigit_digits; 1578 } 1579 else { 1580 1581 # Look for contiguity in the series, adding any wrong ones to @f 1582 my @temp = @xdigit_digits; 1583 while (@temp > 1) { 1584 push @f, $temp[1] if ($temp[0] != $temp[1] - 1) 1585 1586 # Skip this test for the 0th character of 1587 # the second block of 10, as it won't be 1588 # contiguous with the previous block 1589 && (! defined $xdigit_digits[10] 1590 || $temp[1] != $xdigit_digits[10]); 1591 shift @temp; 1592 } 1593 } 1594 } 1595 1596 report_multi_result($Locale, $locales_test_number, \@f); 1597 1598 ++$locales_test_number; 1599 undef @f; 1600 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f'; 1601 for ('A' .. 'F', 'a' .. 'f') { 1602 if ($is_utf8_locale) { 1603 use locale ':not_characters'; 1604 push @f, $_ unless /[[:xdigit:]]/; 1605 } 1606 else { 1607 push @f, $_ unless /[[:xdigit:]]/; 1608 } 1609 } 1610 report_multi_result($Locale, $locales_test_number, \@f); 1611 1612 ++$locales_test_number; 1613 undef @f; 1614 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points'; 1615 my $previous_ord; 1616 my $count = 0; 1617 for my $chr (map { chr } 0..255) { 1618 next unless $chr =~ /[[:xdigit:]]/; 1619 if ($is_utf8_locale) { 1620 next if $chr =~ /[[:digit:]]/; 1621 } 1622 else { 1623 next if grep { $chr eq $_ } @xdigit_digits; 1624 } 1625 next if $chr =~ /[A-Fa-f]/; 1626 if (defined $previous_ord) { 1627 if ($is_utf8_locale) { 1628 use locale ':not_characters'; 1629 push @f, $chr if ord $chr != $previous_ord + 1; 1630 } 1631 else { 1632 push @f, $chr if ord $chr != $previous_ord + 1; 1633 } 1634 } 1635 $count++; 1636 if ($count == 6) { 1637 undef $previous_ord; 1638 } 1639 else { 1640 $previous_ord = ord $chr; 1641 } 1642 } 1643 report_multi_result($Locale, $locales_test_number, \@f); 1644 1645 ++$locales_test_number; 1646 undef @f; 1647 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]'; 1648 for (map { chr } 0..255) { 1649 if ($is_utf8_locale) { 1650 use locale ':not_characters'; 1651 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; 1652 } 1653 else { 1654 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; 1655 } 1656 } 1657 report_multi_result($Locale, $locales_test_number, \@f); 1658 1659 # Note that xdigit doesn't have to be a subset of alnum 1660 1661 ++$locales_test_number; 1662 undef @f; 1663 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]'; 1664 for (map { chr } 0..255) { 1665 if ($is_utf8_locale) { 1666 use locale ':not_characters'; 1667 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; 1668 } 1669 else { 1670 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; 1671 } 1672 } 1673 report_multi_result($Locale, $locales_test_number, \@f); 1674 1675 ++$locales_test_number; 1676 undef @f; 1677 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]'; 1678 if ($is_utf8_locale) { 1679 use locale ':not_characters'; 1680 push @f, " " if " " =~ /[[:graph:]]/; 1681 } 1682 else { 1683 push @f, " " if " " =~ /[[:graph:]]/; 1684 } 1685 report_multi_result($Locale, $locales_test_number, \@f); 1686 1687 ++$locales_test_number; 1688 undef @f; 1689 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]'; 1690 for (' ', "\f", "\n", "\r", "\t", "\cK") { 1691 if ($is_utf8_locale) { 1692 use locale ':not_characters'; 1693 push @f, $_ unless /[[:space:]]/; 1694 } 1695 else { 1696 push @f, $_ unless /[[:space:]]/; 1697 } 1698 } 1699 report_multi_result($Locale, $locales_test_number, \@f); 1700 1701 ++$locales_test_number; 1702 undef @f; 1703 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]'; 1704 for (' ', "\t") { 1705 if ($is_utf8_locale) { 1706 use locale ':not_characters'; 1707 push @f, $_ unless /[[:blank:]]/; 1708 } 1709 else { 1710 push @f, $_ unless /[[:blank:]]/; 1711 } 1712 } 1713 report_multi_result($Locale, $locales_test_number, \@f); 1714 1715 ++$locales_test_number; 1716 undef @f; 1717 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]'; 1718 for (map { chr } 0..255) { 1719 if ($is_utf8_locale) { 1720 use locale ':not_characters'; 1721 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; 1722 } 1723 else { 1724 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; 1725 } 1726 } 1727 report_multi_result($Locale, $locales_test_number, \@f); 1728 1729 ++$locales_test_number; 1730 undef @f; 1731 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]'; 1732 for (map { chr } 0..255) { 1733 if ($is_utf8_locale) { 1734 use locale ':not_characters'; 1735 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; 1736 } 1737 else { 1738 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; 1739 } 1740 } 1741 report_multi_result($Locale, $locales_test_number, \@f); 1742 1743 ++$locales_test_number; 1744 undef @f; 1745 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]'; 1746 if ($is_utf8_locale) { 1747 use locale ':not_characters'; 1748 push @f, " " if " " !~ /[[:print:]]/; 1749 } 1750 else { 1751 push @f, " " if " " !~ /[[:print:]]/; 1752 } 1753 report_multi_result($Locale, $locales_test_number, \@f); 1754 1755 ++$locales_test_number; 1756 undef @f; 1757 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]'; 1758 for (map { chr } 0..255) { 1759 if ($is_utf8_locale) { 1760 use locale ':not_characters'; 1761 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); 1762 } 1763 else { 1764 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); 1765 } 1766 } 1767 report_multi_result($Locale, $locales_test_number, \@f); 1768 1769 ++$locales_test_number; 1770 undef @f; 1771 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]'; 1772 for (map { chr } 0..255) { 1773 if ($is_utf8_locale) { 1774 use locale ':not_characters'; 1775 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; 1776 } 1777 else { 1778 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; 1779 } 1780 } 1781 report_multi_result($Locale, $locales_test_number, \@f); 1782 1783 ++$locales_test_number; 1784 undef @f; 1785 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]'; 1786 for (map { chr } 0..255) { 1787 if ($is_utf8_locale) { 1788 use locale ':not_characters'; 1789 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; 1790 } 1791 else { 1792 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; 1793 } 1794 } 1795 report_multi_result($Locale, $locales_test_number, \@f); 1796 1797 ++$locales_test_number; 1798 undef @f; 1799 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]'; 1800 for (map { chr } 0..255) { 1801 if ($is_utf8_locale) { 1802 use locale ':not_characters'; 1803 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); 1804 } 1805 else { 1806 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); 1807 } 1808 } 1809 report_multi_result($Locale, $locales_test_number, \@f); 1810 1811 ++$locales_test_number; 1812 undef @f; 1813 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]'; 1814 for (map { chr } 0..255) { 1815 if ($is_utf8_locale) { 1816 use locale ':not_characters'; 1817 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); 1818 } 1819 else { 1820 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); 1821 } 1822 } 1823 report_multi_result($Locale, $locales_test_number, \@f); 1824 1825 foreach ($first_casing_test_number..$locales_test_number) { 1826 $problematical_tests{$_} = 1; 1827 } 1828 1829 1830 # Test for read-only scalars' locale vs non-locale comparisons. 1831 1832 { 1833 no locale; 1834 my $ok; 1835 $a = "qwerty"; 1836 if ($is_utf8_locale) { 1837 use locale ':not_characters'; 1838 $ok = ($a cmp "qwerty") == 0; 1839 } 1840 else { 1841 use locale; 1842 $ok = ($a cmp "qwerty") == 0; 1843 } 1844 report_result($Locale, ++$locales_test_number, $ok); 1845 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale'; 1846 } 1847 1848 { 1849 my ($from, $to, $lesser, $greater, 1850 @test, %test, $test, $yes, $no, $sign); 1851 1852 ++$locales_test_number; 1853 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work'; 1854 $not_necessarily_a_problem_test_number = $locales_test_number; 1855 for (0..9) { 1856 # Select a slice. 1857 $from = int(($_*@{$posixes{'word'}})/10); 1858 $to = $from + int(@{$posixes{'word'}}/10); 1859 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); 1860 $lesser = join('', @{$posixes{'word'}}[$from..$to]); 1861 # Select a slice one character on. 1862 $from++; $to++; 1863 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); 1864 $greater = join('', @{$posixes{'word'}}[$from..$to]); 1865 if ($is_utf8_locale) { 1866 use locale ':not_characters'; 1867 ($yes, $no, $sign) = ($lesser lt $greater 1868 ? (" ", "not ", 1) 1869 : ("not ", " ", -1)); 1870 } 1871 else { 1872 use locale; 1873 ($yes, $no, $sign) = ($lesser lt $greater 1874 ? (" ", "not ", 1) 1875 : ("not ", " ", -1)); 1876 } 1877 # all these tests should FAIL (return 0). Exact lt or gt cannot 1878 # be tested because in some locales, say, eacute and E may test 1879 # equal. 1880 @test = 1881 ( 1882 $no.' ($lesser le $greater)', # 1 1883 'not ($lesser ne $greater)', # 2 1884 ' ($lesser eq $greater)', # 3 1885 $yes.' ($lesser ge $greater)', # 4 1886 $yes.' ($lesser ge $greater)', # 5 1887 $yes.' ($greater le $lesser )', # 7 1888 'not ($greater ne $lesser )', # 8 1889 ' ($greater eq $lesser )', # 9 1890 $no.' ($greater ge $lesser )', # 10 1891 'not (($lesser cmp $greater) == -($sign))' # 11 1892 ); 1893 @test{@test} = 0 x @test; 1894 $test = 0; 1895 for my $ti (@test) { 1896 if ($is_utf8_locale) { 1897 use locale ':not_characters'; 1898 $test{$ti} = eval $ti; 1899 } 1900 else { 1901 # Already in 'use locale'; 1902 $test{$ti} = eval $ti; 1903 } 1904 $test ||= $test{$ti} 1905 } 1906 report_result($Locale, $locales_test_number, $test == 0); 1907 if ($test) { 1908 debug "lesser = '$lesser'\n"; 1909 debug "greater = '$greater'\n"; 1910 debug "lesser cmp greater = ", 1911 $lesser cmp $greater, "\n"; 1912 debug "greater cmp lesser = ", 1913 $greater cmp $lesser, "\n"; 1914 debug "(greater) from = $from, to = $to\n"; 1915 for my $ti (@test) { 1916 debugf("# %-40s %-4s", $ti, 1917 $test{$ti} ? 'FAIL' : 'ok'); 1918 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { 1919 debugf("(%s == %4d)", $1, eval $1); 1920 } 1921 debugf("\n#"); 1922 } 1923 1924 last; 1925 } 1926 } 1927 1928 use locale; 1929 1930 my @sorted_controls; 1931 1932 ++$locales_test_number; 1933 $test_names{$locales_test_number} 1934 = 'Skip in locales where there are no controls;' 1935 . ' otherwise verify that \0 sorts before any (other) control'; 1936 if (! $posixes{'cntrl'}) { 1937 report_result($Locale, $locales_test_number, 1); 1938 1939 # We use all code points for the tests below since there aren't 1940 # any controls 1941 push @sorted_controls, chr $_ for 1..255; 1942 @sorted_controls = sort @sorted_controls; 1943 } 1944 else { 1945 @sorted_controls = @{$posixes{'cntrl'}}; 1946 push @sorted_controls, "\0", 1947 unless grep { $_ eq "\0" } @sorted_controls; 1948 @sorted_controls = sort @sorted_controls; 1949 my $output = ""; 1950 for my $control (@sorted_controls) { 1951 $output .= " " . disp_chars($control); 1952 } 1953 debug "sorted :cntrl: (plus NUL) = $output\n"; 1954 my $ok = $sorted_controls[0] eq "\0"; 1955 report_result($Locale, $locales_test_number, $ok); 1956 1957 shift @sorted_controls if $ok; 1958 } 1959 1960 my $lowest_control = $sorted_controls[0]; 1961 1962 ++$locales_test_number; 1963 $test_names{$locales_test_number} 1964 = 'Skip in locales where all controls have primary sorting weight; ' 1965 . 'otherwise verify that \0 doesn\'t have primary sorting weight'; 1966 if ("a${lowest_control}c" lt "ab") { 1967 report_result($Locale, $locales_test_number, 1); 1968 } 1969 else { 1970 my $ok = "ab" lt "a\0c"; 1971 report_result($Locale, $locales_test_number, $ok); 1972 } 1973 1974 ++$locales_test_number; 1975 $test_names{$locales_test_number} 1976 = 'Verify that strings with embedded NUL collate'; 1977 my $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a"; 1978 report_result($Locale, $locales_test_number, $ok); 1979 1980 ++$locales_test_number; 1981 $test_names{$locales_test_number} 1982 = 'Verify that strings with embedded NUL and ' 1983 . 'extra trailing NUL collate'; 1984 $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}"; 1985 report_result($Locale, $locales_test_number, $ok); 1986 1987 ++$locales_test_number; 1988 $test_names{$locales_test_number} 1989 = 'Verify that empty strings collate'; 1990 $ok = "" le ""; 1991 report_result($Locale, $locales_test_number, $ok); 1992 1993 ++$locales_test_number; 1994 $test_names{$locales_test_number} 1995 = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness " 1996 . "doesn't matter with collation"; 1997 if (! $is_utf8_locale) { 1998 report_result($Locale, $locales_test_number, 1); 1999 } 2000 else { 2001 2002 # khw can't think of anything better. Start with a string that is 2003 # higher than its UTF-8 representation in both EBCDIC and ASCII 2004 my $string = chr utf8::unicode_to_native(0xff); 2005 my $utf8_string = $string; 2006 utf8::upgrade($utf8_string); 2007 2008 # 8 should be lt 9 in all locales (except ones that aren't 2009 # ASCII-based, which might fail this) 2010 $ok = ("a${string}8") lt ("a${utf8_string}9"); 2011 report_result($Locale, $locales_test_number, $ok); 2012 } 2013 2014 ++$locales_test_number; 2015 $test_names{$locales_test_number} 2016 = "Skip in UTF-8 locales; otherwise verify that single byte " 2017 . "collates before 0x100 and above"; 2018 if ($is_utf8_locale) { 2019 report_result($Locale, $locales_test_number, 1); 2020 } 2021 else { 2022 my $max_collating = chr 0; # Find byte that collates highest 2023 for my $i (0 .. 255) { 2024 my $char = chr $i; 2025 $max_collating = $char if $char gt $max_collating; 2026 } 2027 $ok = $max_collating lt chr 0x100; 2028 report_result($Locale, $locales_test_number, $ok); 2029 } 2030 2031 ++$locales_test_number; 2032 $test_names{$locales_test_number} 2033 = "Skip in UTF-8 locales; otherwise verify that 0x100 and " 2034 . "above collate in code point order"; 2035 if ($is_utf8_locale) { 2036 report_result($Locale, $locales_test_number, 1); 2037 } 2038 else { 2039 $ok = chr 0x100 lt chr 0x101; 2040 report_result($Locale, $locales_test_number, $ok); 2041 } 2042 } 2043 2044 my $ok1; 2045 my $ok2; 2046 my $ok3; 2047 my $ok4; 2048 my $ok5; 2049 my $ok6; 2050 my $ok7; 2051 my $ok8; 2052 my $ok9; 2053 my $ok10; 2054 my $ok11; 2055 my $ok12; 2056 my $ok13; 2057 my $ok14; 2058 my $ok14_5; 2059 my $ok15; 2060 my $ok16; 2061 my $ok17; 2062 my $ok18; 2063 my $ok19; 2064 my $ok20; 2065 my $ok21; 2066 2067 my $c; 2068 my $d; 2069 my $e; 2070 my $f; 2071 my $g; 2072 my $h; 2073 my $i; 2074 my $j; 2075 2076 if (! $is_utf8_locale) { 2077 use locale; 2078 2079 my ($x, $y) = (1.23, 1.23); 2080 2081 $a = "$x"; 2082 printf ''; # printf used to reset locale to "C" 2083 $b = "$y"; 2084 $ok1 = $a eq $b; 2085 2086 $c = "$x"; 2087 my $z = sprintf ''; # sprintf used to reset locale to "C" 2088 $d = "$y"; 2089 $ok2 = $c eq $d; 2090 { 2091 2092 use warnings; 2093 my $w = 0; 2094 local $SIG{__WARN__} = 2095 sub { 2096 print "# @_\n"; 2097 $w++; 2098 }; 2099 2100 # The == (among other ops) used to warn for locales 2101 # that had something else than "." as the radix character. 2102 2103 $ok3 = $c == 1.23; 2104 $ok4 = $c == $x; 2105 $ok5 = $c == $d; 2106 { 2107 no locale; 2108 2109 $e = "$x"; 2110 2111 $ok6 = $e == 1.23; 2112 $ok7 = $e == $x; 2113 $ok8 = $e == $c; 2114 } 2115 2116 $f = "1.23"; 2117 $g = 2.34; 2118 $h = 1.5; 2119 $i = 1.25; 2120 $j = "$h:$i"; 2121 2122 $ok9 = $f == 1.23; 2123 $ok10 = $f == $x; 2124 $ok11 = $f == $c; 2125 $ok12 = abs(($f + $g) - 3.57) < 0.01; 2126 $ok13 = $w == 0; 2127 $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales 2128 } 2129 { 2130 no locale; 2131 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); 2132 } 2133 $ok18 = $j eq sprintf("%g:%g", $h, $i); 2134 } 2135 else { 2136 use locale ':not_characters'; 2137 2138 my ($x, $y) = (1.23, 1.23); 2139 $a = "$x"; 2140 printf ''; # printf used to reset locale to "C" 2141 $b = "$y"; 2142 $ok1 = $a eq $b; 2143 2144 $c = "$x"; 2145 my $z = sprintf ''; # sprintf used to reset locale to "C" 2146 $d = "$y"; 2147 $ok2 = $c eq $d; 2148 { 2149 use warnings; 2150 my $w = 0; 2151 local $SIG{__WARN__} = 2152 sub { 2153 print "# @_\n"; 2154 $w++; 2155 }; 2156 $ok3 = $c == 1.23; 2157 $ok4 = $c == $x; 2158 $ok5 = $c == $d; 2159 { 2160 no locale; 2161 $e = "$x"; 2162 2163 $ok6 = $e == 1.23; 2164 $ok7 = $e == $x; 2165 $ok8 = $e == $c; 2166 } 2167 2168 $f = "1.23"; 2169 $g = 2.34; 2170 $h = 1.5; 2171 $i = 1.25; 2172 $j = "$h:$i"; 2173 2174 $ok9 = $f == 1.23; 2175 $ok10 = $f == $x; 2176 $ok11 = $f == $c; 2177 $ok12 = abs(($f + $g) - 3.57) < 0.01; 2178 $ok13 = $w == 0; 2179 2180 # Look for non-ASCII error messages, and verify that the first 2181 # such is in UTF-8 (the others almost certainly will be like the 2182 # first). This is only done if the current locale has LC_MESSAGES 2183 $ok14 = 1; 2184 $ok14_5 = 1; 2185 if ( locales_enabled('LC_MESSAGES') 2186 && setlocale(&POSIX::LC_MESSAGES, $Locale)) 2187 { 2188 foreach my $err (keys %!) { 2189 use Errno; 2190 $! = eval "&Errno::$err"; # Convert to strerror() output 2191 my $errnum = 0+$!; 2192 my $strerror = "$!"; 2193 if ("$strerror" =~ /\P{ASCII}/) { 2194 $ok14 = utf8::is_utf8($strerror); 2195 no locale; 2196 $ok14_5 = "$!" !~ /\P{ASCII}/; 2197 debug( disp_str( 2198 "non-ASCII \$! for error $errnum='$strerror'")) 2199 if ! $ok14_5; 2200 last; 2201 } 2202 } 2203 } 2204 2205 # Similarly, we verify that a non-ASCII radix is in UTF-8. This 2206 # also catches if there is a disparity between sprintf and 2207 # stringification. 2208 2209 my $string_g = "$g"; 2210 my $sprintf_g = sprintf("%g", $g); 2211 2212 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g); 2213 $ok16 = $sprintf_g eq $string_g; 2214 } 2215 { 2216 no locale; 2217 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); 2218 } 2219 $ok18 = $j eq sprintf("%g:%g", $h, $i); 2220 } 2221 2222 $ok19 = $ok20 = 1; 2223 if (locales_enabled('LC_TIME')) { 2224 if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't 2225 # affected by 2226 # :not_characters 2227 my @times = CORE::localtime(); 2228 2229 use locale; 2230 $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425] 2231 my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times); 2232 debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date)); 2233 2234 # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, 2235 # and not UTF-8 if the locale isn't UTF-8. 2236 $ok20 = $date =~ / ^ \p{ASCII}+ $ /x 2237 || $is_utf8_locale == utf8::is_utf8($date); 2238 } 2239 } 2240 2241 $ok21 = 1; 2242 if (locales_enabled('LC_MESSAGES')) { 2243 foreach my $err (keys %!) { 2244 no locale; 2245 use Errno; 2246 $! = eval "&Errno::$err"; # Convert to strerror() output 2247 my $strerror = "$!"; 2248 if ($strerror =~ /\P{ASCII}/) { 2249 $ok21 = 0; 2250 debug(disp_str("non-ASCII strerror=$strerror")); 2251 last; 2252 } 2253 } 2254 } 2255 2256 report_result($Locale, ++$locales_test_number, $ok1); 2257 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results'; 2258 my $first_a_test = $locales_test_number; 2259 2260 debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; 2261 2262 report_result($Locale, ++$locales_test_number, $ok2); 2263 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results'; 2264 2265 my $first_c_test = $locales_test_number; 2266 2267 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; 2268 report_result($Locale, $locales_test_number, $ok3); 2269 $problematical_tests{$locales_test_number} = 1; 2270 2271 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; 2272 report_result($Locale, $locales_test_number, $ok4); 2273 $problematical_tests{$locales_test_number} = 1; 2274 2275 report_result($Locale, ++$locales_test_number, $ok5); 2276 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf'; 2277 $problematical_tests{$locales_test_number} = 1; 2278 2279 debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; 2280 2281 report_result($Locale, ++$locales_test_number, $ok6); 2282 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block'; 2283 my $first_e_test = $locales_test_number; 2284 2285 report_result($Locale, ++$locales_test_number, $ok7); 2286 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale'; 2287 2288 $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; 2289 report_result($Locale, $locales_test_number, $ok8); 2290 $problematical_tests{$locales_test_number} = 1; 2291 2292 debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n"; 2293 2294 report_result($Locale, ++$locales_test_number, $ok9); 2295 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant'; 2296 $problematical_tests{$locales_test_number} = 1; 2297 my $first_f_test = $locales_test_number; 2298 2299 report_result($Locale, ++$locales_test_number, $ok10); 2300 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar'; 2301 $problematical_tests{$locales_test_number} = 1; 2302 2303 $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; 2304 report_result($Locale, $locales_test_number, $ok11); 2305 $problematical_tests{$locales_test_number} = 1; 2306 2307 report_result($Locale, ++$locales_test_number, $ok12); 2308 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric'; 2309 $problematical_tests{$locales_test_number} = 1; 2310 2311 report_result($Locale, ++$locales_test_number, $ok13); 2312 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot'; 2313 $problematical_tests{$locales_test_number} = 1; 2314 2315 report_result($Locale, ++$locales_test_number, $ok14); 2316 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8'; 2317 2318 report_result($Locale, ++$locales_test_number, $ok14_5); 2319 $test_names{$locales_test_number} = '... and are ASCII outside "use locale"'; 2320 2321 report_result($Locale, ++$locales_test_number, $ok15); 2322 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification'; 2323 $problematical_tests{$locales_test_number} = 1; 2324 2325 report_result($Locale, ++$locales_test_number, $ok16); 2326 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8'; 2327 $problematical_tests{$locales_test_number} = 1; 2328 2329 report_result($Locale, ++$locales_test_number, $ok17); 2330 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix'; 2331 2332 report_result($Locale, ++$locales_test_number, $ok18); 2333 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix'; 2334 $problematical_tests{$locales_test_number} = 1; 2335 2336 report_result($Locale, ++$locales_test_number, $ok19); 2337 $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty'; 2338 2339 report_result($Locale, ++$locales_test_number, $ok20); 2340 $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set'; 2341 $problematical_tests{$locales_test_number} = 1; # This is broken in 2342 # OS X 10.9.3 2343 2344 report_result($Locale, ++$locales_test_number, $ok21); 2345 $test_names{$locales_test_number} = '"$!" is ASCII-only outside of locale scope'; 2346 2347 debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; 2348 2349 # Does taking lc separately differ from taking 2350 # the lc "in-line"? (This was the bug 19990704.002 (#965), change #3568.) 2351 # The bug was in the caching of the 'o'-magic. 2352 if (! $is_utf8_locale) { 2353 use locale; 2354 2355 sub lcA { 2356 my $lc0 = lc $_[0]; 2357 my $lc1 = lc $_[1]; 2358 return $lc0 cmp $lc1; 2359 } 2360 2361 sub lcB { 2362 return lc($_[0]) cmp lc($_[1]); 2363 } 2364 2365 my $x = "ab"; 2366 my $y = "aa"; 2367 my $z = "AB"; 2368 2369 report_result($Locale, ++$locales_test_number, 2370 lcA($x, $y) == 1 && lcB($x, $y) == 1 || 2371 lcA($x, $z) == 0 && lcB($x, $z) == 0); 2372 } 2373 else { 2374 use locale ':not_characters'; 2375 2376 sub lcC { 2377 my $lc0 = lc $_[0]; 2378 my $lc1 = lc $_[1]; 2379 return $lc0 cmp $lc1; 2380 } 2381 2382 sub lcD { 2383 return lc($_[0]) cmp lc($_[1]); 2384 } 2385 2386 my $x = "ab"; 2387 my $y = "aa"; 2388 my $z = "AB"; 2389 2390 report_result($Locale, ++$locales_test_number, 2391 lcC($x, $y) == 1 && lcD($x, $y) == 1 || 2392 lcC($x, $z) == 0 && lcD($x, $z) == 0); 2393 } 2394 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp'; 2395 2396 # Does lc of an UPPER (if different from the UPPER) match 2397 # case-insensitively the UPPER, and does the UPPER match 2398 # case-insensitively the lc of the UPPER. And vice versa. 2399 { 2400 use locale; 2401 no utf8; 2402 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/; 2403 2404 my @f = (); 2405 ++$locales_test_number; 2406 $test_names{$locales_test_number} = 'Verify case insensitive matching works'; 2407 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { 2408 if (! $is_utf8_locale) { 2409 my $y = lc $x; 2410 next unless uc $y eq $x; 2411 debug_more( "UPPER=", disp_chars(($x)), 2412 "; lc=", disp_chars(($y)), "; ", 2413 "; fc=", disp_chars((fc $x)), "; ", 2414 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2415 $x =~ /\Q$y/i ? 1 : 0, 2416 "; ", 2417 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2418 $y =~ /\Q$x/i ? 1 : 0, 2419 "\n"); 2420 # 2421 # If $x and $y contain regular expression characters 2422 # AND THEY lowercase (/i) to regular expression characters, 2423 # regcomp() will be mightily confused. No, the \Q doesn't 2424 # help here (maybe regex engine internal lowercasing 2425 # is done after the \Q?) An example of this happening is 2426 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS): 2427 # the chr(173) (the "[") is the lowercase of the chr(235). 2428 # 2429 # Similarly losing EBCDIC locales include cs_cz, cs_CZ, 2430 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!), 2431 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT, 2432 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037, 2433 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU, 2434 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR. 2435 # 2436 # Similar things can happen even under (bastardised) 2437 # non-EBCDIC locales: in many European countries before the 2438 # advent of ISO 8859-x nationally customised versions of 2439 # ISO 646 were devised, reusing certain punctuation 2440 # characters for modified characters needed by the 2441 # country/language. For example, the "|" might have 2442 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS. 2443 # 2444 if ($x =~ $re || $y =~ $re) { 2445 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; 2446 next; 2447 } 2448 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2449 2450 # fc is not a locale concept, so Perl uses lc for it. 2451 push @f, $x unless lc $x eq fc $x; 2452 } 2453 else { 2454 use locale ':not_characters'; 2455 my $y = lc $x; 2456 next unless uc $y eq $x; 2457 debug_more( "UPPER=", disp_chars(($x)), 2458 "; lc=", disp_chars(($y)), "; ", 2459 "; fc=", disp_chars((fc $x)), "; ", 2460 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2461 $x =~ /\Q$y/i ? 1 : 0, 2462 "; ", 2463 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2464 $y =~ /\Q$x/i ? 1 : 0, 2465 "\n"); 2466 2467 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2468 2469 # The places where Unicode's lc is different from fc are 2470 # skipped here by virtue of the 'next unless uc...' line above 2471 push @f, $x unless lc $x eq fc $x; 2472 } 2473 } 2474 2475 foreach my $x (sort { ord $a <=> ord $b } keys %lower) { 2476 if (! $is_utf8_locale) { 2477 my $y = uc $x; 2478 next unless lc $y eq $x; 2479 debug_more( "lower=", disp_chars(($x)), 2480 "; uc=", disp_chars(($y)), "; ", 2481 "; fc=", disp_chars((fc $x)), "; ", 2482 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2483 $x =~ /\Q$y/i ? 1 : 0, 2484 "; ", 2485 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2486 $y =~ /\Q$x/i ? 1 : 0, 2487 "\n"); 2488 if ($x =~ $re || $y =~ $re) { # See above. 2489 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; 2490 next; 2491 } 2492 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2493 2494 push @f, $x unless lc $x eq fc $x; 2495 } 2496 else { 2497 use locale ':not_characters'; 2498 my $y = uc $x; 2499 next unless lc $y eq $x; 2500 debug_more( "lower=", disp_chars(($x)), 2501 "; uc=", disp_chars(($y)), "; ", 2502 "; fc=", disp_chars((fc $x)), "; ", 2503 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2504 $x =~ /\Q$y/i ? 1 : 0, 2505 "; ", 2506 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2507 $y =~ /\Q$x/i ? 1 : 0, 2508 "\n"); 2509 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2510 2511 push @f, $x unless lc $x eq fc $x; 2512 } 2513 } 2514 report_multi_result($Locale, $locales_test_number, \@f); 2515 $problematical_tests{$locales_test_number} = 1; 2516 } 2517 2518 # [perl #109318] 2519 { 2520 my @f = (); 2521 ++$locales_test_number; 2522 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent'; 2523 $problematical_tests{$locales_test_number} = 1; 2524 2525 my $radix = langinfo(RADIXCHAR); 2526 my @nums = ( 2527 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9", 2528 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9", 2529 ); 2530 2531 if (! $is_utf8_locale) { 2532 use locale; 2533 for my $num (@nums) { 2534 push @f, $num 2535 unless sprintf("%g", $num) =~ /3.+14/; 2536 } 2537 } 2538 else { 2539 use locale ':not_characters'; 2540 for my $num (@nums) { 2541 push @f, $num 2542 unless sprintf("%g", $num) =~ /3.+14/; 2543 } 2544 } 2545 2546 report_result($Locale, $locales_test_number, @f == 0); 2547 if (@f) { 2548 print "# failed $locales_test_number locale '$Locale' numbers @f\n" 2549 } 2550 } 2551 2552 { 2553 my @f = (); 2554 ++$locales_test_number; 2555 $test_names{$locales_test_number} = 2556 'Verify ALT_DIGITS returns nothing, or else non-ASCII and' 2557 . ' the single char digits evaluate to consecutive integers' 2558 . ' starting at 0; 0 is accepted for alt-0 for locales without' 2559 . ' a zero'; 2560 2561 my $alts = langinfo(ALT_DIGITS); 2562 if ($alts) { 2563 my @alts = split ';', $alts; 2564 my $prev = -1; 2565 foreach my $num (@alts) { 2566 if ($num =~ /[[:ascii:]]/) { 2567 if ($prev != -1 || $num != 0) { 2568 push @f, disp_str($num); 2569 last; 2570 } 2571 } 2572 2573 # We only look at single character strings; likely locales 2574 # that have alternate digits have a different mechanism for 2575 # representing larger numbers. Japanese for example, has a 2576 # single character for the number 10, which is prefixed to the 2577 # '1' symbol for '11', etc. And 21 is represented by 3 2578 # characters, the '2' symbol, followed by the '10' symbol, 2579 # then the '1' symbol. (There is nothing to say that a locale 2580 # even has to use base 10.) 2581 last if length $num > 1; 2582 2583 use Unicode::UCD 'num'; 2584 my $value = num($num); 2585 if ($value != $prev + 1) { 2586 push @f, disp_str($num); 2587 last; 2588 } 2589 2590 $prev = $value; 2591 } 2592 } 2593 2594 report_result($Locale, $locales_test_number, @f == 0); 2595 if (@f) { 2596 print "# failed $locales_test_number locale '$Locale' numbers @f\n" 2597 } 2598 } 2599} 2600 2601my $final_locales_test_number = $locales_test_number; 2602 2603# Recount the errors. 2604 2605TEST_NUM: 2606foreach $test_num ($first_locales_test_number..$final_locales_test_number) { 2607 my $has_non_global_failure = $Problem{$test_num} 2608 || ! defined $Okay{$test_num} 2609 || ! @{$Okay{$test_num}}; 2610 print "not " if $has_non_global_failure; 2611 print "ok $test_num"; 2612 $test_names{$test_num} = "" unless defined $test_names{$test_num}; 2613 2614 # If TODO is in the test name, make it thus 2615 my $todo = $test_names{$test_num} =~ s/\s*TODO\s*//; 2616 print " $test_names{$test_num}"; 2617 if ($todo) { 2618 print " # TODO\n"; 2619 } 2620 elsif (! $has_non_global_failure) { 2621 print "\n"; 2622 } 2623 elsif ($has_non_global_failure) { 2624 2625 # If there are any locales that pass this test, or are known-bad, it 2626 # may be that there are enough passes that we TODO the failure, but 2627 # only for tests that we have decided can be problematical. 2628 if ( ($Okay{$test_num} || $Known_bad_locale{$test_num}) 2629 && grep { $_ == $test_num } keys %problematical_tests) 2630 { 2631 # Don't count the known-bad failures when calculating the 2632 # percentage that fail. 2633 my $known_failures = (exists $Known_bad_locale{$test_num}) 2634 ? scalar(keys $Known_bad_locale{$test_num}->%*) 2635 : 0; 2636 my $adjusted_failures = scalar(keys $Problem{$test_num}->%*) 2637 - $known_failures; 2638 2639 # Specially handle failures where only known-bad locales fail. 2640 # This makes the diagnositics clearer. 2641 if ($adjusted_failures <= 0) { 2642 print " # TODO fails only on known bad locales: ", 2643 join " ", keys $Known_bad_locale{$test_num}->%*, "\n"; 2644 next TEST_NUM; 2645 } 2646 2647 # Round to nearest .1% 2648 my $percent_fail = (int(.5 + (1000 * $adjusted_failures 2649 / scalar(@Locale)))) 2650 / 10; 2651 $todo = $percent_fail < $acceptable_failure_percentage; 2652 print " # TODO" if $todo; 2653 print "\n"; 2654 2655 if ($debug) { 2656 print "# $percent_fail% of locales (", 2657 scalar(keys $Problem{$test_num}->%*), 2658 " of ", 2659 scalar(@Locale), 2660 ") fail the above test (TODO cut-off is ", 2661 $acceptable_failure_percentage, 2662 "%)\n"; 2663 } 2664 elsif ($todo) { 2665 print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n"; 2666 print "# pass the above test, so it is likely that the failures\n"; 2667 print "# are errors in the locale definitions. The test is marked TODO, as the\n"; 2668 print "# problem is not likely to be Perl's\n"; 2669 } 2670 } 2671 2672 if ($debug) { 2673 print "# The code points that had this failure are given above. Look for lines\n"; 2674 print "# that match 'failed $test_num'\n"; 2675 } 2676 else { 2677 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; 2678 print "# Then look at that output for lines that match 'failed $test_num'\n"; 2679 } 2680 if (defined $not_necessarily_a_problem_test_number 2681 && $test_num == $not_necessarily_a_problem_test_number) 2682 { 2683 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n"; 2684 print "# It usually indicates a problem in the environment,\n"; 2685 print "# not in Perl itself.\n"; 2686 } 2687 } 2688} 2689 2690$test_num = $final_locales_test_number; 2691 2692if ( ! defined $Config{d_setlocale_accepts_any_locale_name}) { 2693 # perl #115808 2694 use warnings; 2695 my $warned = 0; 2696 local $SIG{__WARN__} = sub { 2697 $warned = $_[0] =~ /uninitialized/; 2698 }; 2699 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy"); 2700 ok($warned, "variable set to setlocale(\"invalid locale name\") is considered uninitialized"); 2701} 2702 2703# Test that tainting and case changing works on utf8 strings. These tests are 2704# placed last to avoid disturbing the hard-coded test numbers that existed at 2705# the time these were added above this in this file. 2706# This also tests that locale overrides unicode_strings in the same scope for 2707# non-utf8 strings. 2708setlocale(&POSIX::LC_ALL, "C"); 2709{ 2710 use locale; 2711 use feature 'unicode_strings'; 2712 2713 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") { 2714 my @list; # List of code points to test for $function 2715 2716 # Used to calculate the changed case for ASCII characters by using the 2717 # ord, instead of using one of the functions under test. 2718 my $ascii_case_change_delta; 2719 my $above_latin1_case_change_delta; # Same for the specific ords > 255 2720 # that we use 2721 2722 # We test an ASCII character, which should change case; 2723 # a Latin1 character, which shouldn't change case under this C locale, 2724 # an above-Latin1 character that when the case is changed would cross 2725 # the 255/256 boundary, so doesn't change case 2726 # (the \x{149} is one of these, but changes into 2 characters, the 2727 # first one of which doesn't cross the boundary. 2728 # the final one in each list is an above-Latin1 character whose case 2729 # does change. The code below uses its position in its list as a 2730 # marker to indicate that it, unlike the other code points above 2731 # ASCII, has a successful case change 2732 # 2733 # All casing operations under locale (but not :not_characters) should 2734 # taint 2735 if ($function =~ /^u/) { 2736 @list = ("", "a", 2737 chr(utf8::unicode_to_native(0xe0)), 2738 chr(utf8::unicode_to_native(0xff)), 2739 "\x{fb00}", "\x{149}", "\x{101}"); 2740 $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32; 2741 $above_latin1_case_change_delta = -1; 2742 } 2743 else { 2744 @list = ("", "A", 2745 chr(utf8::unicode_to_native(0xC0)), 2746 "\x{17F}", "\x{100}"); 2747 $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32; 2748 $above_latin1_case_change_delta = +1; 2749 } 2750 foreach my $is_utf8_locale (0 .. 1) { 2751 foreach my $j (0 .. $#list) { 2752 my $char = $list[$j]; 2753 2754 for my $encoded_in_utf8 (0 .. 1) { 2755 my $should_be; 2756 my $changed; 2757 if (! $is_utf8_locale) { 2758 no warnings 'locale'; 2759 $should_be = ($j == $#list) 2760 ? chr(ord($char) + $above_latin1_case_change_delta) 2761 : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127) 2762 ? $char 2763 : chr(ord($char) + $ascii_case_change_delta); 2764 2765 # This monstrosity is in order to avoid using an eval, 2766 # which might perturb the results 2767 $changed = ($function eq "uc") 2768 ? uc($char) 2769 : ($function eq "ucfirst") 2770 ? ucfirst($char) 2771 : ($function eq "lc") 2772 ? lc($char) 2773 : ($function eq "lcfirst") 2774 ? lcfirst($char) 2775 : ($function eq "fc") 2776 ? fc($char) 2777 : die("Unexpected function \"$function\""); 2778 } 2779 else { 2780 { 2781 no locale; 2782 2783 # For utf8-locales the case changing functions 2784 # should work just like they do outside of locale. 2785 # Can use eval here because not testing it when 2786 # not in locale. 2787 $should_be = eval "$function('$char')"; 2788 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@; 2789 2790 } 2791 use locale ':not_characters'; 2792 $changed = ($function eq "uc") 2793 ? uc($char) 2794 : ($function eq "ucfirst") 2795 ? ucfirst($char) 2796 : ($function eq "lc") 2797 ? lc($char) 2798 : ($function eq "lcfirst") 2799 ? lcfirst($char) 2800 : ($function eq "fc") 2801 ? fc($char) 2802 : die("Unexpected function \"$function\""); 2803 } 2804 ok($changed eq $should_be, 2805 "$function(\"$char\") in C locale " 2806 . (($is_utf8_locale) 2807 ? "(use locale ':not_characters'" 2808 : "(use locale") 2809 . (($encoded_in_utf8) 2810 ? "; encoded in utf8)" 2811 : "; not encoded in utf8)") 2812 . " should be \"$should_be\", got \"$changed\""); 2813 2814 # Tainting shouldn't happen for use locale :not_character 2815 # (a utf8 locale) 2816 (! $is_utf8_locale) 2817 ? check_taint($changed) 2818 : check_taint_not($changed); 2819 2820 # Use UTF-8 next time through the loop 2821 utf8::upgrade($char); 2822 } 2823 } 2824 } 2825 } 2826} 2827 2828# Give final advice. 2829 2830my $didwarn = 0; 2831 2832if (%setlocale_failed) { 2833 print "#\nsetlocale() failed for these locales:\n"; 2834 for my $locale (keys %setlocale_failed) { 2835 print "#\t$locale\n"; 2836 } 2837 print "#\n"; 2838 $didwarn = 1; 2839} 2840 2841foreach ($first_locales_test_number..$final_locales_test_number) { 2842 if ($Problem{$_}) { 2843 my @f = sort keys %{ $Problem{$_} }; 2844 2845 # Don't list the failures caused by known-bad locales. 2846 if (exists $known_bad_locales{$os}) { 2847 @f = grep { $_ !~ $known_bad_locales{$os} } @f; 2848 next unless @f; 2849 } 2850 my $f = join(" ", @f); 2851 $f =~ s/(.{50,60}) /$1\n#\t/g; 2852 print 2853 "#\n", 2854 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", 2855 "#\t", $f, "\n#\n", 2856 "# on your system may have errors because the locale test $_\n", 2857 "# \"$test_names{$_}\"\n", 2858 "# failed in ", (@f == 1 ? "that locale" : "those locales"), 2859 ".\n"; 2860 print <<EOW; 2861# 2862# If your users are not using these locales you are safe for the moment, 2863# but please report this failure first to perlbug\@perl.org using the 2864# perlbug script (as described in the INSTALL file) so that the exact 2865# details of the failures can be sorted out first and then your operating 2866# system supplier can be alerted about these anomalies. 2867# 2868EOW 2869 $didwarn = 1; 2870 } 2871} 2872 2873# Tell which locales were okay and which were not. 2874 2875if ($didwarn) { 2876 my (@s, @F); 2877 2878 foreach my $l (@Locale) { 2879 my $p = 0; 2880 if ($setlocale_failed{$l}) { 2881 $p++; 2882 } 2883 else { 2884 foreach my $t 2885 ($first_locales_test_number..$final_locales_test_number) 2886 { 2887 $p++ if $Problem{$t}{$l}; 2888 } 2889 } 2890 push @s, $l if $p == 0; 2891 push @F, $l unless $p == 0; 2892 } 2893 2894 if (@s) { 2895 my $s = join(" ", @s); 2896 $s =~ s/(.{50,60}) /$1\n#\t/g; 2897 2898 print 2899 "# The following locales\n#\n", 2900 "#\t", $s, "\n#\n", 2901 "# tested okay.\n#\n", 2902 } else { 2903 print "# None of your locales were fully okay.\n"; 2904 } 2905 2906 if (@F) { 2907 my $F = join(" ", @F); 2908 $F =~ s/(.{50,60}) /$1\n#\t/g; 2909 2910 my $details = ""; 2911 unless ($debug) { 2912 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; 2913 } 2914 elsif ($debug == 1) { 2915 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n"; 2916 } 2917 2918 print 2919 "# The following locales\n#\n", 2920 "#\t", $F, "\n#\n", 2921 "# had problems.\n#\n", 2922 $details; 2923 } else { 2924 print "# None of your locales were broken.\n"; 2925 } 2926} 2927 2928if (exists $known_bad_locales{$os} && ! %Known_bad_locale) { 2929 $test_num++; 2930 print "ok $test_num $^O no longer has known bad locales # TODO\n"; 2931} 2932 2933print "1..$test_num\n"; 2934 2935# eof 2936