1use strict; 2use warnings; 3 4# This file tests interactions with locale and threads 5 6BEGIN { 7 $| = 1; 8 9 chdir 't' if -d 't'; 10 require './test.pl'; 11 set_up_inc('../lib'); 12 13 skip_all_without_config('useithreads'); 14 skip_all("Fails on threaded builds on OpenBSD") 15 if ($^O =~ m/^(openbsd)$/); 16 17 require './loc_tools.pl'; 18 19 eval { require POSIX; POSIX->import(qw(errno_h locale_h unistd_h )) }; 20 if ($@) { 21 skip_all("could not load the POSIX module"); # running minitest? 22 } 23} 24 25use Time::HiRes qw(time usleep); 26 27use Devel::Peek; 28$Devel::Peek::pv_limit = 0; $Devel::Peek::pv_limit = 0; 29use Data::Dumper; 30$Data::Dumper::Sortkeys=1; 31$Data::Dumper::Useqq = 1; 32$Data::Dumper::Deepcopy = 1; 33 34my $debug = 0; 35 36my %map_category_name_to_number; 37my %map_category_number_to_name; 38my @valid_categories = valid_locale_categories(); 39foreach my $category (@valid_categories) { 40 my $cat_num = eval "&POSIX::$category"; 41 die "Can't determine ${category}'s number: $@" if $@; 42 43 $map_category_name_to_number{$category} = $cat_num; 44 $map_category_number_to_name{$cat_num} = $category; 45} 46 47my $LC_ALL; 48my $LC_ALL_string; 49if (defined $map_category_name_to_number{LC_ALL}) { 50 $LC_ALL_string = 'LC_ALL'; 51 $LC_ALL = $map_category_name_to_number{LC_ALL}; 52} 53elsif (defined $map_category_name_to_number{LC_CTYPE}) { 54 $LC_ALL_string = 'LC_CTYPE'; 55 $LC_ALL = $map_category_name_to_number{LC_CTYPE}; 56} 57else { 58 skip_all("No LC_ALL nor LC_CTYPE"); 59} 60 61# reset the locale environment 62delete local @ENV{'LANGUAGE', 'LANG', keys %map_category_name_to_number}; 63 64my @locales = find_locales($LC_ALL); 65skip_all("Couldn't find any locales") if @locales == 0; 66 67plan(2); 68 69my ($utf8_locales_ref, $non_utf8_locales_ref) 70 = classify_locales_wrt_utf8ness(\@locales); 71 72my $official_ascii_name = 'ansi_x341968'; 73 74my %lang_code_to_script = ( # ISO 639.2, but without the many codes that 75 # are for latin (but the few western European 76 # ones that are latin1 are included) 77 am => 'amharic', 78 amh => 'amharic', 79 amharic => 'amharic', 80 ar => 'arabic', 81 be => 'cyrillic', 82 bel => 'cyrillic', 83 ben => 'bengali', 84 bn => 'bengali', 85 bg => 'cyrillic', 86 bul => 'cyrillic', 87 bulgarski => 'cyrillic', 88 bulgarian => 'cyrillic', 89 c => $official_ascii_name, 90 cnr => 'cyrillic', 91 de => 'latin_1', 92 deu => 'latin_1', 93 deutsch => 'latin_1', 94 german => 'latin_1', 95 div => 'thaana', 96 dv => 'thaana', 97 dzo => 'tibetan', 98 dz => 'tibetan', 99 el => 'greek', 100 ell => 'greek', 101 ellada => 'greek', 102 en => $official_ascii_name, 103 eng => $official_ascii_name, 104 american => $official_ascii_name, 105 british => $official_ascii_name, 106 es => 'latin_1', 107 fa => 'arabic', 108 fas => 'arabic', 109 flamish => 'latin_1', 110 fra => 'latin_1', 111 fr => 'latin_1', 112 heb => 'hebrew', 113 he => 'hebrew', 114 hi => 'hindi', 115 hin => 'hindi', 116 hy => 'armenian', 117 hye => 'armenian', 118 ita => 'latin_1', 119 it => 'latin_1', 120 ja => 'katakana', 121 jpn => 'katakana', 122 nihongo => 'katakana', 123 japanese => 'katakana', 124 ka => 'georgian', 125 kat => 'georgian', 126 kaz => 'cyrillic', 127 khm => 'khmer', 128 kir => 'cyrillic', 129 kk => 'cyrillic', 130 km => 'khmer', 131 ko => 'hangul', 132 kor => 'hangul', 133 korean => 'hangul', 134 ku => 'arabic', 135 kur => 'arabic', 136 ky => 'cyrillic', 137 latin1 => 'latin_1', 138 lao => 'lao', 139 lo => 'lao', 140 mk => 'cyrillic', 141 mkd => 'cyrillic', 142 macedonian => 'cyrillic', 143 mn => 'cyrillic', 144 mon => 'cyrillic', 145 mya => 'myanmar', 146 my => 'myanmar', 147 ne => 'devanagari', 148 nep => 'devanagari', 149 nld => 'latin_1', 150 nl => 'latin_1', 151 nederlands => 'latin_1', 152 dutch => 'latin_1', 153 por => 'latin_1', 154 posix => $official_ascii_name, 155 ps => 'arabic', 156 pt => 'latin_1', 157 pus => 'arabic', 158 ru => 'cyrillic', 159 russki => 'cyrillic', 160 russian => 'cyrillic', 161 rus => 'cyrillic', 162 sin => 'sinhala', 163 si => 'sinhala', 164 so => 'arabic', 165 som => 'arabic', 166 spa => 'latin_1', 167 sr => 'cyrillic', 168 srp => 'cyrillic', 169 tam => 'tamil', 170 ta => 'tamil', 171 tg => 'cyrillic', 172 tgk => 'cyrillic', 173 tha => 'thai', 174 th => 'thai', 175 thai => 'thai', 176 ti => 'ethiopian', 177 tir => 'ethiopian', 178 uk => 'cyrillic', 179 ukr => 'cyrillic', 180 ur => 'arabic', 181 urd => 'arabic', 182 zgh => 'arabic', 183 zh => 'chinese', 184 zho => 'chinese', 185 ); 186my %codeset_to_script = ( 187 88591 => 'latin_1', 188 88592 => 'latin_2', 189 88593 => 'latin_3', 190 88594 => 'latin_4', 191 88595 => 'cyrillic', 192 88596 => 'arabic', 193 88597 => 'greek', 194 88598 => 'hebrew', 195 88599 => 'latin_5', 196 885910 => 'latin_6', 197 885911 => 'thai', 198 885912 => 'devanagari', 199 885913 => 'latin_7', 200 885914 => 'latin_8', 201 885915 => 'latin_9', 202 885916 => 'latin_10', 203 cp1251 => 'cyrillic', 204 cp1255 => 'hebrew', 205 ); 206 207my %script_priorities = ( # In trying to make the results as distinct as 208 # possible, make the ones closest to Unicode, 209 # and ASCII lowest priority 210 $official_ascii_name => 15, 211 latin_1 => 14, 212 latin_9 => 13, 213 latin_2 => 12, 214 latin_4 => 12, 215 latin_5 => 12, 216 latin_6 => 12, 217 latin_7 => 12, 218 latin_8 => 12, 219 latin_10 => 12, 220 latin => 11, # Unknown latin version 221 ); 222 223my %script_instances; # Keys are scripts, values are how many locales use 224 # this script. 225 226sub analyze_locale_name($) { 227 228 # Takes the input name of a locale and creates (and returns) a hash 229 # containing information about that locale 230 231 my %ret; 232 my $input_locale_name = shift; 233 234 my $old_locale = setlocale(LC_CTYPE); 235 236 # Often a locale has multiple aliases, and the base one is returned 237 # by setlocale() when called with an alias. The base is more likely to 238 # meet the XPG standards than the alias. 239 my $new_locale = setlocale(LC_CTYPE, $input_locale_name); 240 if (! $new_locale) { 241 diag "Unexpectedly can't setlocale(LC_CTYPE, $new_locale);" 242 . " \$!=$!, \$^E=$^E"; 243 return; 244 } 245 246 $ret{locale_name} = $new_locale; 247 248 # XPG standard for locale names: 249 # language[_territory[.codeset]][@modifier] 250 # But, there are instances which violate this, where there is a codeset 251 # without a territory, so instead match: 252 # language[_territory][.codeset][@modifier] 253 $ret{locale_name} =~ / ^ 254 ( .+? ) # language 255 (?: _ ( .+? ) )? # territory 256 (?: \. ( .+? ) )? # codeset 257 (?: \@ ( .+ ) )? # modifier 258 $ 259 /x; 260 261 $ret{language} = $1 // ""; 262 $ret{territory} = $2 // ""; 263 $ret{codeset} = $3 // ""; 264 $ret{modifier} = $4 // ""; 265 266 # Normalize all but 'territory' to lowercase 267 foreach my $key (qw(language codeset modifier)) { 268 $ret{$key} = lc $ret{$key}; 269 } 270 271 # Often, the codeset is omitted from the locale name, but it is still 272 # discoverable (via langinfo() ) for the current locale on many platforms. 273 # We already have switched locales 274 use I18N::Langinfo qw(langinfo CODESET); 275 my $langinfo_codeset = lc langinfo(CODESET); 276 277 # Now can switch back to the locale current on entry to this sub 278 if (! setlocale(LC_CTYPE, $old_locale)) { 279 die "Unexpectedly can't restore locale to $old_locale from" 280 . " $new_locale; \$!=$!, \$^E=$^E"; 281 } 282 283 # Normalize the codesets 284 foreach my $codeset_ref (\$langinfo_codeset, \$ret{codeset}) { 285 $$codeset_ref =~ s/\W//g; 286 $$codeset_ref =~ s/iso8859/8859/g; 287 $$codeset_ref =~ s/\b65001\b/utf8/; # Windows synonym 288 $$codeset_ref =~ s/\b646\b/$official_ascii_name/; 289 $$codeset_ref =~ s/\busascii\b/$official_ascii_name/; 290 } 291 292 # The langinfo codeset, if found, is considered more reliable than the one 293 # in the name. (This is because libc looks into the actual data 294 # definition.) So use it unconditionally when found. But note any 295 # discrepancy as an aid for improving this test. 296 if ($langinfo_codeset) { 297 if ($ret{codeset} && $ret{codeset} ne $langinfo_codeset) { 298 diag "In $ret{locale_name}, codeset from langinfo" 299 . " ($langinfo_codeset) doesn't match codeset in" 300 . " locale_name ($ret{codeset})"; 301 } 302 $ret{codeset} = $langinfo_codeset; 303 } 304 305 $ret{is_utf8} = 0 + ($ret{codeset} eq 'utf8'); 306 307 # If the '@' modifier is a known script, use it as the script. 308 if ( $ret{modifier} 309 and grep { $_ eq $ret{modifier} } values %lang_code_to_script) 310 { 311 $ret{script} = $ret{nominal_script} = $ret{modifier}; 312 $ret{modifier} = ""; 313 } 314 elsif ($ret{codeset} && ! $ret{is_utf8}) { 315 316 # The codeset determines the script being used, except if we don't 317 # have the codeset, or it is UTF-8 (which covers a multitude of 318 # scripts). 319 # 320 # We have hard-coded the scripts corresponding to a few of these 321 # non-UTF-8 codesets. See if this is one of them. 322 $ret{script} = $codeset_to_script{$ret{codeset}}; 323 if ($ret{script}) { 324 325 # For these, the script is likely a combination of ASCII (from 326 # 0-127), and the script from (128-255). Reflect that in the name 327 # used (for distinguishing below) 328 $ret{script} .= '_' . $official_ascii_name; 329 } 330 elsif ($ret{codeset} =~ /^koi/) { # Another common set. 331 $ret{script} = "cyrillic_${official_ascii_name}"; 332 } 333 else { # Here the codeset name is unknown to us. Just assume it 334 # means a whole new script. Add the language at the end of 335 # the name to further make it distinct 336 $ret{script} = $ret{codeset}; 337 $ret{script} .= "_$ret{language}" 338 if $ret{codeset} !~ /$official_ascii_name/; 339 } 340 } 341 else { # Here, the codeset is unknown or is UTF-8. 342 343 # In these cases look up the script based on the language. The table 344 # is meant to be pretty complete, but omits the many scripts that are 345 # ASCII or Latin1. And it omits the fullnames of languages whose 346 # scripts are themselves. The grep below catches those. Defaulting 347 # to Latin means that a non-standard language name is considered to be 348 # latin -- maybe not the best outcome but what else is better? 349 $ret{script} = $lang_code_to_script{$ret{language}}; 350 if (! $ret{script}) { 351 $ret{script} = (grep { $ret{language} eq $_ } 352 values %lang_code_to_script) 353 ? $ret{language} 354 : 'latin'; 355 } 356 } 357 358 # If we have @euro, and the script is ASCII or latin or latin1, change it 359 # into latin9, which is closer to what is going on. latin9 has a few 360 # other differences from latin1, but it's not worth creating a whole new 361 # script type that differs only in the currency symbol. 362 if ( ($ret{modifier} && $ret{modifier} eq 'euro') 363 && $ret{script} =~ / ^ ($official_ascii_name | latin (_1)? ) $ /x) 364 { 365 $ret{script} = 'latin_9'; 366 } 367 368 # Look up the priority of this script. All the non-listed ones have 369 # highest (0 or 1) priority. We arbitrarily make the ones higher 370 # priority (0) that aren't known to be half-ascii, simply because they 371 # might be entirely different than most locales. 372 $ret{priority} = $script_priorities{$ret{script}}; 373 if (! $ret{priority}) { 374 $ret{priority} = ( $ret{script} ne $official_ascii_name 375 && $ret{script} =~ $official_ascii_name) 376 ? 0 377 : 1; 378 } 379 380 # Script names have been set up so that anything after an underscore is a 381 # modifier of the main script. We keep a counter of which occurence of 382 # this script this is. This is used along with the priority to order the 383 # locales so that the characters are as varied as possible. 384 my $script_root = ($ret{script} =~ s/_.*//r) . "_$ret{is_utf8}"; 385 $ret{script_instance} = $script_instances{$script_root}++; 386 387 return \%ret; 388} 389 390# Prioritize locales that are most unlike the standard C/Latin1-ish ones. 391# This is to minimize getting passes for tests on a category merely because 392# they share many of the same characteristics as the locale of another 393# category simultaneously in effect. 394sub sort_locales () 395{ 396 my $cmp = $a->{script_instance} <=> $b->{script_instance}; 397 return $cmp if $cmp; 398 399 $cmp = $a->{priority} <=> $b->{priority}; 400 return $cmp if $cmp; 401 402 $cmp = $a->{script} cmp $b->{script}; 403 return $cmp if $cmp; 404 405 $cmp = $a->{modifier} cmp $b->{modifier}; 406 return $cmp if $cmp; 407 408 $cmp = $a->{codeset} cmp $b->{codeset}; 409 return $cmp if $cmp; 410 411 $cmp = $a->{territory} cmp $b->{territory}; 412 return $cmp if $cmp; 413 414 return lc $a cmp lc $b; 415} 416 417# Find out extra info about each locale 418my @cleaned_up_locales; 419for my $locale (@locales) { 420 my $locale_struct = analyze_locale_name($locale); 421 422 next unless $locale_struct; 423 424 my $name = $locale_struct->{locale_name}; 425 next if grep { $name eq $_->{locale_name} } @cleaned_up_locales; 426 427 push @cleaned_up_locales, $locale_struct; 428} 429 430@locales = @cleaned_up_locales; 431 432# Without a proper codeset, we can't really know how to test. This should 433# only happen on platforms that lack the ability to determine the codeset. 434@locales = grep { $_->{codeset} ne "" } @locales; 435 436# Sort into priority order. 437@locales = sort sort_locales @locales; 438 439# First test 440SKIP: { # perl #127708 441 my $locale = $locales[0]; 442 skip("No valid locale to test with", 1) if $locale->{codeset} eq 443 $official_ascii_name; 444 local $ENV{LC_MESSAGES} = $locale->{locale_name}; 445 446 # We're going to try with all possible error numbers on this platform 447 my $error_count = keys(%!) + 1; 448 449 print fresh_perl(" 450 use threads; 451 use strict; 452 use warnings; 453 use Time::HiRes qw(usleep); 454 455 my \$errnum = 1; 456 457 my \@threads = map +threads->create(sub { 458 usleep 0.1; 459 'threads'->yield(); 460 461 for (1..5_000) { 462 \$errnum = (\$errnum + 1) % $error_count; 463 \$! = \$errnum; 464 465 # no-op to trigger stringification 466 next if \"\$!\" eq \"\"; 467 } 468 }), (0..1); 469 \$_->join for splice \@threads;", 470 {} 471 ); 472 473 pass("Didn't segfault"); 474} 475 476# Second test setup 477my %locale_name_to_object; 478for my $locale (@locales) { 479 $locale_name_to_object{$locale->{locale_name}} = $locale; 480} 481 482sub sort_by_hashed_locale { 483 local $a = $locale_name_to_object{$a}; 484 local $b = $locale_name_to_object{$b}; 485 486 return sort_locales; 487} 488 489sub min { 490 my ($a, $b) = @_; 491 return $a if $a <= $b; 492 return $b; 493} 494 495# Smokes have shown this to be about the maximum numbers some platforms can 496# handle. khw has tried 500 threads/1000 iterations on Linux 497my $thread_count = 15; 498my $iterations = 100; 499 500my $alarm_clock = (1 * 10 * 60); # A long time, just to prevent hanging 501 502# Chunk the iterations, so that every so often the test comes up for air. 503my $iterations_per_test_set = min(30, int($iterations / 5)); 504$iterations_per_test_set = 1 if $iterations_per_test_set == 0; 505 506# Sometimes the test calls setlocale() for each individual locale category. 507# But every this many threads, it will be called just once, using LC_ALL to 508# specify the categories. This way both setting individual categories and 509# LC_ALL get tested. But skip this nicety on platforms where we are restricted from 510# using all the available categories, as it would make the code more complex 511# for not that much gain. 512my @platform_categories = platform_locale_categories(); 513my $lc_all_frequency = scalar @platform_categories == scalar @valid_categories 514 ? 3 515 : -1; 516 517# To avoid things getting too big; skip tests whose results are larger than 518# this many characters. 519my $max_result_length = 10000; 520 521# Estimate as to how long in seconds to allow a thread to be ready to roll 522# after creation, so as to try to get all the threads to start as 523# simultaneously as possible 524my $per_thread_startup = .18; 525 526# For use in experimentally tuning the above value 527my $die_on_negative_sleep = 1; 528 529# We don't need to test every possible errno, but you could change this to do 530# so by setting it to negative 531my $max_message_catalog_entries = 10; 532 533# December 18, 1987 534my $strftime_args = "'%c', 0, 0, , 12, 18, 11, 87"; 535 536my %distincts; # The distinct 'operation => result' cases 537my %op_counts; # So we can bail early if more test cases than threads 538my $separator = '____'; # The operation and result are often melded into a 539 # string separated by this. 540 541sub pack_op_result($$) { 542 my ($op, $result) = @_; 543 return $op . $separator 544 . (0 + utf8::is_utf8($op)) . $separator 545 . $result . $separator 546 . (0 + utf8::is_utf8($result)); 547} 548 549sub fixup_utf8ness($$) { 550 my ($operand, $utf8ness) = @_; 551 552 # Make sure $operand is encoded properly 553 554 if ($utf8ness + 0 != 0 + utf8::is_utf8($$operand)) { 555 if ($utf8ness) { 556 utf8::upgrade($$operand); 557 } 558 else { 559 utf8::downgrade($$operand); 560 } 561 } 562} 563 564sub unpack_op_result($) { 565 my $op_result = shift; 566 567 my ($op, $op_utf8ness, $result, $result_utf8ness) = 568 split $separator, $op_result; 569 fixup_utf8ness(\$op, $op_utf8ness); 570 fixup_utf8ness(\$result, $result_utf8ness); 571 572 return ($op, $result); 573} 574 575sub add_trials($$;$) 576{ 577 # Add a test case for category $1. 578 # $2 is the test case operation to perform 579 # $3 is a constraint, optional. 580 581 my $category_name = shift; 582 my $input_op = shift; # The eval string to perform 583 my $locale_constraint = shift // ""; # If defined, the test will be 584 # created only for locales that 585 # match this 586 LOCALE: 587 foreach my $locale (@locales) { 588 my $locale_name = $locale->{locale_name}; 589 my $op = $input_op; 590 591 # All categories should be set to the same locale to make sure 592 # this test gets the valid results. 593 next unless setlocale($LC_ALL, $locale_name); 594 595 # As of NetBSD 10, it doesn't implement LC_COLLATE, and setting that 596 # category to anything but C or POSIX fails. But setting LC_ALL to 597 # other locales (as we just did) returns success, while leaving 598 # LC_COLLATE untouched. Therefore, also set the category individually 599 # to catch such things. This problem may not be confined to NetBSD. 600 # This also works if the platform lacks LC_ALL. We at least set 601 # LC_CTYPE (via '$LC_ALL' above) besides the category. 602 next unless setlocale($map_category_name_to_number{$category_name}, 603 $locale_name); 604 605 # Use a placeholder if this test requires a particular constraint, 606 # which isn't met in this case. 607 if ($locale_constraint) { 608 if ($locale_constraint eq 'utf8_only') { 609 next if ! $locale->{is_utf8}; 610 } 611 elsif ($locale_constraint eq 'a<b') { 612 my $result = eval "use locale; 'a' lt 'B'"; 613 die "$category_name: '$op (a lt B)': $@" if $@; 614 next unless $result; 615 } 616 else { 617 die "Only accepted locale constraints are 'utf8_only' and 'a<b'" 618 } 619 } 620 621 # Calculate what the expected value of the test should be. We're 622 # doing this here in the main thread and with all the locales set to 623 # be the same thing. The test will be that we should get this value 624 # under stress, with each thread using different locales for each 625 # category, and multiple threads simultaneously executing with 626 # disparate locales 627 my $eval_string = ($op) ? "use locale; $op;" : ""; 628 my $result = eval $eval_string; 629 die "$category_name: '$op': $@" if $@; 630 if (! defined $result) { 631 if ($debug) { 632 print STDERR __FILE__, ": ", __LINE__, 633 ": Undefined result for $locale_name", 634 " $category_name: '$op'\n"; 635 } 636 next; 637 } 638 elsif ($debug > 1) { 639 print STDERR "\n", __FILE__, ": ", __LINE__, ": $category_name:", 640 " $locale_name: Op = ", Dumper($op), "; Returned "; 641 Dump $result; 642 } 643 if (length $result > $max_result_length) { 644 diag("For $locale_name, '$op', result is too long; skipped"); 645 next; 646 } 647 648 # It seems best to not include tests with mojibake results, which here 649 # is checked for by two question marks in a row. (strxfrm is excluded 650 # from this restriction, as the result is really binary, so '??' could 651 # and does come up, not meaning mojibake.) A concrete example of this 652 # is in Mingw the locale Yi_China.1252. CP 1252 is for a Latin 653 # script; just about anything from an East Asian script is bound to 654 # fail. It makes no sense to have this locale, but it exists. 655 if ($eval_string !~ /xfrm/ && $result =~ /\?\?/) { 656 if ($debug) { 657 print STDERR __FILE__, ": ", __LINE__, 658 " For $locale_name, op=$op, result has mojibake: $result\n"; 659 } 660 661 next; 662 } 663 664 # Some systems are buggy in that setlocale() gives non-deterministic 665 # results for some locales. Here we try to exclude those from our 666 # test by trying the setlocale this many times to see if it varies: 667 my $deterministic_trial_count = 5; 668 669 # To do this, we set the locale to an 'alternate' locale between 670 # trials. This defeats any attempt by the implementation to skip the 671 # setlocale if it is already in said locale. 672 my $alternate; 673 my @alternate; 674 675 # If possible, the alternate is chosen to be of the opposite UTF8ness, 676 # so as to reset internal states about that. 677 if (! $utf8_locales_ref || ! $utf8_locales_ref->@*) { 678 679 # If no UTF-8 locales, must choose one that is non-UTF-8. 680 @alternate = grep { $_ ne $locale_name } $non_utf8_locales_ref->@*; 681 } 682 elsif (! $non_utf8_locales_ref || ! $non_utf8_locales_ref->@*) { 683 684 # If no non-UTF-8 locales, must choose one that is UTF-8. 685 @alternate = grep { $_ ne $locale_name } $utf8_locales_ref->@*; 686 } 687 elsif (grep { $_ eq $locale_name } $utf8_locales_ref->@*) { 688 @alternate = $non_utf8_locales_ref->@*; 689 } 690 else { 691 @alternate = $utf8_locales_ref->@*; 692 } 693 694 # Now do the trials. For each, we choose the next alternate on the 695 # list, rotating the list so the following iteration will choose a 696 # different alternate. 697 for my $i (1 .. $deterministic_trial_count - 1) { 698 my $other = shift @alternate; 699 push @alternate, $other; 700 701 # Run the test on the alternate locale 702 if (! setlocale($LC_ALL, $other)) { 703 if ( $LC_ALL_string eq 'LC_ALL' 704 || ! setlocale($map_category_name_to_number{$category_name}, 705 $other)) 706 { 707 die "Unexpectedly can't set locale to $other:" 708 . " \$!=$!, \$^E=$^E"; 709 } 710 } 711 712 eval $eval_string; 713 714 # Then run it on the one we are hoping to test 715 if (! setlocale($LC_ALL, $locale_name)) { 716 if ( $LC_ALL_string eq 'LC_ALL' 717 || ! setlocale($map_category_name_to_number{$category_name}, 718 $locale_name)) 719 { 720 die "Unexpectedly can't set locale to $locale_name from " 721 . setlocale($LC_ALL) 722 . "; \$!=$!, \$^E=$^E"; 723 } 724 } 725 726 my $got = eval $eval_string; 727 next if $got eq $result 728 && utf8::is_utf8($got) == utf8::is_utf8($result); 729 730 # If the result varied from the expected value, this is a 731 # non-deterministic locale, so, don't test it. 732 diag("For '$eval_string',\nresults in iteration $i differed from" 733 . " the original\ngot"); 734 Dump($got); 735 diag("expected"); 736 Dump($result); 737 next LOCALE; 738 } 739 740 # Here, the setlocale for this locale appears deterministic. Use it. 741 my $op_result = pack_op_result($op, $result); 742 push $distincts{$category_name}{$op_result}{locales}->@*, $locale_name; 743 # No point in looking beyond this if we already have all the tests we 744 # need. Note this assumes that the same op isn't used in two 745 # categories. 746 if (defined $op_counts{$op} && $op_counts{$op} >= $thread_count) 747 { 748 last; 749 } 750 } 751} 752 753use Config; 754 755# Figure out from config how to represent disparate LC_ALL 756my @valid_category_numbers = sort { $a <=> $b } 757 map { $map_category_name_to_number{$_} } @valid_categories; 758 759my $use_name_value_pairs = defined $Config{d_perl_lc_all_uses_name_value_pairs}; 760my $lc_all_separator = ($use_name_value_pairs) 761 ? ";" 762 : $Config{perl_lc_all_separator} =~ s/"//gr; 763my @position_to_category_number; 764if (! $use_name_value_pairs) { 765 my $positions = $Config{perl_lc_all_category_positions_init} =~ s/[{}]//gr; 766 $positions =~ s/,//g; 767 $positions =~ s/^ +//; 768 $positions =~ s/ +$//; 769 @position_to_category_number = split / \s+ /x, $positions 770} 771 772sub get_next_category() { 773 use feature 'state'; 774 state $index; 775 776 # Called to rotate all the legal locale categories 777 778 my $which = ($use_name_value_pairs) 779 ? \@valid_category_numbers 780 : \@position_to_category_number; 781 782 $index = -1 unless defined $index; 783 $index++; 784 785 if (! defined $which->[$index]) { 786 undef $index; 787 return; 788 } 789 790 my $category_number = $which->[$index]; 791 return $category_number if $category_number != $LC_ALL; 792 793 # If this was LC_ALL, the next one won't be 794 return &get_next_category(); 795} 796 797SKIP: { 798 skip("Unsafe locale threads", 1) unless ${^SAFE_LOCALES}; 799 800 # The second test is several threads nearly simulataneously executing 801 # locale-sensitive operations with the categories set to disparate 802 # locales. This catches cases where the results of a given category is 803 # related to what the locale is of another category. (As an example, this 804 # test showed that some platforms require LC_CTYPE to be the same as 805 # LC_COLLATION, and/or LC_MESSAGES for proper results, so that Perl had to 806 # change to bring these into congruence under the hood). And it also 807 # catches where there is interference between multiple threads. 808 # 809 # This test tries to exercise every underlying locale-dependent operation 810 # available in Perl. It doesn't test every use of the operation, but 811 # includes some Perl construct that uses each. For example, it tests lc 812 # but not lcfirst. That would be redundant for this test; it wants to 813 # know if lowercasing works under threads and locales. But if the 814 # implementations were disjoint at the time this test was written, it 815 # would try each implementation. So, various things in the POSIX module 816 # have separate tests from the ones in core. 817 # 818 # For each such underlying locale-dependent operation, a Perl-visible 819 # construct is chosen that uses it. And a typical input or set of inputs 820 # is passed to that and the results are noted for every available locale 821 # on the platform. Many locales will have identical results, so the 822 # duplicates are stored separately. 823 # 824 # There will be N simultaneous threads. Each thread is configured to set 825 # a locale for each category, to run operations whose results depend on 826 # that locale, then check that the result matches the expected value, and 827 # to immediately repeat some largish number of iterations. The goal is to 828 # see if the locales on each thread are truly independent of those on the 829 # other threads. 830 # 831 # To that end, the locales are chosen so that the results differ from 832 # every other locale. Otherwise, the thread results wouldn't be truly 833 # independent. But if there are more threads than there are distinct 834 # results, duplicates are used to fill up what would otherwise be empty 835 # slots. That is the best we can do on those platforms. 836 # 837 # Having lots of locales to continually switch between stresses things so 838 # as to find potential segfaults where locale changing isn't really thread 839 # safe. 840 841 # There is a bug in older Windows runtimes in which locales in CP1252 and 842 # similar code pages whose names aren't entirely ASCII aren't recognized 843 # by later setlocales. Some names that are all ASCII are synonyms for 844 # such names. Weed those out by doing a setlocale of the original name, 845 # and then a setlocale of the resulting one. Discard locales which have 846 # any unacceptable name 847 if (${^O} eq "MSWin32" && $Config{'libc'} !~ /ucrt/) { 848 @locales = grep { 849 my $locale_name = $_->{locale_name}; 850 my $underlying_name = setlocale(&LC_CTYPE, $locale_name); 851 852 # Defeat any attempt to skip the setlocale if the same as current, 853 # by switching to a locale very unlikey to be the current one. 854 setlocale($LC_ALL, "Albanian"); 855 856 defined($underlying_name) && setlocale(&LC_CTYPE, $underlying_name) 857 } @locales; 858 } 859 860 # Create a hash of the errnos: 861 # "1" => "Operation\\ not\\ permitted", 862 # "2" => "No\\ such\\ file\\ or\\ directory", 863 # etc. 864 my %msg_catalog; 865 foreach my $error (sort keys %!) { 866 my $number = eval "Errno::$error"; 867 $! = $number; 868 my $description = "$!"; 869 next unless "$description"; 870 $msg_catalog{$number} = quotemeta "$description"; 871 } 872 873 # Then just the errnos. 874 my @msg_catalog = sort { $a <=> $b } keys %msg_catalog; 875 876 # Remove the excess ones. 877 splice @msg_catalog, $max_message_catalog_entries 878 if $max_message_catalog_entries >= 0; 879 my $msg_catalog = join ',', @msg_catalog; 880 881 eval { my $discard = POSIX::localeconv()->{currency_symbol}; }; 882 my $has_localeconv = $@ eq ""; 883 884 # Now go through and create tests for each locale category on the system. 885 # These tests were determined by grepping through the code base for 886 # locale-sensitive operations, and then figuring out something to exercise 887 # them. 888 foreach my $category (@valid_categories) { 889 no warnings 'uninitialized'; 890 891 next if $category eq 'LC_ALL'; # Tested below as a combination of the 892 # individual categories 893 if ($category eq 'LC_COLLATE') { 894 add_trials('LC_COLLATE', 895 # 'reverse' causes it to be definitely out of order for 896 # the 'sort' to correct 897 'quotemeta join "", sort reverse map { chr } (1..255)'); 898 899 # We pass an re to exclude testing locales that don't necessarily 900 # have a lt b. 901 add_trials('LC_COLLATE', '"a" lt "B"', 'a<b'); 902 add_trials('LC_COLLATE', 'my $a = "a"; my $b = "B";' 903 . ' POSIX::strcoll($a, $b) < 0;', 904 'a<b'); 905 906 # Doesn't include NUL because our memcollxfrm implementation of it 907 # isn't perfect 908 add_trials('LC_COLLATE', 'my $string = quotemeta join "",' 909 . ' map { chr } (1..255);' 910 . ' POSIX::strxfrm($string)'); 911 next; 912 } 913 914 if ($category eq 'LC_CTYPE') { 915 add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta lc' 916 . ' join "" , map { chr } (0..255)'); 917 add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta uc' 918 . ' join "", map { chr } (0..255)'); 919 add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta CORE::fc' 920 . ' join "", map { chr } (0..255)'); 921 add_trials('LC_CTYPE', 'no warnings "locale";' 922 . ' my $string = join "", map { chr } 0..255;' 923 . ' $string =~ s|(.)|$1=~/\d/?1:0|gers'); 924 add_trials('LC_CTYPE', 'no warnings "locale";' 925 . ' my $string = join "", map { chr } 0..255;' 926 . ' $string =~ s|(.)|$1=~/\s/?1:0|gers'); 927 add_trials('LC_CTYPE', 'no warnings "locale";' 928 . ' my $string = join "", map { chr } 0..255;' 929 . ' $string =~ s|(.)|$1=~/\w/?1:0|gers'); 930 add_trials('LC_CTYPE', 'no warnings "locale";' 931 . ' my $string = join "", map { chr } 0..255;' 932 . ' $string =~ s|(.)|$1=~/[[:alpha:]]/?1:0|gers'); 933 add_trials('LC_CTYPE', 'no warnings "locale";' 934 . ' my $string = join "", map { chr } 0..255;' 935 . ' $string =~ s|(.)|$1=~/[[:alnum:]]/?1:0|gers'); 936 add_trials('LC_CTYPE', 'no warnings "locale";' 937 . ' my $string = join "", map { chr } 0..255;' 938 . ' $string =~ s|(.)|$1=~/[[:ascii:]]/?1:0|gers'); 939 add_trials('LC_CTYPE', 'no warnings "locale";' 940 . ' my $string = join "", map { chr } 0..255;' 941 . ' $string =~ s|(.)|$1=~/[[:blank:]]/?1:0|gers'); 942 add_trials('LC_CTYPE', 'no warnings "locale";' 943 . ' my $string = join "", map { chr } 0..255;' 944 . ' $string =~ s|(.)|$1=~/[[:cntrl:]]/?1:0|gers'); 945 add_trials('LC_CTYPE', 'no warnings "locale";' 946 . ' my $string = join "", map { chr } 0..255;' 947 . ' $string =~ s|(.)|$1=~/[[:graph:]]/?1:0|gers'); 948 add_trials('LC_CTYPE', 'no warnings "locale";' 949 . ' my $string = join "", map { chr } 0..255;' 950 . ' $string =~ s|(.)|$1=~/[[:lower:]]/?1:0|gers'); 951 add_trials('LC_CTYPE', 'no warnings "locale";' 952 . ' my $string = join "", map { chr } 0..255;' 953 . ' $string =~ s|(.)|$1=~/[[:print:]]/?1:0|gers'); 954 add_trials('LC_CTYPE', 'no warnings "locale";' 955 . ' my $string = join "", map { chr } 0..255;' 956 . ' $string =~ s|(.)|$1=~/[[:punct:]]/?1:0|gers'); 957 add_trials('LC_CTYPE', 'no warnings "locale";' 958 . ' my $string = join "", map { chr } 0..255;' 959 . ' $string =~ s|(.)|$1=~/[[:upper:]]/?1:0|gers'); 960 add_trials('LC_CTYPE', 'no warnings "locale";' 961 . ' my $string = join "", map { chr } 0..255;' 962 . ' $string =~ s|(.)|$1=~/[[:xdigit:]]/?1:0|gers'); 963 add_trials('LC_CTYPE', 'use I18N::Langinfo qw(langinfo CODESET);' 964 . ' no warnings "uninitialized";' 965 . ' langinfo(CODESET);'); 966 967 # In the multibyte functions, the non-reentrant ones can't be made 968 # thread safe 969 if ($Config{'d_mbrlen'} eq 'define') { 970 add_trials('LC_CTYPE', 'my $string = chr 0x100;' 971 . ' utf8::encode($string);' 972 . ' no warnings "uninitialized";' 973 . ' POSIX::mblen(undef);' 974 . ' POSIX::mblen($string)', 975 'utf8_only'); 976 } 977 if ($Config{'d_mbrtowc'} eq 'define') { 978 add_trials('LC_CTYPE', 'my $value; my $str = "\x{100}";' 979 . ' utf8::encode($str);' 980 . ' no warnings "uninitialized";' 981 . ' POSIX::mbtowc(undef, undef);' 982 . ' POSIX::mbtowc($value, $str); $value;', 983 'utf8_only'); 984 } 985 if ($Config{'d_wcrtomb'} eq 'define') { 986 add_trials('LC_CTYPE', 'my $value;' 987 . ' no warnings "uninitialized";' 988 . ' POSIX::wctomb(undef, undef);' 989 . ' POSIX::wctomb($value, 0xFF);' 990 . ' $value;', 991 'utf8_only'); 992 } 993 994 add_trials('LC_CTYPE', 995 'no warnings "locale";' 996 . ' my $uc = CORE::uc join "", map { chr } (0..255);' 997 . ' my $fc = quotemeta CORE::fc $uc;' 998 . ' $uc =~ / \A $fc \z /xi;'); 999 next; 1000 } 1001 1002 if ($category eq 'LC_MESSAGES') { 1003 add_trials('LC_MESSAGES', 1004 "join \"\n\", map { \$! = \$_; \"\$!\" } ($msg_catalog)"); 1005 add_trials('LC_MESSAGES', 1006 'use I18N::Langinfo qw(langinfo YESSTR NOSTR YESEXPR NOEXPR);' 1007 . ' no warnings "uninitialized";' 1008 . ' join ",",' 1009 . ' map { langinfo($_) } YESSTR, NOSTR, YESEXPR, NOEXPR;'); 1010 next; 1011 } 1012 1013 if ($category eq 'LC_MONETARY') { 1014 if ($has_localeconv) { 1015 add_trials('LC_MONETARY', "localeconv()->{currency_symbol}"); 1016 } 1017 add_trials('LC_MONETARY', 1018 'use I18N::Langinfo qw(langinfo CRNCYSTR);' 1019 . ' no warnings "uninitialized";' 1020 . ' join "|", map { langinfo($_) } CRNCYSTR;'); 1021 next; 1022 } 1023 1024 if ($category eq 'LC_NUMERIC') { 1025 if ($has_localeconv) { 1026 add_trials('LC_NUMERIC', "no warnings; 'uninitialised';" 1027 . " join '|'," 1028 . " localeconv()->{decimal_point}," 1029 . " localeconv()->{thousands_sep}"); 1030 } 1031 add_trials('LC_NUMERIC', 1032 'use I18N::Langinfo qw(langinfo RADIXCHAR THOUSEP);' 1033 . ' no warnings "uninitialized";' 1034 . ' join "|", map { langinfo($_) } RADIXCHAR, THOUSEP;'); 1035 1036 # Use a variable to avoid runtime bugs being hidden by constant 1037 # folding 1038 add_trials('LC_NUMERIC', 'my $in = 4.2; sprintf("%g", $in)'); 1039 next; 1040 } 1041 1042 if ($category eq 'LC_TIME') { 1043 add_trials('LC_TIME', "POSIX::strftime($strftime_args)"); 1044 add_trials('LC_TIME', <<~'END_OF_CODE'); 1045 use I18N::Langinfo qw(langinfo 1046 ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5 ABDAY_6 ABDAY_7 1047 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6 1048 ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 1049 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 1050 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 1051 MON_7 MON_8 MON_9 MON_10 MON_11 MON_12 1052 D_FMT D_T_FMT T_FMT); 1053 no warnings "uninitialized"; 1054 join "|", 1055 map { langinfo($_) } 1056 ABDAY_1,ABDAY_2,ABDAY_3,ABDAY_4,ABDAY_5, 1057 ABDAY_6,ABDAY_7, 1058 ABMON_1,ABMON_2,ABMON_3,ABMON_4,ABMON_5, 1059 ABMON_6, ABMON_7,ABMON_8,ABMON_9,ABMON_10, 1060 ABMON_11,ABMON_12, 1061 DAY_1,DAY_2,DAY_3,DAY_4,DAY_5,DAY_6,DAY_7, 1062 MON_1,MON_2,MON_3,MON_4,MON_5,MON_6, MON_7, 1063 MON_8,MON_9,MON_10,MON_11,MON_12, 1064 D_FMT,D_T_FMT,T_FMT; 1065 END_OF_CODE 1066 next; 1067 } 1068 } # End of creating test cases. 1069 1070 1071 # Now analyze the test cases 1072 my %all_tests; 1073 foreach my $category (keys %distincts) { 1074 my %results; 1075 my %distinct_results_count; 1076 1077 # Find just the distinct test operations; sort for repeatibility 1078 my %distinct_ops; 1079 for my $op_result (sort keys $distincts{$category}->%*) { 1080 my ($op, $result) = unpack_op_result($op_result); 1081 1082 $distinct_ops{$op}++; 1083 push $results{$op}->@*, $result; 1084 $distinct_results_count{$result} += 1085 scalar $distincts{$category}{$op_result}{locales}->@*; 1086 } 1087 1088 # And get a sorted list of all the test operations 1089 my @ops = sort keys %distinct_ops; 1090 1091 sub gen_combinations { 1092 1093 # Generate all the non-empty combinations of operations and 1094 # results (for the current category) possible on this platform. 1095 # That is, if a category has N operations, it will generate a list 1096 # of entries. Each entry will itself have N elements, one for 1097 # each operation, and when all the entries are considered 1098 # together, every possible outcome is represented. 1099 1100 my $op_ref = shift; # Reference to list of operations 1101 my $results_ref = shift; # Reference to hash; key is operation; 1102 # value is an array of all possible 1103 # outcomes of this operation. 1104 my $distincts_ref = shift; # Reference to %distincts of this 1105 # category 1106 1107 # Get the first operation on the list 1108 my $op = shift $op_ref->@*; 1109 1110 # The return starts out as a list of hashes of all possible 1111 # outcomes for executing 'op'. Each hash has two keys: 1112 # 'op_results' is an array of one element: 'op => result', 1113 # packed into a string. 1114 # 'locales' is an array of all the locales which have the 1115 # same result for 'op' 1116 my @return; 1117 foreach my $result ($results_ref->{$op}->@*) { 1118 my $op_result = pack_op_result($op, $result); 1119 push @return, { 1120 op_results => [ $op_result ], 1121 locales => $distincts_ref->{$op_result}{locales}, 1122 }; 1123 } 1124 1125 # If this is the final element of the list, we are done. 1126 return (\@return) unless $op_ref->@*; 1127 1128 # Otherwise recurse to generate the combinations for the remainder 1129 # of the list. 1130 my $recurse_return = &gen_combinations($op_ref, 1131 $results_ref, 1132 $distincts_ref); 1133 # Now we have to generate the combinations of the current item 1134 # with the ones returned by the recursion. Each element of the 1135 # current item is combined with each element of the recursed. 1136 my @combined; 1137 foreach my $this (@return) { 1138 my @this_locales = $this->{locales}->@*; 1139 foreach my $recursed ($recurse_return->@*) { 1140 my @recursed_locales = $recursed->{locales}->@*; 1141 1142 # @this_locales is a list of locales this op => result is 1143 # valid for. @recursed_locales is similarly a list of the 1144 # valid ones for the recursed return. Their intersection 1145 # is a list of the locales valid for this combination. 1146 my %seen; 1147 $seen{$_}++ foreach @this_locales, @recursed_locales; 1148 my @intersection = grep $seen{$_} == 2, keys %seen; 1149 1150 # An alternative intersection algorithm: 1151 # my (%set1, %set2); 1152 # @set1{@list1} = (); 1153 # @set2{@list2} = (); 1154 # my @intersection = grep exists $set1{$_}, keys %set2; 1155 1156 # If the intersection is empty, this combination can't 1157 # actually happen on this platform. 1158 next unless @intersection; 1159 1160 # Append the recursed list to the current list to form the 1161 # combined list. 1162 my @combined_result = $this->{op_results}->@*; 1163 push @combined_result, $recursed->{op_results}->@*; 1164 # And create the hash for the combined result, including 1165 # the locales it is valid for 1166 push @combined, { 1167 op_results => \@combined_result, 1168 locales => \@intersection, 1169 }; 1170 } 1171 } 1172 1173 return \@combined; 1174 } # End of gen_combinations() definition 1175 1176 # The result of calling gen_combinations() will be an array of hashes. 1177 # 1178 # The main value in each hash is an array (whose key is 'op_results') 1179 # containing all the tests for this category for a thread. If there 1180 # were N calls to 'add_trial' for this category, there will be 'N' 1181 # elements in the array. Each element is a string packed with the 1182 # operation to eval in a thread and the operation's expected result. 1183 # 1184 # The other data structure in each hash is an array with the key 1185 # 'locales'. That array is a list of every locale which yields the 1186 # identical results in 'op_results'. 1187 # 1188 # Effectively, each hash gives all the tests for this category for a 1189 # thread. The total array of hashes gives the complete list of 1190 # distinct tests possible on this system. So later, a thread will 1191 # pluck the next available one from the array.. 1192 my $combinations_ref = gen_combinations(\@ops, \%results, 1193 $distincts{$category}); 1194 1195 # Fix up the entries ... 1196 foreach my $test ($combinations_ref->@*) { 1197 1198 # Sort the locale names; this makes it work for later comparisons 1199 # to look at just the first element of each list. 1200 $test->{locales}->@* = 1201 sort sort_by_hashed_locale $test->{locales}->@*; 1202 1203 # And for each test, calculate and store how many locales have the 1204 # same result (saves recomputation later in a sort). This adds 1205 # another data structure to each hash in the main array. 1206 my @individual_tests = $test->{op_results}->@*; 1207 my @in_common_locale_counts; 1208 foreach my $this_test (@individual_tests) { 1209 1210 # Each test came from %distincts, and there we have stored the 1211 # list of all locales that yield the same result 1212 push @in_common_locale_counts, 1213 scalar $distincts{$category}{$this_test}{locales}->@*; 1214 } 1215 push $test->{in_common_locale_counts}->@*, @in_common_locale_counts; 1216 } 1217 1218 # Make a copy 1219 my @cat_tests = $combinations_ref->@*; 1220 1221 # This sorts the test cases so that the ones with the least overlap 1222 # with other cases are first. 1223 sub sort_test_order { 1224 my $a_tests_count = scalar $a->{in_common_locale_counts}->@*; 1225 my $b_tests_count = scalar $b->{in_common_locale_counts}->@*; 1226 my $tests_count = min($a_tests_count, $b_tests_count); 1227 1228 # Choose the one that is most distinctive (least overlap); that is 1229 # the one that has the most tests whose results are not shared by 1230 # any other locale. 1231 my $a_nondistincts = 0; 1232 my $b_nondistincts = 0; 1233 for my $i (0 .. $tests_count - 1) { 1234 $a_nondistincts += ($a->{in_common_locale_counts}[$i] != 1); 1235 $b_nondistincts += ($b->{in_common_locale_counts}[$i] != 1); 1236 } 1237 1238 my $cmp = $a_nondistincts <=> $b_nondistincts; 1239 return $cmp if $cmp; 1240 1241 # If they have the same number of those, choose the one with the 1242 # fewest total number of locales that have the same result 1243 my $a_count = 0; 1244 my $b_count = 0; 1245 for my $i (0 .. $tests_count - 1) { 1246 $a_count += $a->{in_common_locale_counts}[$i]; 1247 $b_count += $b->{in_common_locale_counts}[$i]; 1248 } 1249 1250 $cmp = $a_count <=> $b_count; 1251 return $cmp if $cmp; 1252 1253 # If that still doesn't yield a winner, use the general sort order. 1254 local $a = $a->{locales}[0]; 1255 local $b = $b->{locales}[0]; 1256 return sort_by_hashed_locale; 1257 } 1258 1259 # Actually perform the sort. 1260 @cat_tests = sort sort_test_order @cat_tests; 1261 1262 # This category will now have all the distinct tests possible for it 1263 # on this platform, with the first test being the one with the least 1264 # overlap with other test cases 1265 push $all_tests{$category}->@*, @cat_tests; 1266 } # End of loop through the categories creating and sorting the test 1267 # cases 1268 1269 my %thread_already_used_locales; 1270 1271 # Now generate the tests for each thread. 1272 my @tests_by_thread; 1273 for my $i (0 .. $thread_count - 1) { 1274 foreach my $category (sort keys %all_tests) { 1275 my $skipped = 0; # Used below to not loop infinitely 1276 1277 # Get the next test case 1278 NEXT_CANDIDATE: 1279 my $candidate = shift $all_tests{$category}->@*; 1280 1281 my $locale_name = $candidate->{locales}[0]; 1282 1283 # Avoid, if possible, using the same locale name twice (for 1284 # different categories) in the same thread. 1285 if (defined $thread_already_used_locales{$locale_name =~ s/\W.*//r}) 1286 { 1287 # Look through the synonyms of this locale for an 1288 # as-yet-unused one 1289 for my $j (1 .. $candidate->{locales}->@* - 1) { 1290 my $synonym = $candidate->{locales}[$j]; 1291 next if defined $thread_already_used_locales{$synonym =~ 1292 s/\W.*//r}; 1293 $locale_name = $synonym; 1294 goto found_synonym; 1295 } 1296 1297 # Here, no synonym was found. If we haven't cycled through 1298 # all the possible tests, try another (putting this one at the 1299 # end as a last resort in the future). 1300 $skipped++; 1301 if ($skipped < scalar $all_tests{$category}->@*) { 1302 push $all_tests{$category}->@*, $candidate; 1303 goto NEXT_CANDIDATE; 1304 } 1305 1306 # Here no synonym was found, this test has already been used, 1307 # but there are no unused ones, so have to re-use it. 1308 1309 found_synonym: 1310 } 1311 1312 # Here, we have found a test case. The thread needs to know what 1313 # locale to use, 1314 $tests_by_thread[$i]->{$category}{locale_name} = $locale_name; 1315 1316 # And it needs to know each test to run, and the expected result. 1317 my @cases; 1318 for my $j (0 .. $candidate->{op_results}->@* - 1) { 1319 my ($op, $result) = 1320 unpack_op_result($candidate->{op_results}[$j]); 1321 push @cases, { op => $op, expected => $result }; 1322 } 1323 push $tests_by_thread[$i]->{$category}{locale_tests}->@*, @cases; 1324 1325 # Done with this category in this thread. Setup for subsequent 1326 # categories in this thread, and subsequent threads. 1327 # 1328 # It's best to not have two categories in a thread use the same 1329 # locale. Save this locale name so that later iterations handling 1330 # other categories can avoid using it, if possible. 1331 $thread_already_used_locales{$locale_name =~ s/\W.*//r} = 1; 1332 1333 # In pursuit of using as many different locales as possible, the 1334 # first shall be last in line next time, and eventually the last 1335 # shall be first 1336 push $candidate->{locales}->@*, shift $candidate->{locales}->@*; 1337 1338 # Similarly, this test case is added back at the end of the list, 1339 # so will be used only as a last resort in the next thread, and as 1340 # the penultimate resort in the thread following that, etc. as the 1341 # test cases are cycled through. 1342 push $all_tests{$category}->@*, $candidate; 1343 } # End of looping through the categories for this thread 1344 } # End of generating all threads 1345 1346 # Now reformat the tests to a form convenient for the actual test file 1347 # script to use; minimizing the amount of ancillary work it needs to do. 1348 my @cooked_tests; 1349 for my $i (0 .. $#tests_by_thread) { 1350 1351 my $this_tests = $tests_by_thread[$i]; 1352 my @this_cooked_tests; 1353 my (@this_categories, @this_locales); # Parallel arrays 1354 1355 # Every so often we use LC_ALL instead of individual locales, provided 1356 # it is available on the platform 1357 if ( ($i % $lc_all_frequency == $lc_all_frequency - 1) 1358 && $LC_ALL_string eq 'LC_ALL') 1359 { 1360 my $lc_all= ""; 1361 my $category_number; 1362 1363 # Compute the LC_ALL string for the syntax accepted by this 1364 # platform from the locale each category is to be set to. 1365 while (defined($category_number = get_next_category())) { 1366 my $category_name = 1367 $map_category_number_to_name{$category_number}; 1368 my $locale = $this_tests->{$category_name}{locale_name}; 1369 $locale = "C" unless defined $locale; 1370 $category_name =~ s/\@/\\@/g; 1371 1372 $lc_all .= $lc_all_separator if $lc_all ne ""; 1373 1374 if ($use_name_value_pairs) { 1375 $lc_all .= $category_name . "="; 1376 } 1377 1378 $lc_all .= $locale; 1379 } 1380 1381 $this_categories[0] = $LC_ALL; 1382 $this_locales[0] = $lc_all; 1383 } 1384 else { # The other times, just set each category to its locale 1385 # individually 1386 foreach my $category_name (sort keys $this_tests->%*) { 1387 push @this_categories, 1388 $map_category_name_to_number{$category_name}; 1389 push @this_locales, 1390 $this_tests->{$category_name}{locale_name}; 1391 } 1392 } 1393 1394 while (keys $this_tests->%*) { 1395 foreach my $category_name (sort keys $this_tests->%*) { 1396 my $this_category_tests = $this_tests->{$category_name}; 1397 my $test = shift 1398 $this_category_tests->{locale_tests}->@*; 1399 print STDERR __FILE__, ': ', __LINE__, ': ', Dumper $test 1400 if $debug; 1401 if (! $test) { 1402 delete $this_tests->{$category_name}; 1403 next; 1404 } 1405 1406 $test->{category_name} = $category_name; 1407 my $locale_name = $this_category_tests->{locale_name}; 1408 $test->{locale_name} = $locale_name; 1409 $test->{codeset} = 1410 $locale_name_to_object{$locale_name}{codeset}; 1411 1412 push @this_cooked_tests, $test; 1413 } 1414 } 1415 1416 push @cooked_tests, { 1417 thread => $i, 1418 categories => \@this_categories, 1419 locales => \@this_locales, 1420 tests => \@this_cooked_tests, 1421 }; 1422 } 1423 1424 my $all_tests_ref = \@cooked_tests; 1425 my $all_tests_file = tempfile(); 1426 1427 # Store the tests into a file, retrievable by the subprocess 1428 use Storable; 1429 if (! defined store($all_tests_ref, $all_tests_file)) { 1430 die "Could not save the built-up data structure"; 1431 } 1432 1433 my $category_number_to_name = Data::Dumper->Dump( 1434 [ \%map_category_number_to_name ], 1435 [ 'map_category_number_to_name']); 1436 1437 my $switches = ""; 1438 $switches = "switches => [ -DLv ]" if $debug > 2; 1439 1440 # Build up the program to run. This stresses locale thread safety. We 1441 # start a bunch of threads. Each sets the locale of each category being 1442 # tested to the value determined in the code above. Then each sleeps to a 1443 # common start time, at which point they awaken and iterate their 1444 # respective loops. Each iteration runs a set of tests and checks that 1445 # the results are as expected. This should catch any instances of other 1446 # threads interfering. Every so often, each thread shifts to instead use 1447 # the locales and tests of another thread. This catches bugs dealing with 1448 # changing the locale on the fly. 1449 # 1450 # The code above has set up things so that each thread has as disparate 1451 # results from the other threads as possible, so to more likely catch any 1452 # bleed-through. 1453 my $program = <<EOT; 1454 1455 BEGIN { \$| = 1; } 1456 my \$debug = $debug; 1457 my \$thread_count = $thread_count; 1458 my \$iterations_per_test_set = $iterations_per_test_set; 1459 my \$iterations = $iterations; 1460 my \$die_on_negative_sleep = $die_on_negative_sleep; 1461 my \$per_thread_startup = $per_thread_startup; 1462 my \$all_tests_file = $all_tests_file; 1463 my \$alarm_clock = $alarm_clock; 1464EOT 1465 1466 $program .= <<'EOT'; 1467 use threads; 1468 use strict; 1469 use warnings; 1470 use POSIX qw(locale_h); 1471 use utf8; 1472 use Time::HiRes qw(time usleep); 1473 $|=1; 1474 1475 use Data::Dumper; 1476 $Data::Dumper::Sortkeys=1; 1477 $Data::Dumper::Useqq = 1; 1478 $Data::Dumper::Deepcopy = 1; 1479 1480 # Get the tests stored for us by the setup process 1481 use Storable; 1482 my $all_tests_ref = retrieve($all_tests_file); 1483 if (! defined $all_tests_ref) { 1484 die "Could not restore the built-up data structure"; 1485 } 1486 1487 my %corrects; 1488 1489 sub output_test_failure_prefix { 1490 my ($iteration, $category_name, $test) = @_; 1491 my $tid = threads->tid(); 1492 print STDERR "\nthread ", $tid, 1493 " failed in iteration $iteration", 1494 " for locale $test->{locale_name}", 1495 " codeset='$test->{codeset}'", 1496 " $category_name", 1497 "\nop='$test->{op}'", 1498 "\nafter getting ", ($corrects{$category_name} 1499 {$test->{locale_name}} 1500 {all} // 0), 1501 " previous correct results for this category and", 1502 " locale,\nincluding ", ($corrects{$category_name} 1503 {$test->{locale_name}} 1504 {$tid} // 0), 1505 " in this thread\n"; 1506 } 1507 1508 sub output_test_result($$$) { 1509 my ($type, $result, $utf8_matches) = @_; 1510 1511 no locale; 1512 1513 print STDERR "$type"; 1514 1515 my $copy = $result; 1516 if (! $utf8_matches) { 1517 if (utf8::is_utf8($copy)) { 1518 print STDERR " (result already was in UTF-8)"; 1519 } 1520 else { 1521 utf8::upgrade($copy); 1522 print STDERR " (result wasn't in UTF-8; converted for easier", 1523 " comparison)"; 1524 } 1525 } 1526 print STDERR ":\n"; 1527 1528 use Devel::Peek; 1529 Dump $copy; 1530 } 1531 1532 sub iterate { # Run some chunk of iterations of the tests 1533 my ($tid, # Which thread 1534 $initial_iteration, # The number of the first iteration 1535 $count, # How many 1536 $tests_ref) # The tests 1537 = @_; 1538 1539 my $iteration = $initial_iteration; 1540 $count += $initial_iteration; 1541 1542 # Repeatedly ... 1543 while ($iteration < $count) { 1544 my $errors = 0; 1545 1546 use locale; 1547 1548 # ... execute the tests 1549 foreach my $test ($tests_ref->@*) { 1550 1551 # We know what we are expecting 1552 my $expected = $test->{expected}; 1553 1554 my $category_name = $test->{category_name}; 1555 1556 # And do the test. 1557 my $got = eval $test->{op}; 1558 1559 if (! defined $got) { 1560 output_test_failure_prefix($iteration, 1561 $category_name, 1562 $test); 1563 output_test_result("expected", $expected, 1564 1 # utf8ness matches, since only one 1565 ); 1566 $errors++; 1567 next; 1568 } 1569 1570 my $utf8ness_matches = ( utf8::is_utf8($got) 1571 == utf8::is_utf8($expected)); 1572 1573 my $matched = ($got eq $expected); 1574 if ($matched) { 1575 if ($utf8ness_matches) { 1576 no warnings 'uninitialized'; 1577 $corrects{$category_name}{$test->{locale_name}}{all}++; 1578 $corrects{$category_name}{$test->{locale_name}}{$tid}++; 1579 next; # Complete success! 1580 } 1581 } 1582 1583 $errors++; 1584 output_test_failure_prefix($iteration, $category_name, $test); 1585 1586 if ($matched) { 1587 print STDERR "Only difference is UTF8ness of results\n"; 1588 } 1589 output_test_result("expected", $expected, $utf8ness_matches); 1590 output_test_result("got", $got, $utf8ness_matches); 1591 1592 } # Loop to do the remaining tests for this iteration 1593 1594 return 0 if $errors; 1595 1596 $iteration++; 1597 1598 # A way to set a gdb break point pp_study 1599 #study if $iteration % 10 == 0; 1600 1601 threads->yield(); 1602 } 1603 1604 return 1; 1605 } # End of iterate() definition 1606 1607EOT 1608 1609 $program .= "my $category_number_to_name\n"; 1610 1611 $program .= <<'EOT'; 1612 sub setlocales { 1613 # Set each category to the appropriate locale for this test set 1614 my ($categories, $locales) = @_; 1615 for my $i (0 .. $categories->@* - 1) { 1616 if (! setlocale($categories->[$i], $locales->[$i])) { 1617 my $category_name = 1618 $map_category_number_to_name->{$categories->[$i]}; 1619 print STDERR "\nthread ", threads->tid(), 1620 " setlocale($category_name ($categories->[$i]),", 1621 " $locales->[$i]) failed\n"; 1622 return 0; 1623 } 1624 } 1625 1626 return 1; 1627 } 1628 1629 my $startup_insurance = 1; 1630 my $future = $startup_insurance + $thread_count * $per_thread_startup; 1631 my $starting_time = time() + $future; 1632 1633 sub wait_until_time { 1634 1635 # Sleep until the time when all the threads are due to wake up, so 1636 # they run as simultaneously as we can make it. 1637 my $sleep_time = ($starting_time - time()); 1638 #printf STDERR "thread %d started, sleeping %g sec\n", 1639 # threads->tid, $sleep_time; 1640 if ($sleep_time < 0 && $die_on_negative_sleep) { 1641 # What the start time should have been 1642 my $a_better_future = $future - $sleep_time; 1643 1644 my $better_per_thread = 1645 ($a_better_future - $startup_insurance) / $thread_count; 1646 printf STDERR "$per_thread_startup would need to be %g", 1647 " for thread %d to have started\nin sync with", 1648 " the other threads\n", 1649 $better_per_thread, threads->tid; 1650 die "Thread started too late"; 1651 } 1652 else { 1653 usleep($sleep_time * 1_000_000) if $sleep_time > 0; 1654 } 1655 } 1656 1657 # Create all the subthreads: 1..n 1658 my @threads = map +threads->create(sub { 1659 $SIG{'KILL'} = sub { threads->exit(); }; 1660 1661 my $thread = shift; 1662 1663 # Start out with the set of tests whose number is the same as the 1664 # thread number 1665 my $test_set = $thread; 1666 1667 wait_until_time(); 1668 1669 # Loop through all the iterations for this thread 1670 my $this_iteration_start = 1; 1671 do { 1672 # Set up each category with its locale; 1673 my $this_ref = $all_tests_ref->[$test_set]; 1674 return 0 unless setlocales($this_ref->{categories}, 1675 $this_ref->{locales}); 1676 # Then run one batch of iterations 1677 my $result = iterate($thread, 1678 $this_iteration_start, 1679 $iterations_per_test_set, 1680 $this_ref->{tests}); 1681 return 0 if $result == 0; # Quit if failed 1682 1683 # Next iteration will shift to use a different set of locales for 1684 # each category 1685 $test_set++; 1686 $test_set = 0 if $test_set >= $thread_count; 1687 $this_iteration_start += $iterations_per_test_set; 1688 } while ($this_iteration_start <= $iterations); 1689 1690 return 1; # Success 1691 1692 }, $_), (1..$thread_count - 1); # For each non-0 thread 1693 1694 # Here is thread 0. We do a smaller chunk of iterations in it; then 1695 # join whatever threads have finished so far, then do another chunk. 1696 # This tests for bugs that arise as a result of joining. 1697 1698 my %thread0_corrects = (); 1699 my $this_iteration_start = 1; 1700 my $result = 1; # So far, everything is ok 1701 my $test_set = -1; # Start with 0th test set 1702 1703 wait_until_time(); 1704 alarm($alarm_clock); # Guard against hangs 1705 1706 do { 1707 # Next time, we'll use the next test set 1708 $test_set++; 1709 $test_set = 0 if $test_set >= $thread_count; 1710 1711 my $this_ref = $all_tests_ref->[$test_set]; 1712 1713 # set the locales for this test set. Do this even if we 1714 # are going to bail, so that it will be set correctly for the final 1715 # batch after the loop. 1716 $result &= setlocales($this_ref->{categories}, $this_ref->{locales}); 1717 1718 if ($debug > 1) { 1719 my @joinable = threads->list(threads::joinable); 1720 if (@joinable) { 1721 print STDERR "In thread 0, before iteration ", 1722 $this_iteration_start, 1723 " these threads are done: ", 1724 join (", ", map { $_->tid() } @joinable), 1725 "\n"; 1726 } 1727 } 1728 1729 # Join anything already finished. 1730 for my $thread (threads->list(threads::joinable)) { 1731 my $thread_result = $thread->join; 1732 if ($debug > 1) { 1733 print STDERR "In thread 0, before iteration ", 1734 $this_iteration_start, 1735 " joining thread ", $thread->tid(), 1736 "; result=", ((defined $thread_result) 1737 ? $thread_result 1738 : "undef"), 1739 "\n"; 1740 } 1741 1742 # If the thread failed badly, stop testing anything else. 1743 if (! defined $thread_result) { 1744 $_->kill('KILL')->detach() for threads->list(); 1745 print 0; 1746 exit; 1747 } 1748 1749 # Update the status 1750 $result &= $thread_result; 1751 } 1752 1753 # Do a chunk of iterations on this thread 0. 1754 $result &= iterate(0, 1755 $this_iteration_start, 1756 $iterations_per_test_set, 1757 $this_ref->{tests}, 1758 \%thread0_corrects); 1759 $this_iteration_start += $iterations_per_test_set; 1760 1761 # And repeat as long as there are other tests 1762 } while (threads->list(threads::all)); 1763 1764 print $result; 1765EOT 1766 1767 # Finally ready to run the test. 1768 fresh_perl_is($program, 1769 1, 1770 { eval $switches }, 1771 "Verify there were no failures with simultaneous running threads" 1772 ); 1773} 1774