xref: /openbsd-src/gnu/usr.bin/perl/lib/locale_threads.t (revision 5486feefcc8cb79b19e014ab332cc5dfd05b3b33)
1use strict;
2use warnings;
3
4# This file tests interactions with locale and threads
5
6BEGIN {
7    $| = 1;
8
9    chdir 't' if -d 't';
10    require './test.pl';
11    set_up_inc('../lib');
12
13    skip_all_without_config('useithreads');
14    skip_all("Fails on threaded builds on OpenBSD")
15        if ($^O =~ m/^(openbsd)$/);
16
17    require './loc_tools.pl';
18
19    eval { require POSIX; POSIX->import(qw(errno_h locale_h unistd_h )) };
20    if ($@) {
21        skip_all("could not load the POSIX module"); # running minitest?
22    }
23}
24
25use Time::HiRes qw(time usleep);
26
27use Devel::Peek;
28$Devel::Peek::pv_limit = 0; $Devel::Peek::pv_limit = 0;
29use Data::Dumper;
30$Data::Dumper::Sortkeys=1;
31$Data::Dumper::Useqq = 1;
32$Data::Dumper::Deepcopy = 1;
33
34my $debug = 0;
35
36my %map_category_name_to_number;
37my %map_category_number_to_name;
38my @valid_categories = valid_locale_categories();
39foreach my $category (@valid_categories) {
40    my $cat_num = eval "&POSIX::$category";
41    die "Can't determine ${category}'s number: $@" if $@;
42
43    $map_category_name_to_number{$category} = $cat_num;
44    $map_category_number_to_name{$cat_num} = $category;
45}
46
47my $LC_ALL;
48my $LC_ALL_string;
49if (defined $map_category_name_to_number{LC_ALL}) {
50    $LC_ALL_string = 'LC_ALL';
51    $LC_ALL = $map_category_name_to_number{LC_ALL};
52}
53elsif (defined $map_category_name_to_number{LC_CTYPE}) {
54    $LC_ALL_string = 'LC_CTYPE';
55    $LC_ALL = $map_category_name_to_number{LC_CTYPE};
56}
57else {
58    skip_all("No LC_ALL nor LC_CTYPE");
59}
60
61# reset the locale environment
62delete local @ENV{'LANGUAGE', 'LANG', keys %map_category_name_to_number};
63
64my @locales = find_locales($LC_ALL);
65skip_all("Couldn't find any locales") if @locales == 0;
66
67plan(2);
68
69my ($utf8_locales_ref, $non_utf8_locales_ref)
70                                    = classify_locales_wrt_utf8ness(\@locales);
71
72my $official_ascii_name = 'ansi_x341968';
73
74my %lang_code_to_script = (     # ISO 639.2, but without the many codes that
75                                # are for latin (but the few western European
76                                # ones that are latin1 are included)
77                            am          => 'amharic',
78                            amh         => 'amharic',
79                            amharic     => 'amharic',
80                            ar          => 'arabic',
81                            be          => 'cyrillic',
82                            bel         => 'cyrillic',
83                            ben         => 'bengali',
84                            bn          => 'bengali',
85                            bg          => 'cyrillic',
86                            bul         => 'cyrillic',
87                            bulgarski   => 'cyrillic',
88                            bulgarian   => 'cyrillic',
89                            c           => $official_ascii_name,
90                            cnr         => 'cyrillic',
91                            de          => 'latin_1',
92                            deu         => 'latin_1',
93                            deutsch     => 'latin_1',
94                            german      => 'latin_1',
95                            div         => 'thaana',
96                            dv          => 'thaana',
97                            dzo         => 'tibetan',
98                            dz          => 'tibetan',
99                            el          => 'greek',
100                            ell         => 'greek',
101                            ellada      => 'greek',
102                            en          => $official_ascii_name,
103                            eng         => $official_ascii_name,
104                            american    => $official_ascii_name,
105                            british     => $official_ascii_name,
106                            es          => 'latin_1',
107                            fa          => 'arabic',
108                            fas         => 'arabic',
109                            flamish     => 'latin_1',
110                            fra         => 'latin_1',
111                            fr          => 'latin_1',
112                            heb         => 'hebrew',
113                            he          => 'hebrew',
114                            hi          => 'hindi',
115                            hin         => 'hindi',
116                            hy          => 'armenian',
117                            hye         => 'armenian',
118                            ita         => 'latin_1',
119                            it          => 'latin_1',
120                            ja          => 'katakana',
121                            jpn         => 'katakana',
122                            nihongo     => 'katakana',
123                            japanese    => 'katakana',
124                            ka          => 'georgian',
125                            kat         => 'georgian',
126                            kaz         => 'cyrillic',
127                            khm         => 'khmer',
128                            kir         => 'cyrillic',
129                            kk          => 'cyrillic',
130                            km          => 'khmer',
131                            ko          => 'hangul',
132                            kor         => 'hangul',
133                            korean      => 'hangul',
134                            ku          => 'arabic',
135                            kur         => 'arabic',
136                            ky          => 'cyrillic',
137                            latin1      => 'latin_1',
138                            lao         => 'lao',
139                            lo          => 'lao',
140                            mk          => 'cyrillic',
141                            mkd         => 'cyrillic',
142                            macedonian  => 'cyrillic',
143                            mn          => 'cyrillic',
144                            mon         => 'cyrillic',
145                            mya         => 'myanmar',
146                            my          => 'myanmar',
147                            ne          => 'devanagari',
148                            nep         => 'devanagari',
149                            nld         => 'latin_1',
150                            nl          => 'latin_1',
151                            nederlands  => 'latin_1',
152                            dutch       => 'latin_1',
153                            por         => 'latin_1',
154                            posix       => $official_ascii_name,
155                            ps          => 'arabic',
156                            pt          => 'latin_1',
157                            pus         => 'arabic',
158                            ru          => 'cyrillic',
159                            russki      => 'cyrillic',
160                            russian     => 'cyrillic',
161                            rus         => 'cyrillic',
162                            sin         => 'sinhala',
163                            si          => 'sinhala',
164                            so          => 'arabic',
165                            som         => 'arabic',
166                            spa         => 'latin_1',
167                            sr          => 'cyrillic',
168                            srp         => 'cyrillic',
169                            tam         => 'tamil',
170                            ta          => 'tamil',
171                            tg          => 'cyrillic',
172                            tgk         => 'cyrillic',
173                            tha         => 'thai',
174                            th          => 'thai',
175                            thai        => 'thai',
176                            ti          => 'ethiopian',
177                            tir         => 'ethiopian',
178                            uk          => 'cyrillic',
179                            ukr         => 'cyrillic',
180                            ur          => 'arabic',
181                            urd         => 'arabic',
182                            zgh         => 'arabic',
183                            zh          => 'chinese',
184                            zho         => 'chinese',
185                        );
186my %codeset_to_script = (
187                            88591  => 'latin_1',
188                            88592  => 'latin_2',
189                            88593  => 'latin_3',
190                            88594  => 'latin_4',
191                            88595  => 'cyrillic',
192                            88596  => 'arabic',
193                            88597  => 'greek',
194                            88598  => 'hebrew',
195                            88599  => 'latin_5',
196                            885910 => 'latin_6',
197                            885911 => 'thai',
198                            885912 => 'devanagari',
199                            885913 => 'latin_7',
200                            885914 => 'latin_8',
201                            885915 => 'latin_9',
202                            885916 => 'latin_10',
203                            cp1251 => 'cyrillic',
204                            cp1255 => 'hebrew',
205                      );
206
207my %script_priorities = (       # In trying to make the results as distinct as
208                                # possible, make the ones closest to Unicode,
209                                # and ASCII lowest priority
210                            $official_ascii_name => 15,
211                            latin_1 => 14,
212                            latin_9 => 13,
213                            latin_2 => 12,
214                            latin_4 => 12,
215                            latin_5 => 12,
216                            latin_6 => 12,
217                            latin_7 => 12,
218                            latin_8 => 12,
219                            latin_10 => 12,
220                            latin   => 11,  # Unknown latin version
221                        );
222
223my %script_instances;   # Keys are scripts, values are how many locales use
224                        # this script.
225
226sub analyze_locale_name($) {
227
228    # Takes the input name of a locale and creates (and returns) a hash
229    # containing information about that locale
230
231    my %ret;
232    my $input_locale_name = shift;
233
234    my $old_locale = setlocale(LC_CTYPE);
235
236    # Often a locale has multiple aliases, and the base one is returned
237    # by setlocale() when called with an alias.  The base is more likely to
238    # meet the XPG standards than the alias.
239    my $new_locale = setlocale(LC_CTYPE, $input_locale_name);
240    if (! $new_locale) {
241        diag "Unexpectedly can't setlocale(LC_CTYPE, $new_locale);"
242           . " \$!=$!, \$^E=$^E";
243        return;
244    }
245
246    $ret{locale_name} = $new_locale;
247
248    # XPG standard for locale names:
249    #   language[_territory[.codeset]][@modifier]
250    # But, there are instances which violate this, where there is a codeset
251    # without a territory, so instead match:
252    #   language[_territory][.codeset][@modifier]
253    $ret{locale_name} =~ / ^
254                                      ( .+? )          # language
255                              (?:  _  ( .+? ) )?       # territory
256                              (?: \.  ( .+? ) )?       # codeset
257                              (?: \@  ( .+  ) )?       # modifier
258                            $
259                         /x;
260
261    $ret{language}  = $1 // "";
262    $ret{territory} = $2 // "";
263    $ret{codeset}   = $3 // "";
264    $ret{modifier}  = $4 // "";
265
266    # Normalize all but 'territory' to lowercase
267    foreach my $key (qw(language codeset modifier)) {
268        $ret{$key} = lc $ret{$key};
269    }
270
271    # Often, the codeset is omitted from the locale name, but it is still
272    # discoverable (via langinfo() ) for the current locale on many platforms.
273    # We already have switched locales
274    use I18N::Langinfo qw(langinfo CODESET);
275    my $langinfo_codeset = lc langinfo(CODESET);
276
277    # Now can switch back to the locale current on entry to this sub
278    if (! setlocale(LC_CTYPE, $old_locale)) {
279        die "Unexpectedly can't restore locale to $old_locale from"
280          . " $new_locale; \$!=$!, \$^E=$^E";
281    }
282
283    # Normalize the codesets
284    foreach my $codeset_ref (\$langinfo_codeset, \$ret{codeset}) {
285        $$codeset_ref =~ s/\W//g;
286        $$codeset_ref =~ s/iso8859/8859/g;
287        $$codeset_ref =~ s/\b65001\b/utf8/;     # Windows synonym
288        $$codeset_ref =~ s/\b646\b/$official_ascii_name/;
289        $$codeset_ref =~ s/\busascii\b/$official_ascii_name/;
290    }
291
292    # The langinfo codeset, if found, is considered more reliable than the one
293    # in the name.  (This is because libc looks into the actual data
294    # definition.)  So use it unconditionally when found.  But note any
295    # discrepancy as an aid for improving this test.
296    if ($langinfo_codeset) {
297        if ($ret{codeset} && $ret{codeset} ne $langinfo_codeset) {
298            diag "In $ret{locale_name}, codeset from langinfo"
299               . " ($langinfo_codeset) doesn't match codeset in"
300               . " locale_name ($ret{codeset})";
301        }
302        $ret{codeset} = $langinfo_codeset;
303    }
304
305    $ret{is_utf8} = 0 + ($ret{codeset} eq 'utf8');
306
307    # If the '@' modifier is a known script, use it as the script.
308    if (    $ret{modifier}
309        and grep { $_ eq $ret{modifier} } values %lang_code_to_script)
310    {
311        $ret{script} = $ret{nominal_script} = $ret{modifier};
312        $ret{modifier} = "";
313    }
314    elsif ($ret{codeset} && ! $ret{is_utf8}) {
315
316        # The codeset determines the script being used, except if we don't
317        # have the codeset, or it is UTF-8 (which covers a multitude of
318        # scripts).
319        #
320        # We have hard-coded the scripts corresponding to a few of these
321        # non-UTF-8 codesets.  See if this is one of them.
322        $ret{script} = $codeset_to_script{$ret{codeset}};
323        if ($ret{script}) {
324
325            # For these, the script is likely a combination of ASCII (from
326            # 0-127), and the script from (128-255).  Reflect that in the name
327            # used (for distinguishing below)
328            $ret{script} .= '_' . $official_ascii_name;
329        }
330        elsif ($ret{codeset} =~ /^koi/) {   # Another common set.
331            $ret{script} = "cyrillic_${official_ascii_name}";
332        }
333        else {  # Here the codeset name is unknown to us.  Just assume it
334                # means a whole new script.  Add the language at the end of
335                # the name to further make it distinct
336            $ret{script} = $ret{codeset};
337            $ret{script} .= "_$ret{language}"
338                                    if $ret{codeset} !~ /$official_ascii_name/;
339        }
340    }
341    else {  # Here, the codeset is unknown or is UTF-8.
342
343        # In these cases look up the script based on the language.  The table
344        # is meant to be pretty complete, but omits the many scripts that are
345        # ASCII or Latin1.  And it omits the fullnames of languages whose
346        # scripts are themselves.  The grep below catches those.  Defaulting
347        # to Latin means that a non-standard language name is considered to be
348        # latin -- maybe not the best outcome but what else is better?
349        $ret{script} = $lang_code_to_script{$ret{language}};
350        if (! $ret{script}) {
351            $ret{script} = (grep { $ret{language} eq $_ }
352                                                    values %lang_code_to_script)
353                            ? $ret{language}
354                            : 'latin';
355        }
356    }
357
358    # If we have @euro, and the script is ASCII or latin or latin1, change it
359    # into latin9, which is closer to what is going on.  latin9 has a few
360    # other differences from latin1, but it's not worth creating a whole new
361    # script type that differs only in the currency symbol.
362    if (  ($ret{modifier} && $ret{modifier} eq 'euro')
363        && $ret{script} =~ / ^ ($official_ascii_name | latin (_1)? ) $ /x)
364    {
365        $ret{script} = 'latin_9';
366    }
367
368    #  Look up the priority of this script.  All the non-listed ones have
369    #  highest (0 or 1) priority.  We arbitrarily make the ones higher
370    #  priority (0) that aren't known to be half-ascii, simply because they
371    #  might be entirely different than most locales.
372    $ret{priority} = $script_priorities{$ret{script}};
373    if (! $ret{priority}) {
374        $ret{priority} = (   $ret{script} ne $official_ascii_name
375                          && $ret{script} =~ $official_ascii_name)
376                         ? 0
377                         : 1;
378    }
379
380    # Script names have been set up so that anything after an underscore is a
381    # modifier of the main script.  We keep a counter of which occurence of
382    # this script this is.  This is used along with the priority to order the
383    # locales so that the characters are as varied as possible.
384    my $script_root = ($ret{script} =~ s/_.*//r) . "_$ret{is_utf8}";
385    $ret{script_instance} = $script_instances{$script_root}++;
386
387    return \%ret;
388}
389
390# Prioritize locales that are most unlike the standard C/Latin1-ish ones.
391# This is to minimize getting passes for tests on a category merely because
392# they share many of the same characteristics as the locale of another
393# category simultaneously in effect.
394sub sort_locales ()
395{
396    my $cmp =  $a->{script_instance} <=> $b->{script_instance};
397    return $cmp if $cmp;
398
399    $cmp =  $a->{priority} <=> $b->{priority};
400    return $cmp if $cmp;
401
402    $cmp =  $a->{script} cmp $b->{script};
403    return $cmp if $cmp;
404
405    $cmp =  $a->{modifier} cmp $b->{modifier};
406    return $cmp if $cmp;
407
408    $cmp =  $a->{codeset} cmp $b->{codeset};
409    return $cmp if $cmp;
410
411    $cmp =  $a->{territory} cmp $b->{territory};
412    return $cmp if $cmp;
413
414    return lc $a cmp lc $b;
415}
416
417# Find out extra info about each locale
418my @cleaned_up_locales;
419for my $locale (@locales) {
420    my $locale_struct = analyze_locale_name($locale);
421
422    next unless $locale_struct;
423
424    my $name = $locale_struct->{locale_name};
425    next if grep { $name eq $_->{locale_name} } @cleaned_up_locales;
426
427    push @cleaned_up_locales, $locale_struct;
428}
429
430@locales = @cleaned_up_locales;
431
432# Without a proper codeset, we can't really know how to test.  This should
433# only happen on platforms that lack the ability to determine the codeset.
434@locales = grep { $_->{codeset} ne "" } @locales;
435
436# Sort into priority order.
437@locales = sort sort_locales @locales;
438
439# First test
440SKIP: { # perl #127708
441    my $locale = $locales[0];
442    skip("No valid locale to test with", 1) if $locale->{codeset} eq
443                                                          $official_ascii_name;
444    local $ENV{LC_MESSAGES} = $locale->{locale_name};
445
446    # We're going to try with all possible error numbers on this platform
447    my $error_count = keys(%!) + 1;
448
449    print fresh_perl("
450        use threads;
451        use strict;
452        use warnings;
453        use Time::HiRes qw(usleep);
454
455        my \$errnum = 1;
456
457        my \@threads = map +threads->create(sub {
458            usleep 0.1;
459            'threads'->yield();
460
461            for (1..5_000) {
462                \$errnum = (\$errnum + 1) % $error_count;
463                \$! = \$errnum;
464
465                # no-op to trigger stringification
466                next if \"\$!\" eq \"\";
467            }
468        }), (0..1);
469        \$_->join for splice \@threads;",
470    {}
471    );
472
473    pass("Didn't segfault");
474}
475
476# Second test setup
477my %locale_name_to_object;
478for my $locale (@locales) {
479    $locale_name_to_object{$locale->{locale_name}} = $locale;
480}
481
482sub sort_by_hashed_locale {
483    local $a = $locale_name_to_object{$a};
484    local $b = $locale_name_to_object{$b};
485
486    return sort_locales;
487}
488
489sub min {
490    my ($a, $b) = @_;
491    return $a if $a <= $b;
492    return $b;
493}
494
495# Smokes have shown this to be about the maximum numbers some platforms can
496# handle.  khw has tried 500 threads/1000 iterations on Linux
497my $thread_count = 15;
498my $iterations = 100;
499
500my $alarm_clock = (1 * 10 * 60);    # A long time, just to prevent hanging
501
502# Chunk the iterations, so that every so often the test comes up for air.
503my $iterations_per_test_set = min(30, int($iterations / 5));
504$iterations_per_test_set = 1 if $iterations_per_test_set == 0;
505
506# Sometimes the test calls setlocale() for each individual locale category.
507# But every this many threads, it will be called just once, using LC_ALL to
508# specify the categories.  This way both setting individual categories and
509# LC_ALL get tested.  But skip this nicety on platforms where we are restricted from
510# using all the available categories, as it would make the code more complex
511# for not that much gain.
512my @platform_categories = platform_locale_categories();
513my $lc_all_frequency =  scalar @platform_categories == scalar @valid_categories
514                        ? 3
515                        : -1;
516
517# To avoid things getting too big; skip tests whose results are larger than
518# this many characters.
519my $max_result_length = 10000;
520
521# Estimate as to how long in seconds to allow a thread to be ready to roll
522# after creation, so as to try to get all the threads to start as
523# simultaneously as possible
524my $per_thread_startup = .18;
525
526# For use in experimentally tuning the above value
527my $die_on_negative_sleep = 1;
528
529# We don't need to test every possible errno, but you could change this to do
530# so by setting it to negative
531my $max_message_catalog_entries = 10;
532
533# December 18, 1987
534my $strftime_args = "'%c', 0, 0, , 12, 18, 11, 87";
535
536my %distincts;  # The distinct 'operation => result' cases
537my %op_counts;  # So we can bail early if more test cases than threads
538my $separator = '____';     # The operation and result are often melded into a
539                            # string separated by this.
540
541sub pack_op_result($$) {
542    my ($op, $result) = @_;
543    return $op . $separator
544         . (0 + utf8::is_utf8($op)) . $separator
545         . $result . $separator
546         . (0 + utf8::is_utf8($result));
547}
548
549sub fixup_utf8ness($$) {
550    my ($operand, $utf8ness) = @_;
551
552    # Make sure $operand is encoded properly
553
554    if ($utf8ness + 0 != 0 + utf8::is_utf8($$operand)) {
555        if ($utf8ness) {
556            utf8::upgrade($$operand);
557        }
558        else {
559            utf8::downgrade($$operand);
560        }
561    }
562}
563
564sub unpack_op_result($) {
565    my $op_result = shift;
566
567    my ($op, $op_utf8ness, $result, $result_utf8ness) =
568                                            split $separator, $op_result;
569    fixup_utf8ness(\$op, $op_utf8ness);
570    fixup_utf8ness(\$result, $result_utf8ness);
571
572    return ($op, $result);
573}
574
575sub add_trials($$;$)
576{
577    # Add a test case for category $1.
578    # $2 is the test case operation to perform
579    # $3 is a constraint, optional.
580
581    my $category_name = shift;
582    my $input_op = shift;                   # The eval string to perform
583    my $locale_constraint = shift // "";    # If defined, the test will be
584                                            # created only for locales that
585                                            # match this
586  LOCALE:
587    foreach my $locale (@locales) {
588        my $locale_name = $locale->{locale_name};
589        my $op = $input_op;
590
591        # All categories should be set to the same locale to make sure
592        # this test gets the valid results.
593        next unless setlocale($LC_ALL, $locale_name);
594
595        # As of NetBSD 10, it doesn't implement LC_COLLATE, and setting that
596        # category to anything but C or POSIX fails.  But setting LC_ALL to
597        # other locales (as we just did) returns success, while leaving
598        # LC_COLLATE untouched.  Therefore, also set the category individually
599        # to catch such things.  This problem may not be confined to NetBSD.
600        # This also works if the platform lacks LC_ALL.  We at least set
601        # LC_CTYPE (via '$LC_ALL' above) besides the category.
602        next unless setlocale($map_category_name_to_number{$category_name},
603                              $locale_name);
604
605        # Use a placeholder if this test requires a particular constraint,
606        # which isn't met in this case.
607        if ($locale_constraint) {
608            if ($locale_constraint eq 'utf8_only') {
609                next if ! $locale->{is_utf8};
610            }
611            elsif ($locale_constraint eq 'a<b') {
612                my $result = eval "use locale; 'a' lt 'B'";
613                die "$category_name: '$op (a lt B)': $@" if $@;
614                next unless $result;
615            }
616            else {
617                die "Only accepted locale constraints are 'utf8_only' and 'a<b'"
618            }
619        }
620
621        # Calculate what the expected value of the test should be.  We're
622        # doing this here in the main thread and with all the locales set to
623        # be the same thing.  The test will be that we should get this value
624        # under stress, with each thread using different locales for each
625        # category, and multiple threads simultaneously executing with
626        # disparate locales
627        my $eval_string = ($op) ? "use locale; $op;" : "";
628        my $result = eval $eval_string;
629        die "$category_name: '$op': $@" if $@;
630        if (! defined $result) {
631            if ($debug) {
632                print STDERR __FILE__, ": ", __LINE__,
633                             ": Undefined result for $locale_name",
634                             " $category_name: '$op'\n";
635            }
636            next;
637        }
638        elsif ($debug > 1) {
639            print STDERR "\n", __FILE__, ": ", __LINE__, ": $category_name:",
640                         " $locale_name: Op = ", Dumper($op), "; Returned ";
641            Dump $result;
642        }
643        if (length $result > $max_result_length) {
644            diag("For $locale_name, '$op', result is too long; skipped");
645            next;
646        }
647
648        # It seems best to not include tests with mojibake results, which here
649        # is checked for by two question marks in a row.  (strxfrm is excluded
650        # from this restriction, as the result is really binary, so '??' could
651        # and does come up, not meaning mojibake.)  A concrete example of this
652        # is in Mingw the locale Yi_China.1252.  CP 1252 is for a Latin
653        # script; just about anything from an East Asian script is bound to
654        # fail.  It makes no sense to have this locale, but it exists.
655        if ($eval_string !~ /xfrm/ && $result =~ /\?\?/) {
656            if ($debug)  {
657                print STDERR __FILE__, ": ", __LINE__,
658                  " For $locale_name, op=$op, result has mojibake: $result\n";
659            }
660
661            next;
662        }
663
664        # Some systems are buggy in that setlocale() gives non-deterministic
665        # results for some locales.   Here we try to exclude those from our
666        # test by trying the setlocale this many times to see if it varies:
667        my $deterministic_trial_count = 5;
668
669        # To do this, we set the locale to an 'alternate' locale between
670        # trials.  This defeats any attempt by the implementation to skip the
671        # setlocale if it is already in said locale.
672        my $alternate;
673        my @alternate;
674
675        # If possible, the alternate is chosen to be of the opposite UTF8ness,
676        # so as to reset internal states about that.
677        if (! $utf8_locales_ref || ! $utf8_locales_ref->@*) {
678
679            # If no UTF-8 locales, must choose one that is non-UTF-8.
680            @alternate = grep { $_ ne $locale_name } $non_utf8_locales_ref->@*;
681        }
682        elsif (! $non_utf8_locales_ref || ! $non_utf8_locales_ref->@*) {
683
684            # If no non-UTF-8 locales, must choose one that is UTF-8.
685            @alternate = grep { $_ ne $locale_name } $utf8_locales_ref->@*;
686        }
687        elsif (grep { $_ eq $locale_name } $utf8_locales_ref->@*) {
688            @alternate = $non_utf8_locales_ref->@*;
689        }
690        else {
691            @alternate = $utf8_locales_ref->@*;
692        }
693
694        # Now do the trials.  For each, we choose the next alternate on the
695        # list, rotating the list so the following iteration will choose a
696        # different alternate.
697        for my $i (1 .. $deterministic_trial_count - 1) {
698            my $other = shift @alternate;
699            push @alternate, $other;
700
701            # Run the test on the alternate locale
702            if (! setlocale($LC_ALL, $other)) {
703                if (   $LC_ALL_string eq 'LC_ALL'
704                    || ! setlocale($map_category_name_to_number{$category_name},
705                                   $other))
706                {
707                    die "Unexpectedly can't set locale to $other:"
708                      . " \$!=$!, \$^E=$^E";
709                }
710            }
711
712            eval $eval_string;
713
714            # Then run it on the one we are hoping to test
715            if (! setlocale($LC_ALL, $locale_name)) {
716                if (   $LC_ALL_string eq 'LC_ALL'
717                    || ! setlocale($map_category_name_to_number{$category_name},
718                                   $locale_name))
719                {
720                    die "Unexpectedly can't set locale to $locale_name from "
721                      . setlocale($LC_ALL)
722                      . "; \$!=$!, \$^E=$^E";
723                }
724            }
725
726            my $got = eval $eval_string;
727            next if $got eq $result
728                 && utf8::is_utf8($got) == utf8::is_utf8($result);
729
730            # If the result varied from the expected value, this is a
731            # non-deterministic locale, so, don't test it.
732            diag("For '$eval_string',\nresults in iteration $i differed from"
733               . " the original\ngot");
734            Dump($got);
735            diag("expected");
736            Dump($result);
737            next LOCALE;
738        }
739
740        # Here, the setlocale for this locale appears deterministic.  Use it.
741        my $op_result = pack_op_result($op, $result);
742        push $distincts{$category_name}{$op_result}{locales}->@*, $locale_name;
743        # No point in looking beyond this if we already have all the tests we
744        # need.  Note this assumes that the same op isn't used in two
745        # categories.
746        if (defined $op_counts{$op} && $op_counts{$op} >= $thread_count)
747        {
748            last;
749        }
750    }
751}
752
753use Config;
754
755# Figure out from config how to represent disparate LC_ALL
756my @valid_category_numbers = sort { $a <=> $b }
757                    map { $map_category_name_to_number{$_} } @valid_categories;
758
759my $use_name_value_pairs = defined $Config{d_perl_lc_all_uses_name_value_pairs};
760my $lc_all_separator = ($use_name_value_pairs)
761                       ? ";"
762                       : $Config{perl_lc_all_separator} =~ s/"//gr;
763my @position_to_category_number;
764if (! $use_name_value_pairs) {
765    my $positions = $Config{perl_lc_all_category_positions_init} =~ s/[{}]//gr;
766    $positions =~ s/,//g;
767    $positions =~ s/^ +//;
768    $positions =~ s/ +$//;
769    @position_to_category_number = split / \s+ /x, $positions
770}
771
772sub get_next_category() {
773    use feature 'state';
774    state $index;
775
776    # Called to rotate all the legal locale categories
777
778    my $which = ($use_name_value_pairs)
779                ? \@valid_category_numbers
780                : \@position_to_category_number;
781
782    $index = -1 unless defined $index;
783    $index++;
784
785    if (! defined $which->[$index]) {
786        undef $index;
787        return;
788    }
789
790    my $category_number = $which->[$index];
791    return $category_number if $category_number != $LC_ALL;
792
793    # If this was LC_ALL, the next one won't be
794    return &get_next_category();
795}
796
797SKIP: {
798    skip("Unsafe locale threads", 1) unless ${^SAFE_LOCALES};
799
800    # The second test is several threads nearly simulataneously executing
801    # locale-sensitive operations with the categories set to disparate
802    # locales.  This catches cases where the results of a given category is
803    # related to what the locale is of another category.  (As an example, this
804    # test showed that some platforms require LC_CTYPE to be the same as
805    # LC_COLLATION, and/or LC_MESSAGES for proper results, so that Perl had to
806    # change to bring these into congruence under the hood).  And it also
807    # catches where there is interference between multiple threads.
808    #
809    # This test tries to exercise every underlying locale-dependent operation
810    # available in Perl.  It doesn't test every use of the operation, but
811    # includes some Perl construct that uses each.  For example, it tests lc
812    # but not lcfirst.  That would be redundant for this test; it wants to
813    # know if lowercasing works under threads and locales.  But if the
814    # implementations were disjoint at the time this test was written, it
815    # would try each implementation.  So, various things in the POSIX module
816    # have separate tests from the ones in core.
817    #
818    # For each such underlying locale-dependent operation, a Perl-visible
819    # construct is chosen that uses it.  And a typical input or set of inputs
820    # is passed to that and the results are noted for every available locale
821    # on the platform.  Many locales will have identical results, so the
822    # duplicates are stored separately.
823    #
824    # There will be N simultaneous threads.  Each thread is configured to set
825    # a locale for each category, to run operations whose results depend on
826    # that locale, then check that the result matches the expected value, and
827    # to immediately repeat some largish number of iterations.  The goal is to
828    # see if the locales on each thread are truly independent of those on the
829    # other threads.
830    #
831    # To that end, the locales are chosen so that the results differ from
832    # every other locale.  Otherwise, the thread results wouldn't be truly
833    # independent.  But if there are more threads than there are distinct
834    # results, duplicates are used to fill up what would otherwise be empty
835    # slots.  That is the best we can do on those platforms.
836    #
837    # Having lots of locales to continually switch between stresses things so
838    # as to find potential segfaults where locale changing isn't really thread
839    # safe.
840
841    # There is a bug in older Windows runtimes in which locales in CP1252 and
842    # similar code pages whose names aren't entirely ASCII aren't recognized
843    # by later setlocales.  Some names that are all ASCII are synonyms for
844    # such names.  Weed those out by doing a setlocale of the original name,
845    # and then a setlocale of the resulting one.  Discard locales which have
846    # any unacceptable name
847    if (${^O} eq "MSWin32" && $Config{'libc'} !~ /ucrt/) {
848        @locales = grep {
849            my $locale_name = $_->{locale_name};
850            my $underlying_name = setlocale(&LC_CTYPE, $locale_name);
851
852            # Defeat any attempt to skip the setlocale if the same as current,
853            # by switching to a locale very unlikey to be the current one.
854            setlocale($LC_ALL, "Albanian");
855
856            defined($underlying_name) && setlocale(&LC_CTYPE, $underlying_name)
857        } @locales;
858    }
859
860    # Create a hash of the errnos:
861    #          "1" => "Operation\\ not\\ permitted",
862    #          "2" => "No\\ such\\ file\\ or\\ directory",
863    #          etc.
864    my %msg_catalog;
865    foreach my $error (sort keys %!) {
866        my $number = eval "Errno::$error";
867        $! = $number;
868        my $description = "$!";
869        next unless "$description";
870        $msg_catalog{$number} = quotemeta "$description";
871    }
872
873    # Then just the errnos.
874    my @msg_catalog = sort { $a <=> $b } keys %msg_catalog;
875
876    # Remove the excess ones.
877    splice @msg_catalog, $max_message_catalog_entries
878                                          if $max_message_catalog_entries >= 0;
879    my $msg_catalog = join ',', @msg_catalog;
880
881    eval  { my $discard = POSIX::localeconv()->{currency_symbol}; };
882    my $has_localeconv = $@ eq "";
883
884    # Now go through and create tests for each locale category on the system.
885    # These tests were determined by grepping through the code base for
886    # locale-sensitive operations, and then figuring out something to exercise
887    # them.
888    foreach my $category (@valid_categories) {
889        no warnings 'uninitialized';
890
891        next if $category eq 'LC_ALL';  # Tested below as a combination of the
892                                        # individual categories
893        if ($category eq 'LC_COLLATE') {
894            add_trials('LC_COLLATE',
895                       # 'reverse' causes it to be definitely out of order for
896                       # the 'sort' to correct
897                       'quotemeta join "", sort reverse map { chr } (1..255)');
898
899            # We pass an re to exclude testing locales that don't necessarily
900            # have a lt b.
901            add_trials('LC_COLLATE', '"a" lt "B"', 'a<b');
902            add_trials('LC_COLLATE', 'my $a = "a"; my $b = "B";'
903                                   . ' POSIX::strcoll($a, $b) < 0;',
904                        'a<b');
905
906            # Doesn't include NUL because our memcollxfrm implementation of it
907            # isn't perfect
908            add_trials('LC_COLLATE', 'my $string = quotemeta join "",'
909                                   . ' map { chr } (1..255);'
910                                   . ' POSIX::strxfrm($string)');
911            next;
912        }
913
914        if ($category eq 'LC_CTYPE') {
915            add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta lc'
916                                 . ' join "" , map { chr } (0..255)');
917            add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta uc'
918                                 . ' join "", map { chr } (0..255)');
919            add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta CORE::fc'
920                                 . ' join "", map { chr } (0..255)');
921            add_trials('LC_CTYPE', 'no warnings "locale";'
922                                 . ' my $string = join "", map { chr } 0..255;'
923                                 . ' $string =~ s|(.)|$1=~/\d/?1:0|gers');
924            add_trials('LC_CTYPE', 'no warnings "locale";'
925                                 . ' my $string = join "", map { chr } 0..255;'
926                                 . ' $string =~ s|(.)|$1=~/\s/?1:0|gers');
927            add_trials('LC_CTYPE', 'no warnings "locale";'
928                                 . ' my $string = join "", map { chr } 0..255;'
929                                 . ' $string =~ s|(.)|$1=~/\w/?1:0|gers');
930            add_trials('LC_CTYPE', 'no warnings "locale";'
931                              . ' my $string = join "", map { chr } 0..255;'
932                              . ' $string =~ s|(.)|$1=~/[[:alpha:]]/?1:0|gers');
933            add_trials('LC_CTYPE', 'no warnings "locale";'
934                              . ' my $string = join "", map { chr } 0..255;'
935                              . ' $string =~ s|(.)|$1=~/[[:alnum:]]/?1:0|gers');
936            add_trials('LC_CTYPE', 'no warnings "locale";'
937                              . ' my $string = join "", map { chr } 0..255;'
938                              . ' $string =~ s|(.)|$1=~/[[:ascii:]]/?1:0|gers');
939            add_trials('LC_CTYPE', 'no warnings "locale";'
940                              . ' my $string = join "", map { chr } 0..255;'
941                              . ' $string =~ s|(.)|$1=~/[[:blank:]]/?1:0|gers');
942            add_trials('LC_CTYPE', 'no warnings "locale";'
943                              . ' my $string = join "", map { chr } 0..255;'
944                              . ' $string =~ s|(.)|$1=~/[[:cntrl:]]/?1:0|gers');
945            add_trials('LC_CTYPE', 'no warnings "locale";'
946                              . ' my $string = join "", map { chr } 0..255;'
947                              . ' $string =~ s|(.)|$1=~/[[:graph:]]/?1:0|gers');
948            add_trials('LC_CTYPE', 'no warnings "locale";'
949                              . ' my $string = join "", map { chr } 0..255;'
950                              . ' $string =~ s|(.)|$1=~/[[:lower:]]/?1:0|gers');
951            add_trials('LC_CTYPE', 'no warnings "locale";'
952                              . ' my $string = join "", map { chr } 0..255;'
953                              . ' $string =~ s|(.)|$1=~/[[:print:]]/?1:0|gers');
954            add_trials('LC_CTYPE', 'no warnings "locale";'
955                              . ' my $string = join "", map { chr } 0..255;'
956                              . ' $string =~ s|(.)|$1=~/[[:punct:]]/?1:0|gers');
957            add_trials('LC_CTYPE', 'no warnings "locale";'
958                              . ' my $string = join "", map { chr } 0..255;'
959                              . ' $string =~ s|(.)|$1=~/[[:upper:]]/?1:0|gers');
960            add_trials('LC_CTYPE', 'no warnings "locale";'
961                             . ' my $string = join "", map { chr } 0..255;'
962                             . ' $string =~ s|(.)|$1=~/[[:xdigit:]]/?1:0|gers');
963            add_trials('LC_CTYPE', 'use I18N::Langinfo qw(langinfo CODESET);'
964                                 . ' no warnings "uninitialized";'
965                                 . ' langinfo(CODESET);');
966
967            # In the multibyte functions, the non-reentrant ones can't be made
968            # thread safe
969            if ($Config{'d_mbrlen'} eq 'define') {
970                add_trials('LC_CTYPE', 'my $string = chr 0x100;'
971                                     . ' utf8::encode($string);'
972                                     . ' no warnings "uninitialized";'
973                                     . ' POSIX::mblen(undef);'
974                                     . ' POSIX::mblen($string)',
975                           'utf8_only');
976            }
977            if ($Config{'d_mbrtowc'} eq 'define') {
978                add_trials('LC_CTYPE', 'my $value; my $str = "\x{100}";'
979                                     . ' utf8::encode($str);'
980                                     . ' no warnings "uninitialized";'
981                                     . ' POSIX::mbtowc(undef, undef);'
982                                     . ' POSIX::mbtowc($value, $str); $value;',
983                           'utf8_only');
984            }
985            if ($Config{'d_wcrtomb'} eq 'define') {
986                add_trials('LC_CTYPE', 'my $value;'
987                                     . ' no warnings "uninitialized";'
988                                     . ' POSIX::wctomb(undef, undef);'
989                                     . ' POSIX::wctomb($value, 0xFF);'
990                                     . ' $value;',
991                           'utf8_only');
992            }
993
994            add_trials('LC_CTYPE',
995                       'no warnings "locale";'
996                     . ' my $uc = CORE::uc join "", map { chr } (0..255);'
997                     . ' my $fc = quotemeta CORE::fc $uc;'
998                     . ' $uc =~ / \A $fc \z /xi;');
999            next;
1000        }
1001
1002        if ($category eq 'LC_MESSAGES') {
1003            add_trials('LC_MESSAGES',
1004                     "join \"\n\", map { \$! = \$_; \"\$!\" } ($msg_catalog)");
1005            add_trials('LC_MESSAGES',
1006                  'use I18N::Langinfo qw(langinfo YESSTR NOSTR YESEXPR NOEXPR);'
1007                . ' no warnings "uninitialized";'
1008                . ' join ",",'
1009                . '     map { langinfo($_) } YESSTR, NOSTR, YESEXPR, NOEXPR;');
1010            next;
1011        }
1012
1013        if ($category eq 'LC_MONETARY') {
1014            if ($has_localeconv) {
1015                add_trials('LC_MONETARY', "localeconv()->{currency_symbol}");
1016            }
1017            add_trials('LC_MONETARY',
1018                       'use I18N::Langinfo qw(langinfo CRNCYSTR);'
1019                    . ' no warnings "uninitialized";'
1020                    . ' join "|",  map { langinfo($_) } CRNCYSTR;');
1021            next;
1022        }
1023
1024        if ($category eq 'LC_NUMERIC') {
1025            if ($has_localeconv) {
1026                add_trials('LC_NUMERIC', "no warnings; 'uninitialised';"
1027                                       . " join '|',"
1028                                       . " localeconv()->{decimal_point},"
1029                                       . " localeconv()->{thousands_sep}");
1030            }
1031            add_trials('LC_NUMERIC',
1032                       'use I18N::Langinfo qw(langinfo RADIXCHAR THOUSEP);'
1033                     . ' no warnings "uninitialized";'
1034                     . ' join "|",  map { langinfo($_) } RADIXCHAR, THOUSEP;');
1035
1036            # Use a variable to avoid runtime bugs being hidden by constant
1037            # folding
1038            add_trials('LC_NUMERIC', 'my $in = 4.2; sprintf("%g", $in)');
1039            next;
1040        }
1041
1042        if ($category eq 'LC_TIME') {
1043            add_trials('LC_TIME', "POSIX::strftime($strftime_args)");
1044            add_trials('LC_TIME', <<~'END_OF_CODE');
1045                use I18N::Langinfo qw(langinfo
1046                    ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5 ABDAY_6 ABDAY_7
1047                    ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
1048                    ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12
1049                    DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7
1050                    MON_1 MON_2 MON_3 MON_4 MON_5 MON_6
1051                    MON_7 MON_8 MON_9 MON_10 MON_11 MON_12
1052                    D_FMT D_T_FMT T_FMT);
1053                no warnings "uninitialized";
1054                join "|",
1055                    map { langinfo($_) }
1056                        ABDAY_1,ABDAY_2,ABDAY_3,ABDAY_4,ABDAY_5,
1057                        ABDAY_6,ABDAY_7,
1058                        ABMON_1,ABMON_2,ABMON_3,ABMON_4,ABMON_5,
1059                        ABMON_6, ABMON_7,ABMON_8,ABMON_9,ABMON_10,
1060                        ABMON_11,ABMON_12,
1061                        DAY_1,DAY_2,DAY_3,DAY_4,DAY_5,DAY_6,DAY_7,
1062                        MON_1,MON_2,MON_3,MON_4,MON_5,MON_6, MON_7,
1063                        MON_8,MON_9,MON_10,MON_11,MON_12,
1064                        D_FMT,D_T_FMT,T_FMT;
1065                END_OF_CODE
1066            next;
1067        }
1068    } # End of creating test cases.
1069
1070
1071    # Now analyze the test cases
1072    my %all_tests;
1073    foreach my $category (keys %distincts) {
1074        my %results;
1075        my %distinct_results_count;
1076
1077        # Find just the distinct test operations; sort for repeatibility
1078        my %distinct_ops;
1079        for my $op_result (sort keys $distincts{$category}->%*) {
1080            my ($op, $result) = unpack_op_result($op_result);
1081
1082            $distinct_ops{$op}++;
1083            push $results{$op}->@*, $result;
1084            $distinct_results_count{$result} +=
1085                        scalar $distincts{$category}{$op_result}{locales}->@*;
1086        }
1087
1088        # And get a sorted list of all the test operations
1089        my @ops = sort keys %distinct_ops;
1090
1091        sub gen_combinations {
1092
1093            # Generate all the non-empty combinations of operations and
1094            # results (for the current category) possible on this platform.
1095            # That is, if a category has N operations, it will generate a list
1096            # of entries.  Each entry will itself have N elements, one for
1097            # each operation, and when all the entries are considered
1098            # together, every possible outcome is represented.
1099
1100            my $op_ref = shift;         # Reference to list of operations
1101            my $results_ref = shift;    # Reference to hash; key is operation;
1102                                        # value is an array of all possible
1103                                        # outcomes of this operation.
1104            my $distincts_ref = shift;  # Reference to %distincts of this
1105                                        # category
1106
1107            # Get the first operation on the list
1108            my $op = shift $op_ref->@*;
1109
1110            # The return starts out as a list of hashes of all possible
1111            # outcomes for executing 'op'.  Each hash has two keys:
1112            #   'op_results' is an array of one element: 'op => result',
1113            #                packed into a string.
1114            #   'locales'    is an array of all the locales which have the
1115            #                same result for 'op'
1116            my @return;
1117            foreach my $result ($results_ref->{$op}->@*) {
1118                my $op_result = pack_op_result($op, $result);
1119                push @return, {
1120                            op_results => [ $op_result ],
1121                            locales    => $distincts_ref->{$op_result}{locales},
1122                          };
1123            }
1124
1125            # If this is the final element of the list, we are done.
1126            return (\@return) unless $op_ref->@*;
1127
1128            # Otherwise recurse to generate the combinations for the remainder
1129            # of the list.
1130            my $recurse_return = &gen_combinations($op_ref,
1131                                                   $results_ref,
1132                                                   $distincts_ref);
1133            # Now we have to generate the combinations of the current item
1134            # with the ones returned by the recursion.  Each element of the
1135            # current item is combined with each element of the recursed.
1136            my @combined;
1137            foreach my $this (@return) {
1138                my @this_locales = $this->{locales}->@*;
1139                foreach my $recursed ($recurse_return->@*) {
1140                    my @recursed_locales = $recursed->{locales}->@*;
1141
1142                    # @this_locales is a list of locales this op => result is
1143                    # valid for.  @recursed_locales is similarly a list of the
1144                    # valid ones for the recursed return.  Their intersection
1145                    # is a list of the locales valid for this combination.
1146                    my %seen;
1147                    $seen{$_}++ foreach @this_locales, @recursed_locales;
1148                    my @intersection = grep $seen{$_} == 2, keys %seen;
1149
1150                    # An alternative intersection algorithm:
1151                    # my (%set1, %set2);
1152                    # @set1{@list1} = ();
1153                    # @set2{@list2} = ();
1154                    # my @intersection = grep exists $set1{$_}, keys %set2;
1155
1156                    # If the intersection is empty, this combination can't
1157                    # actually happen on this platform.
1158                    next unless @intersection;
1159
1160                    # Append the recursed list to the current list to form the
1161                    # combined list.
1162                    my @combined_result = $this->{op_results}->@*;
1163                    push @combined_result, $recursed->{op_results}->@*;
1164                    # And create the hash for the combined result, including
1165                    # the locales it is valid for
1166                    push @combined, {
1167                                      op_results => \@combined_result,
1168                                      locales    => \@intersection,
1169                                    };
1170                }
1171            }
1172
1173            return \@combined;
1174        } # End of gen_combinations() definition
1175
1176        # The result of calling gen_combinations() will be an array of hashes.
1177        #
1178        # The main value in each hash is an array (whose key is 'op_results')
1179        # containing all the tests for this category for a thread.  If there
1180        # were N calls to 'add_trial' for this category, there will be 'N'
1181        # elements in the array.  Each element is a string packed with the
1182        # operation to eval in a thread and the operation's expected result.
1183        #
1184        # The other data structure in each hash is an array with the key
1185        # 'locales'.  That array is a list of every locale which yields the
1186        # identical results in 'op_results'.
1187        #
1188        # Effectively, each hash gives all the tests for this category for a
1189        # thread.  The total array of hashes gives the complete list of
1190        # distinct tests possible on this system.  So later, a thread will
1191        # pluck the next available one from the array..
1192        my $combinations_ref = gen_combinations(\@ops, \%results,
1193                                                $distincts{$category});
1194
1195        # Fix up the entries ...
1196        foreach my $test ($combinations_ref->@*) {
1197
1198            # Sort the locale names; this makes it work for later comparisons
1199            # to look at just the first element of each list.
1200            $test->{locales}->@* =
1201                                sort sort_by_hashed_locale $test->{locales}->@*;
1202
1203            # And for each test, calculate and store how many locales have the
1204            # same result (saves recomputation later in a sort).  This adds
1205            # another data structure to each hash in the main array.
1206            my @individual_tests = $test->{op_results}->@*;
1207            my @in_common_locale_counts;
1208            foreach my $this_test (@individual_tests) {
1209
1210                # Each test came from %distincts, and there we have stored the
1211                # list of all locales that yield the same result
1212                push @in_common_locale_counts,
1213                        scalar $distincts{$category}{$this_test}{locales}->@*;
1214            }
1215            push $test->{in_common_locale_counts}->@*, @in_common_locale_counts;
1216        }
1217
1218        # Make a copy
1219        my @cat_tests = $combinations_ref->@*;
1220
1221        # This sorts the test cases so that the ones with the least overlap
1222        # with other cases are first.
1223        sub sort_test_order {
1224            my $a_tests_count = scalar $a->{in_common_locale_counts}->@*;
1225            my $b_tests_count = scalar $b->{in_common_locale_counts}->@*;
1226            my $tests_count = min($a_tests_count, $b_tests_count);
1227
1228            # Choose the one that is most distinctive (least overlap); that is
1229            # the one that has the most tests whose results are not shared by
1230            # any other locale.
1231            my $a_nondistincts = 0;
1232            my $b_nondistincts = 0;
1233            for my $i (0 .. $tests_count - 1) {
1234                $a_nondistincts += ($a->{in_common_locale_counts}[$i] != 1);
1235                $b_nondistincts += ($b->{in_common_locale_counts}[$i] != 1);
1236            }
1237
1238            my $cmp = $a_nondistincts <=> $b_nondistincts;
1239            return $cmp if $cmp;
1240
1241            # If they have the same number of those, choose the one with the
1242            # fewest total number of locales that have the same result
1243            my $a_count = 0;
1244            my $b_count = 0;
1245            for my $i (0 .. $tests_count - 1) {
1246                $a_count += $a->{in_common_locale_counts}[$i];
1247                $b_count += $b->{in_common_locale_counts}[$i];
1248            }
1249
1250            $cmp = $a_count <=> $b_count;
1251            return $cmp if $cmp;
1252
1253            # If that still doesn't yield a winner, use the general sort order.
1254            local $a = $a->{locales}[0];
1255            local $b = $b->{locales}[0];
1256            return sort_by_hashed_locale;
1257        }
1258
1259        # Actually perform the sort.
1260        @cat_tests = sort sort_test_order @cat_tests;
1261
1262        # This category will now have all the distinct tests possible for it
1263        # on this platform, with the first test being the one with the least
1264        # overlap with other test cases
1265        push $all_tests{$category}->@*, @cat_tests;
1266    }     # End of loop through the categories creating and sorting the test
1267          # cases
1268
1269    my %thread_already_used_locales;
1270
1271    # Now generate the tests for each thread.
1272    my @tests_by_thread;
1273    for my $i (0 .. $thread_count - 1) {
1274        foreach my $category (sort keys %all_tests) {
1275            my $skipped = 0;    # Used below to not loop infinitely
1276
1277            # Get the next test case
1278          NEXT_CANDIDATE:
1279            my $candidate = shift $all_tests{$category}->@*;
1280
1281            my $locale_name = $candidate->{locales}[0];
1282
1283            # Avoid, if possible, using the same locale name twice (for
1284            # different categories) in the same thread.
1285            if (defined $thread_already_used_locales{$locale_name =~ s/\W.*//r})
1286            {
1287                # Look through the synonyms of this locale for an
1288                # as-yet-unused one
1289                for my $j (1 .. $candidate->{locales}->@* - 1) {
1290                    my $synonym = $candidate->{locales}[$j];
1291                    next if defined $thread_already_used_locales{$synonym =~
1292                                                                    s/\W.*//r};
1293                    $locale_name = $synonym;
1294                    goto found_synonym;
1295                }
1296
1297                # Here, no synonym was found.  If we haven't cycled through
1298                # all the possible tests, try another (putting this one at the
1299                # end as a last resort in the future).
1300                $skipped++;
1301                if ($skipped < scalar $all_tests{$category}->@*) {
1302                    push $all_tests{$category}->@*, $candidate;
1303                    goto NEXT_CANDIDATE;
1304                }
1305
1306                # Here no synonym was found, this test has already been used,
1307                # but there are no unused ones, so have to re-use it.
1308
1309              found_synonym:
1310            }
1311
1312            # Here, we have found a test case.  The thread needs to know what
1313            # locale to use,
1314            $tests_by_thread[$i]->{$category}{locale_name} = $locale_name;
1315
1316            # And it needs to know each test to run, and the expected result.
1317            my @cases;
1318            for my $j (0 .. $candidate->{op_results}->@* - 1) {
1319                my ($op, $result) =
1320                             unpack_op_result($candidate->{op_results}[$j]);
1321                push @cases, { op => $op, expected => $result };
1322            }
1323            push $tests_by_thread[$i]->{$category}{locale_tests}->@*, @cases;
1324
1325            # Done with this category in this thread.  Setup for subsequent
1326            # categories in this thread, and subsequent threads.
1327            #
1328            # It's best to not have two categories in a thread use the same
1329            # locale.  Save this locale name so that later iterations handling
1330            # other categories can avoid using it, if possible.
1331            $thread_already_used_locales{$locale_name =~ s/\W.*//r} = 1;
1332
1333            # In pursuit of using as many different locales as possible, the
1334            # first shall be last in line next time, and eventually the last
1335            # shall be first
1336            push $candidate->{locales}->@*, shift $candidate->{locales}->@*;
1337
1338            # Similarly, this test case is added back at the end of the list,
1339            # so will be used only as a last resort in the next thread, and as
1340            # the penultimate resort in the thread following that, etc. as the
1341            # test cases are cycled through.
1342            push $all_tests{$category}->@*, $candidate;
1343        } # End of looping through the categories for this thread
1344    } # End of generating all threads
1345
1346    # Now reformat the tests to a form convenient for the actual test file
1347    # script to use; minimizing the amount of ancillary work it needs to do.
1348    my @cooked_tests;
1349    for my $i (0 .. $#tests_by_thread) {
1350
1351        my $this_tests = $tests_by_thread[$i];
1352        my @this_cooked_tests;
1353        my (@this_categories, @this_locales);    # Parallel arrays
1354
1355        # Every so often we use LC_ALL instead of individual locales, provided
1356        # it is available on the platform
1357        if (   ($i % $lc_all_frequency == $lc_all_frequency - 1)
1358            && $LC_ALL_string eq 'LC_ALL')
1359        {
1360            my $lc_all= "";
1361            my $category_number;
1362
1363            # Compute the LC_ALL string for the syntax accepted by this
1364            # platform from the locale each category is to be set to.
1365            while (defined($category_number = get_next_category())) {
1366                my $category_name =
1367                                $map_category_number_to_name{$category_number};
1368                my $locale = $this_tests->{$category_name}{locale_name};
1369                $locale = "C" unless defined $locale;
1370                $category_name =~ s/\@/\\@/g;
1371
1372                $lc_all .= $lc_all_separator if $lc_all ne "";
1373
1374                if ($use_name_value_pairs) {
1375                    $lc_all .= $category_name . "=";
1376                }
1377
1378                $lc_all .= $locale;
1379            }
1380
1381            $this_categories[0] = $LC_ALL;
1382            $this_locales[0] = $lc_all;
1383        }
1384        else {  # The other times, just set each category to its locale
1385                # individually
1386            foreach my $category_name (sort keys $this_tests->%*) {
1387                push @this_categories,
1388                                $map_category_name_to_number{$category_name};
1389                push @this_locales,
1390                            $this_tests->{$category_name}{locale_name};
1391            }
1392        }
1393
1394        while (keys $this_tests->%*) {
1395            foreach my $category_name (sort keys $this_tests->%*) {
1396                my $this_category_tests = $this_tests->{$category_name};
1397                my $test = shift
1398                                $this_category_tests->{locale_tests}->@*;
1399                print STDERR __FILE__, ': ', __LINE__, ': ', Dumper $test
1400                                                                    if $debug;
1401                if (! $test) {
1402                    delete $this_tests->{$category_name};
1403                    next;
1404                }
1405
1406                $test->{category_name} = $category_name;
1407                my $locale_name = $this_category_tests->{locale_name};
1408                $test->{locale_name} = $locale_name;
1409                $test->{codeset} =
1410                                $locale_name_to_object{$locale_name}{codeset};
1411
1412                push @this_cooked_tests, $test;
1413            }
1414        }
1415
1416        push @cooked_tests, {
1417                              thread => $i,
1418                              categories => \@this_categories,
1419                              locales => \@this_locales,
1420                              tests => \@this_cooked_tests,
1421                            };
1422    }
1423
1424    my $all_tests_ref = \@cooked_tests;
1425    my $all_tests_file = tempfile();
1426
1427    # Store the tests into a file, retrievable by the subprocess
1428    use Storable;
1429    if (! defined store($all_tests_ref, $all_tests_file)) {
1430        die "Could not save the built-up data structure";
1431    }
1432
1433    my $category_number_to_name = Data::Dumper->Dump(
1434                                            [ \%map_category_number_to_name ],
1435                                            [  'map_category_number_to_name']);
1436
1437    my $switches = "";
1438    $switches = "switches => [ -DLv ]" if $debug > 2;
1439
1440    # Build up the program to run.  This stresses locale thread safety.  We
1441    # start a bunch of threads.  Each sets the locale of each category being
1442    # tested to the value determined in the code above.  Then each sleeps to a
1443    # common start time, at which point they awaken and iterate their
1444    # respective loops.  Each iteration runs a set of tests and checks that
1445    # the results are as expected.  This should catch any instances of other
1446    # threads interfering.  Every so often, each thread shifts to instead use
1447    # the locales and tests of another thread.  This catches bugs dealing with
1448    # changing the locale on the fly.
1449    #
1450    # The code above has set up things so that each thread has as disparate
1451    # results from the other threads as possible, so to more likely catch any
1452    # bleed-through.
1453    my $program = <<EOT;
1454
1455    BEGIN { \$| = 1; }
1456    my \$debug = $debug;
1457    my \$thread_count = $thread_count;
1458    my \$iterations_per_test_set = $iterations_per_test_set;
1459    my \$iterations = $iterations;
1460    my \$die_on_negative_sleep = $die_on_negative_sleep;
1461    my \$per_thread_startup = $per_thread_startup;
1462    my \$all_tests_file = $all_tests_file;
1463    my \$alarm_clock = $alarm_clock;
1464EOT
1465
1466    $program .= <<'EOT';
1467    use threads;
1468    use strict;
1469    use warnings;
1470    use POSIX qw(locale_h);
1471    use utf8;
1472    use Time::HiRes qw(time usleep);
1473    $|=1;
1474
1475    use Data::Dumper;
1476    $Data::Dumper::Sortkeys=1;
1477    $Data::Dumper::Useqq = 1;
1478    $Data::Dumper::Deepcopy = 1;
1479
1480    # Get the tests stored for us by the setup process
1481    use Storable;
1482    my $all_tests_ref = retrieve($all_tests_file);
1483    if (! defined $all_tests_ref) {
1484        die "Could not restore the built-up data structure";
1485    }
1486
1487    my %corrects;
1488
1489    sub output_test_failure_prefix {
1490        my ($iteration, $category_name, $test) = @_;
1491        my $tid = threads->tid();
1492        print STDERR "\nthread ", $tid,
1493                     " failed in iteration $iteration",
1494                     " for locale $test->{locale_name}",
1495                     " codeset='$test->{codeset}'",
1496                     " $category_name",
1497                     "\nop='$test->{op}'",
1498                     "\nafter getting ", ($corrects{$category_name}
1499                                          {$test->{locale_name}}
1500                                          {all} // 0),
1501                     " previous correct results for this category and",
1502                     " locale,\nincluding ", ($corrects{$category_name}
1503                                              {$test->{locale_name}}
1504                                              {$tid} // 0),
1505                     " in this thread\n";
1506    }
1507
1508    sub output_test_result($$$) {
1509        my ($type, $result, $utf8_matches) = @_;
1510
1511        no locale;
1512
1513        print STDERR "$type";
1514
1515        my $copy = $result;
1516        if (! $utf8_matches) {
1517            if (utf8::is_utf8($copy)) {
1518                print STDERR " (result already was in UTF-8)";
1519            }
1520            else {
1521                utf8::upgrade($copy);
1522                print STDERR " (result wasn't in UTF-8; converted for easier",
1523                             " comparison)";
1524            }
1525        }
1526        print STDERR ":\n";
1527
1528        use Devel::Peek;
1529        Dump $copy;
1530    }
1531
1532    sub iterate {       # Run some chunk of iterations of the tests
1533        my ($tid,                  # Which thread
1534            $initial_iteration,    # The number of the first iteration
1535            $count,                # How many
1536            $tests_ref)            # The tests
1537            = @_;
1538
1539        my $iteration = $initial_iteration;
1540        $count += $initial_iteration;
1541
1542        # Repeatedly ...
1543        while ($iteration < $count) {
1544            my $errors = 0;
1545
1546            use locale;
1547
1548            # ... execute the tests
1549            foreach my $test ($tests_ref->@*) {
1550
1551                # We know what we are expecting
1552                my $expected = $test->{expected};
1553
1554                my $category_name = $test->{category_name};
1555
1556                # And do the test.
1557                my $got = eval $test->{op};
1558
1559                if (! defined $got) {
1560                    output_test_failure_prefix($iteration,
1561                                               $category_name,
1562                                               $test);
1563                    output_test_result("expected", $expected,
1564                                        1 # utf8ness matches, since only one
1565                                      );
1566                    $errors++;
1567                    next;
1568                }
1569
1570                my $utf8ness_matches = (   utf8::is_utf8($got)
1571                                        == utf8::is_utf8($expected));
1572
1573                my $matched = ($got eq $expected);
1574                if ($matched) {
1575                    if ($utf8ness_matches) {
1576                        no warnings 'uninitialized';
1577                        $corrects{$category_name}{$test->{locale_name}}{all}++;
1578                        $corrects{$category_name}{$test->{locale_name}}{$tid}++;
1579                        next;   # Complete success!
1580                    }
1581                }
1582
1583                $errors++;
1584                output_test_failure_prefix($iteration, $category_name, $test);
1585
1586                if ($matched) {
1587                    print STDERR "Only difference is UTF8ness of results\n";
1588                }
1589                output_test_result("expected", $expected, $utf8ness_matches);
1590                output_test_result("got", $got, $utf8ness_matches);
1591
1592            } # Loop to do the remaining tests for this iteration
1593
1594            return 0 if $errors;
1595
1596            $iteration++;
1597
1598            # A way to set a gdb break point pp_study
1599            #study if $iteration % 10 == 0;
1600
1601            threads->yield();
1602        }
1603
1604        return 1;
1605    } # End of iterate() definition
1606
1607EOT
1608
1609    $program .= "my $category_number_to_name\n";
1610
1611    $program .= <<'EOT';
1612    sub setlocales {
1613        # Set each category to the appropriate locale for this test set
1614        my ($categories, $locales) = @_;
1615        for my $i (0 .. $categories->@* - 1) {
1616            if (! setlocale($categories->[$i], $locales->[$i])) {
1617                my $category_name =
1618                            $map_category_number_to_name->{$categories->[$i]};
1619                print STDERR "\nthread ", threads->tid(),
1620                             " setlocale($category_name ($categories->[$i]),",
1621                             " $locales->[$i]) failed\n";
1622                return 0;
1623            }
1624        }
1625
1626        return 1;
1627    }
1628
1629    my $startup_insurance = 1;
1630    my $future = $startup_insurance + $thread_count * $per_thread_startup;
1631    my $starting_time = time() + $future;
1632
1633    sub wait_until_time {
1634
1635        # Sleep until the time when all the threads are due to wake up, so
1636        # they run as simultaneously as we can make it.
1637        my $sleep_time = ($starting_time - time());
1638        #printf STDERR "thread %d started, sleeping %g sec\n",
1639        #              threads->tid, $sleep_time;
1640        if ($sleep_time < 0 && $die_on_negative_sleep) {
1641            # What the start time should have been
1642            my $a_better_future = $future - $sleep_time;
1643
1644            my $better_per_thread =
1645                        ($a_better_future - $startup_insurance) / $thread_count;
1646            printf STDERR "$per_thread_startup would need to be %g",
1647                          " for thread %d to have started\nin sync with",
1648                          " the other threads\n",
1649                          $better_per_thread, threads->tid;
1650            die "Thread started too late";
1651        }
1652        else {
1653            usleep($sleep_time * 1_000_000) if $sleep_time > 0;
1654        }
1655    }
1656
1657    # Create all the subthreads: 1..n
1658    my @threads = map +threads->create(sub {
1659        $SIG{'KILL'} = sub { threads->exit(); };
1660
1661        my $thread = shift;
1662
1663        # Start out with the set of tests whose number is the same as the
1664        # thread number
1665        my $test_set = $thread;
1666
1667        wait_until_time();
1668
1669        # Loop through all the iterations for this thread
1670        my $this_iteration_start = 1;
1671        do {
1672             # Set up each category with its locale;
1673            my $this_ref = $all_tests_ref->[$test_set];
1674            return 0 unless setlocales($this_ref->{categories},
1675                                       $this_ref->{locales});
1676            # Then run one batch of iterations
1677            my $result = iterate($thread,
1678                                 $this_iteration_start,
1679                                 $iterations_per_test_set,
1680                                 $this_ref->{tests});
1681            return 0 if $result == 0;   # Quit if failed
1682
1683            # Next iteration will shift to use a different set of locales for
1684            # each category
1685            $test_set++;
1686            $test_set = 0 if $test_set >= $thread_count;
1687            $this_iteration_start += $iterations_per_test_set;
1688        } while ($this_iteration_start <= $iterations);
1689
1690        return 1;   # Success
1691
1692    }, $_), (1..$thread_count - 1);     # For each non-0 thread
1693
1694    # Here is thread 0.  We do a smaller chunk of iterations in it; then
1695    # join whatever threads have finished so far, then do another chunk.
1696    # This tests for bugs that arise as a result of joining.
1697
1698    my %thread0_corrects = ();
1699    my $this_iteration_start = 1;
1700    my $result = 1;    # So far, everything is ok
1701    my $test_set = -1;  # Start with 0th test set
1702
1703    wait_until_time();
1704    alarm($alarm_clock);    # Guard against hangs
1705
1706    do {
1707        # Next time, we'll use the next test set
1708        $test_set++;
1709        $test_set = 0 if $test_set >= $thread_count;
1710
1711        my $this_ref = $all_tests_ref->[$test_set];
1712
1713        # set the locales for this test set.  Do this even if we
1714        # are going to bail, so that it will be set correctly for the final
1715        # batch after the loop.
1716        $result &= setlocales($this_ref->{categories}, $this_ref->{locales});
1717
1718        if ($debug > 1) {
1719            my @joinable = threads->list(threads::joinable);
1720            if (@joinable) {
1721                print STDERR "In thread 0, before iteration ",
1722                             $this_iteration_start,
1723                             " these threads are done: ",
1724                             join (", ", map { $_->tid() } @joinable),
1725                             "\n";
1726            }
1727        }
1728
1729        # Join anything already finished.
1730        for my $thread (threads->list(threads::joinable)) {
1731            my $thread_result = $thread->join;
1732            if ($debug > 1) {
1733                print STDERR "In thread 0, before iteration ",
1734                             $this_iteration_start,
1735                             " joining thread ", $thread->tid(),
1736                             "; result=", ((defined $thread_result)
1737                                           ? $thread_result
1738                                           : "undef"),
1739                             "\n";
1740            }
1741
1742            # If the thread failed badly, stop testing anything else.
1743            if (! defined $thread_result) {
1744                $_->kill('KILL')->detach() for threads->list();
1745                print 0;
1746                exit;
1747            }
1748
1749            # Update the status
1750            $result &= $thread_result;
1751        }
1752
1753        # Do a chunk of iterations on this thread 0.
1754        $result &= iterate(0,
1755                           $this_iteration_start,
1756                           $iterations_per_test_set,
1757                           $this_ref->{tests},
1758                           \%thread0_corrects);
1759        $this_iteration_start += $iterations_per_test_set;
1760
1761        # And repeat as long as there are other tests
1762    } while (threads->list(threads::all));
1763
1764    print $result;
1765EOT
1766
1767    # Finally ready to run the test.
1768    fresh_perl_is($program,
1769        1,
1770        { eval $switches },
1771        "Verify there were no failures with simultaneous running threads"
1772    );
1773}
1774