xref: /openbsd-src/gnu/usr.bin/perl/t/run/runenv_hashseed.t (revision 256a93a44f36679bee503f12e49566c2183f6181)
1*256a93a4Safresh1#!./perl
2*256a93a4Safresh1
3*256a93a4Safresh1# Test that setting PERL_HASH_SEED and PERL_PERTURB_KEYS in different
4*256a93a4Safresh1# combinations works as expected, and that changing the values provided
5*256a93a4Safresh1# produces the expected results
6*256a93a4Safresh1#
7*256a93a4Safresh1# We do this by first executing Perl with a given PERL_PERTURB_KEYS
8*256a93a4Safresh1# mode, and then extract the randomly chosen PERL_HASH_SEED it ran under
9*256a93a4Safresh1# from its debug output which was printed to STDERR, and then use it for
10*256a93a4Safresh1# further tests. This allows the tests to be robust to the choice of hash
11*256a93a4Safresh1# function and seed sizes that might be in use in the perl being tested.
12*256a93a4Safresh1# We do not ask perl to output any keys on this run, as our subsequent
13*256a93a4Safresh1# runs will use different environment variables (specifically
14*256a93a4Safresh1# PERL_HASH_SEED) which will change any key order results we see.
15*256a93a4Safresh1#
16*256a93a4Safresh1# We then execute perl a further three times and ask perl to build a
17*256a93a4Safresh1# hash with a specific number of buckets and a specific set of keys. We
18*256a93a4Safresh1# then have perl print the raw keys to STDOUT.
19*256a93a4Safresh1#
20*256a93a4Safresh1# For two of these three runs we supply the same seed, and both of those
21*256a93a4Safresh1# times we supply the same perturb mode, but in different ways, once as
22*256a93a4Safresh1# a name and once as a digit. The debug output should be identical in
23*256a93a4Safresh1# both cases regardless of mode. For PERL_PERTURB_KEYS mode 0=NO, and
24*256a93a4Safresh1# 2=DETERMINISTIC the key order should match. For mode 1=RANDOM the key
25*256a93a4Safresh1# order should differ the vast majority of the time, however the test is
26*256a93a4Safresh1# probabilistic and occasionally may result in the same key order.
27*256a93a4Safresh1#
28*256a93a4Safresh1# The third run we supply a different seed, with a 1 bit difference, but
29*256a93a4Safresh1# with the same PERL_PERTURB_KEYS mode. In this case we expect the key
30*256a93a4Safresh1# order to differ for all three modes, but again the test is
31*256a93a4Safresh1# probabilistic and we may get the same key order in a small percentage
32*256a93a4Safresh1# of the times we try this.
33*256a93a4Safresh1#
34*256a93a4Safresh1# To address the probabilistic nature of these tests we run them
35*256a93a4Safresh1# multiple times and count how many times we get the same key order.
36*256a93a4Safresh1# Most times this should be zero, but occasionally it might be higher.
37*256a93a4Safresh1# Therefore we use a threshold $allowed_fails to determine how many
38*256a93a4Safresh1# times the key order may be unchanged before we consider the tests
39*256a93a4Safresh1# actually failed. We also use a largish number of keys in a hash with
40*256a93a4Safresh1# a large number of buckets, which means we produce a lot a large temp
41*256a93a4Safresh1# files as we test, so we aggressively clean them up as we go.
42*256a93a4Safresh1
43*256a93a4Safresh1
44*256a93a4Safresh1BEGIN {
45*256a93a4Safresh1    chdir 't' if -d 't';
46*256a93a4Safresh1    @INC = '../lib';
47*256a93a4Safresh1    require './test.pl';
48*256a93a4Safresh1    require Config;
49*256a93a4Safresh1    Config->import;
50*256a93a4Safresh1}
51*256a93a4Safresh1
52*256a93a4Safresh1skip_all_without_config('d_fork');
53*256a93a4Safresh1skip_all("NO_PERL_HASH_ENV or NO_PERL_HASH_SEED_DEBUG set")
54*256a93a4Safresh1    if $Config{ccflags} =~ /-DNO_PERL_HASH_ENV\b/
55*256a93a4Safresh1    || $Config{ccflags} =~ /-DNO_PERL_HASH_SEED_DEBUG\b/;
56*256a93a4Safresh1use strict;
57*256a93a4Safresh1use warnings;
58*256a93a4Safresh1
59*256a93a4Safresh1# enable DEBUG_RUNENV if you want to see what is being returned
60*256a93a4Safresh1# by the executed perl.
61*256a93a4Safresh1sub my_runperl {
62*256a93a4Safresh1    my ($cmd_array, $perturb, $set_seed) = @_;
63*256a93a4Safresh1    my $opts_hash= {
64*256a93a4Safresh1        PERL_HASH_SEED_DEBUG => 1,
65*256a93a4Safresh1        PERL_PERTURB_KEYS => $perturb
66*256a93a4Safresh1    };
67*256a93a4Safresh1    $opts_hash->{PERL_HASH_SEED}= $set_seed if $set_seed;
68*256a93a4Safresh1
69*256a93a4Safresh1    my ( $out, $err )
70*256a93a4Safresh1        = runperl_and_capture( $opts_hash, $cmd_array );
71*256a93a4Safresh1    my @err= split /\n/, $err;
72*256a93a4Safresh1
73*256a93a4Safresh1    my $seed;
74*256a93a4Safresh1    my $mode_name;
75*256a93a4Safresh1    my $mode_digit;
76*256a93a4Safresh1    my @err_got_data;
77*256a93a4Safresh1    my @rand_bits;
78*256a93a4Safresh1    foreach my $line (@err) {
79*256a93a4Safresh1        if ($line=~/^Got.*/) {
80*256a93a4Safresh1            push @err_got_data, $line;
81*256a93a4Safresh1        }
82*256a93a4Safresh1        elsif ($line=~/^PL_hash_rand_bits=.*/) {
83*256a93a4Safresh1            push @rand_bits, $line;
84*256a93a4Safresh1        }
85*256a93a4Safresh1        elsif ($line=~/HASH_SEED = (0x[a-f0-9]+)/) {
86*256a93a4Safresh1            $seed= $1;
87*256a93a4Safresh1            $line =~ /PERTURB_KEYS = (\d) \((\w+)\)/
88*256a93a4Safresh1                or die "Failed to extract perturb mode: $err";
89*256a93a4Safresh1            $mode_digit = $1;
90*256a93a4Safresh1            $mode_name = $2;
91*256a93a4Safresh1
92*256a93a4Safresh1        }
93*256a93a4Safresh1    }
94*256a93a4Safresh1    if (!$seed){
95*256a93a4Safresh1        die "Failed to extract seed: $err";
96*256a93a4Safresh1    }
97*256a93a4Safresh1    my $err_got_data= join("\n",@err_got_data);
98*256a93a4Safresh1    return ($seed, $mode_digit, $mode_name, $out, $err_got_data, \@rand_bits);
99*256a93a4Safresh1}
100*256a93a4Safresh1
101*256a93a4Safresh1my @mode_names = (
102*256a93a4Safresh1    'NO',            # 0
103*256a93a4Safresh1    'RANDOM',        # 1
104*256a93a4Safresh1    'DETERMINISTIC', # 2
105*256a93a4Safresh1);
106*256a93a4Safresh1
107*256a93a4Safresh1my $repeat = 50; # if this changes adjust the comments below.
108*256a93a4Safresh1my $min_buckets = 100_000;
109*256a93a4Safresh1my $actual_buckets = 1;
110*256a93a4Safresh1$actual_buckets *= 2 while $actual_buckets <= $min_buckets;
111*256a93a4Safresh1my $key_expr = '0..999, "aa".."zz", map { $_ x 30 } "a".."z"'; #1702 keys
112*256a93a4Safresh1my @keys = eval $key_expr
113*256a93a4Safresh1    or die "bad '$key_expr': $@";
114*256a93a4Safresh1my $allowed_fails = 2; # Adjust this up to make the test tolerate
115*256a93a4Safresh1                       # more "errors". Maybe one day we will compute
116*256a93a4Safresh1                       # it from the value of $repeat, and $actual_buckets
117*256a93a4Safresh1                       # and the number of @keys.
118*256a93a4Safresh1
119*256a93a4Safresh1plan tests => (4 * $repeat)     # DETERMINISTIC
120*256a93a4Safresh1            + (1 * $repeat)     # NO
121*256a93a4Safresh1            + 1                 # RANDOM mode
122*256a93a4Safresh1            + (8 * @mode_names) # validation per mode
123*256a93a4Safresh1            + @mode_names;      # all modes
124*256a93a4Safresh1
125*256a93a4Safresh1
126*256a93a4Safresh1# Note the keys(%h) = $n will cause perl to allocate the power of 2 larger
127*256a93a4Safresh1# than $n buckets, so if $n = 100_000, then $actual_buckets will be 131072.
128*256a93a4Safresh1
129*256a93a4Safresh1my @perl_args = (
130*256a93a4Safresh1    '-I../lib',
131*256a93a4Safresh1    (is_miniperl() ? () # no Hash::Util here!
132*256a93a4Safresh1                   : '-MHash::Util=hash_traversal_mask,num_buckets'),
133*256a93a4Safresh1    '-e',
134*256a93a4Safresh1    'my %h; keys(%h)=' . $min_buckets . '; ' .
135*256a93a4Safresh1    '@h{' . $key_expr . '}=(); @k=keys %h; ' .
136*256a93a4Safresh1      'print join ":", 0+@k, ' .
137*256a93a4Safresh1      (is_miniperl() ? '' :  # no Hash::Util here!
138*256a93a4Safresh1          'num_buckets(%h),hash_traversal_mask(\\%h), ') .
139*256a93a4Safresh1      'join ",", @k;'
140*256a93a4Safresh1  );
141*256a93a4Safresh1
142*256a93a4Safresh1for my $test_mode_digit (0 .. $#mode_names) {
143*256a93a4Safresh1    my $test_mode_name = $mode_names[$test_mode_digit];
144*256a93a4Safresh1    my $descr_mode = "mode = $test_mode_name";
145*256a93a4Safresh1
146*256a93a4Safresh1    my $print_keys= [ ($test_mode_name eq "DETERMINISTIC")
147*256a93a4Safresh1                      ? "-Dh" : (), # enable hash diags
148*256a93a4Safresh1                      @perl_args ];
149*256a93a4Safresh1
150*256a93a4Safresh1    my $validated_mode= 0;
151*256a93a4Safresh1    my $random_same = 0;
152*256a93a4Safresh1    my $seed_change_same = 0;
153*256a93a4Safresh1    for my $try (1 .. $repeat) {
154*256a93a4Safresh1
155*256a93a4Safresh1        my $descr = sprintf "%s, try %2d:", $descr_mode, $try;
156*256a93a4Safresh1
157*256a93a4Safresh1        # First let perl choose the seed. We only use the $seed and $err
158*256a93a4Safresh1        # output here. We extract the seed that perl chose, which
159*256a93a4Safresh1        # hardens us against the use of different hash functions with
160*256a93a4Safresh1        # different seed sizes. Also the act of adding the PERL_HASH_SEED
161*256a93a4Safresh1        # to the environment later on will likely change the $out.
162*256a93a4Safresh1        my ( $seed, $digit, $mode )
163*256a93a4Safresh1            = my_runperl( ['-e1'], $test_mode_name );
164*256a93a4Safresh1
165*256a93a4Safresh1        # Now we have to run it again.
166*256a93a4Safresh1        my ( $seed1, $digit1, $mode1, $out1, $err_got_data1, $rand_bits1 )
167*256a93a4Safresh1            = my_runperl( $print_keys, $test_mode_name, $seed );
168*256a93a4Safresh1
169*256a93a4Safresh1        # And once more, these two should do the same thing for
170*256a93a4Safresh1        # DETERMINISTIC and NO, and be different for RANDOM.
171*256a93a4Safresh1        # We set the mode via the digit not the name here.
172*256a93a4Safresh1        my ( $seed2, $digit2, $mode2, $out2, $err_got_data2, $rand_bits2 )
173*256a93a4Safresh1            = my_runperl( $print_keys, $test_mode_digit, $seed );
174*256a93a4Safresh1
175*256a93a4Safresh1        if (!$validated_mode++) {
176*256a93a4Safresh1            is($digit, $test_mode_digit,
177*256a93a4Safresh1                "$descr base run set the mode digit as expected");
178*256a93a4Safresh1
179*256a93a4Safresh1            is($mode, $test_mode_name,
180*256a93a4Safresh1                "$descr base run set the mode name as expected");
181*256a93a4Safresh1
182*256a93a4Safresh1            is( $seed1, $seed,
183*256a93a4Safresh1                "$descr retry 1 set the seed as expected");
184*256a93a4Safresh1
185*256a93a4Safresh1            is( $mode1, $test_mode_name,
186*256a93a4Safresh1                "$descr retry 1 set the mode by name as expected");
187*256a93a4Safresh1
188*256a93a4Safresh1            is( $digit2, $test_mode_digit,
189*256a93a4Safresh1                "$descr retry 2 set the mode by digit as expected");
190*256a93a4Safresh1
191*256a93a4Safresh1            is( $seed1, $seed2,
192*256a93a4Safresh1                "$descr seeds match between retries");
193*256a93a4Safresh1
194*256a93a4Safresh1            is( $digit1, $digit2,
195*256a93a4Safresh1                "$descr mode digits match between retries");
196*256a93a4Safresh1
197*256a93a4Safresh1            is( $mode1, $mode2,
198*256a93a4Safresh1                "$descr mode names match between retries");
199*256a93a4Safresh1        }
200*256a93a4Safresh1
201*256a93a4Safresh1        {
202*256a93a4Safresh1            # We also test that a 1 bit change to the seed will
203*256a93a4Safresh1            # actually change the output in all modes. It should
204*256a93a4Safresh1            # most of the time.
205*256a93a4Safresh1            my $munged_seed = $seed;
206*256a93a4Safresh1            substr($munged_seed,-1)=~tr/0-9a-f/1-9a-f0/;
207*256a93a4Safresh1            if ( $munged_seed eq $seed ) {
208*256a93a4Safresh1                die "Failed to munge seed '$seed'";
209*256a93a4Safresh1            }
210*256a93a4Safresh1
211*256a93a4Safresh1            my ( $new_seed, $new_digit, $new_mode, $new_out )
212*256a93a4Safresh1                = my_runperl( \@perl_args, $test_mode_name, $munged_seed );
213*256a93a4Safresh1            if ($new_seed ne $munged_seed) {
214*256a93a4Safresh1                die "panic: seed change didn't seem to propagate";
215*256a93a4Safresh1            }
216*256a93a4Safresh1            if (
217*256a93a4Safresh1                $new_mode  ne $test_mode_name or
218*256a93a4Safresh1                $new_digit ne $test_mode_digit
219*256a93a4Safresh1            ) {
220*256a93a4Safresh1                die "panic: mode setting not as expected";
221*256a93a4Safresh1            }
222*256a93a4Safresh1
223*256a93a4Safresh1            # The result should be different most times, but there
224*256a93a4Safresh1            # is a small chance that we got the same result, so
225*256a93a4Safresh1            # count how many times it happens and then check if it
226*256a93a4Safresh1            # exceeds $allowed_fails later.
227*256a93a4Safresh1            $seed_change_same++ if $out1 eq $new_out;
228*256a93a4Safresh1        }
229*256a93a4Safresh1
230*256a93a4Safresh1        if ( $test_mode_name eq 'RANDOM' ) {
231*256a93a4Safresh1            # The result should be different most times, but there is a
232*256a93a4Safresh1            # small chance that we get the same result, so count how
233*256a93a4Safresh1            # many times it happens and then check if it exceeds
234*256a93a4Safresh1            # $allowed_fails later.
235*256a93a4Safresh1            $random_same++ if $out1 eq $out2;
236*256a93a4Safresh1            next;
237*256a93a4Safresh1        }
238*256a93a4Safresh1
239*256a93a4Safresh1        # From this point on we are testing DETERMINISTIC and NO
240*256a93a4Safresh1        # modes only.
241*256a93a4Safresh1
242*256a93a4Safresh1        is( $out1, $out2,
243*256a93a4Safresh1            "$descr results in the same key order each time"
244*256a93a4Safresh1        );
245*256a93a4Safresh1
246*256a93a4Safresh1        next if $test_mode_name eq "NO";
247*256a93a4Safresh1
248*256a93a4Safresh1        # From this point on we are testing the DETERMINISTIC
249*256a93a4Safresh1        # mode only.
250*256a93a4Safresh1
251*256a93a4Safresh1        SKIP: {
252*256a93a4Safresh1            # skip these tests if we are not running in a DEBUGGING perl.
253*256a93a4Safresh1            skip "$descr not testing rand bits, not a DEBUGGING perl", 3
254*256a93a4Safresh1                if @$rand_bits1 + @$rand_bits2 == 0;
255*256a93a4Safresh1
256*256a93a4Safresh1            is ( 0+@$rand_bits1, 0+@$rand_bits2,
257*256a93a4Safresh1                "$descr same count of rand_bits entries each time");
258*256a93a4Safresh1
259*256a93a4Safresh1            my $max_i = $#$rand_bits1 > $#$rand_bits2
260*256a93a4Safresh1                      ? $#$rand_bits1 : $#$rand_bits2;
261*256a93a4Safresh1
262*256a93a4Safresh1            my $bad_idx;
263*256a93a4Safresh1            for my $i (0 .. $max_i) {
264*256a93a4Safresh1                if (($rand_bits2->[$i] // "") ne
265*256a93a4Safresh1                    ($rand_bits1->[$i] // ""))
266*256a93a4Safresh1                {
267*256a93a4Safresh1                    $bad_idx = $i;
268*256a93a4Safresh1                    last;
269*256a93a4Safresh1                }
270*256a93a4Safresh1            }
271*256a93a4Safresh1            is($bad_idx, undef,
272*256a93a4Safresh1                "$descr bad rand bits data index should be undef");
273*256a93a4Safresh1            if (defined $bad_idx) {
274*256a93a4Safresh1                # we use is() to see the differing data, but this test
275*256a93a4Safresh1                # is expected to fail - the description seems a little
276*256a93a4Safresh1                # odd here, but since it will always fail it makes sense
277*256a93a4Safresh1                # in context.
278*256a93a4Safresh1                is($rand_bits2->[$bad_idx],$rand_bits1->[$bad_idx],
279*256a93a4Safresh1                    "$descr rand bits data is the same at idx $bad_idx");
280*256a93a4Safresh1            } else {
281*256a93a4Safresh1                pass("$descr rand bits data is the same");
282*256a93a4Safresh1            }
283*256a93a4Safresh1        }
284*256a93a4Safresh1    }
285*256a93a4Safresh1    continue {
286*256a93a4Safresh1        # We create a lot of big temp files so clean them up as we go.
287*256a93a4Safresh1        # This is in a continue block so we can do this cleanup after
288*256a93a4Safresh1        # each iteration even if we call next in the middle of the loop.
289*256a93a4Safresh1        unlink_tempfiles();
290*256a93a4Safresh1    }
291*256a93a4Safresh1
292*256a93a4Safresh1    # We just finished $repeat tests, now deal with the probabilistic
293*256a93a4Safresh1    # results and ensure that we are under the $allowed_fails threshold
294*256a93a4Safresh1
295*256a93a4Safresh1    if ($test_mode_name eq "RANDOM") {
296*256a93a4Safresh1        # There is a small chance we got the same result a few times
297*256a93a4Safresh1        # even when everything is working as expected. So allow a
298*256a93a4Safresh1        # small number number of fails determined by $allowed_fails.
299*256a93a4Safresh1        ok( $random_same <= $allowed_fails,
300*256a93a4Safresh1            "$descr_mode same key order no more than $allowed_fails times")
301*256a93a4Safresh1            or diag(
302*256a93a4Safresh1                "Key order was the same $random_same/$repeat times in",
303*256a93a4Safresh1                "RANDOM mode. This test is probabilistic so if the number",
304*256a93a4Safresh1                "is low and you re-run the tests and it does not fail",
305*256a93a4Safresh1                "again then you can ignore this test fail.");
306*256a93a4Safresh1
307*256a93a4Safresh1    }
308*256a93a4Safresh1
309*256a93a4Safresh1    # There is a small chance we got the same result a few times even
310*256a93a4Safresh1    # when everything is working as expected. So allow a small number
311*256a93a4Safresh1    # of fails as determined by $allowed_fails.
312*256a93a4Safresh1    ok( $seed_change_same <= $allowed_fails,
313*256a93a4Safresh1        "$descr_mode same key order with different seed no more " .
314*256a93a4Safresh1        "than $allowed_fails times" )
315*256a93a4Safresh1        or diag(
316*256a93a4Safresh1            "Key order was the same $random_same/$repeat times with",
317*256a93a4Safresh1            "a different seed. This test is probabilistic so if the number",
318*256a93a4Safresh1            "is low and you re-run the tests and it does not fail",
319*256a93a4Safresh1            "again then you can ignore this test fail.");
320*256a93a4Safresh1}
321