1*256a93a4Safresh1#!./perl 2*256a93a4Safresh1 3*256a93a4Safresh1# Test that setting PERL_HASH_SEED and PERL_PERTURB_KEYS in different 4*256a93a4Safresh1# combinations works as expected, and that changing the values provided 5*256a93a4Safresh1# produces the expected results 6*256a93a4Safresh1# 7*256a93a4Safresh1# We do this by first executing Perl with a given PERL_PERTURB_KEYS 8*256a93a4Safresh1# mode, and then extract the randomly chosen PERL_HASH_SEED it ran under 9*256a93a4Safresh1# from its debug output which was printed to STDERR, and then use it for 10*256a93a4Safresh1# further tests. This allows the tests to be robust to the choice of hash 11*256a93a4Safresh1# function and seed sizes that might be in use in the perl being tested. 12*256a93a4Safresh1# We do not ask perl to output any keys on this run, as our subsequent 13*256a93a4Safresh1# runs will use different environment variables (specifically 14*256a93a4Safresh1# PERL_HASH_SEED) which will change any key order results we see. 15*256a93a4Safresh1# 16*256a93a4Safresh1# We then execute perl a further three times and ask perl to build a 17*256a93a4Safresh1# hash with a specific number of buckets and a specific set of keys. We 18*256a93a4Safresh1# then have perl print the raw keys to STDOUT. 19*256a93a4Safresh1# 20*256a93a4Safresh1# For two of these three runs we supply the same seed, and both of those 21*256a93a4Safresh1# times we supply the same perturb mode, but in different ways, once as 22*256a93a4Safresh1# a name and once as a digit. The debug output should be identical in 23*256a93a4Safresh1# both cases regardless of mode. For PERL_PERTURB_KEYS mode 0=NO, and 24*256a93a4Safresh1# 2=DETERMINISTIC the key order should match. For mode 1=RANDOM the key 25*256a93a4Safresh1# order should differ the vast majority of the time, however the test is 26*256a93a4Safresh1# probabilistic and occasionally may result in the same key order. 27*256a93a4Safresh1# 28*256a93a4Safresh1# The third run we supply a different seed, with a 1 bit difference, but 29*256a93a4Safresh1# with the same PERL_PERTURB_KEYS mode. In this case we expect the key 30*256a93a4Safresh1# order to differ for all three modes, but again the test is 31*256a93a4Safresh1# probabilistic and we may get the same key order in a small percentage 32*256a93a4Safresh1# of the times we try this. 33*256a93a4Safresh1# 34*256a93a4Safresh1# To address the probabilistic nature of these tests we run them 35*256a93a4Safresh1# multiple times and count how many times we get the same key order. 36*256a93a4Safresh1# Most times this should be zero, but occasionally it might be higher. 37*256a93a4Safresh1# Therefore we use a threshold $allowed_fails to determine how many 38*256a93a4Safresh1# times the key order may be unchanged before we consider the tests 39*256a93a4Safresh1# actually failed. We also use a largish number of keys in a hash with 40*256a93a4Safresh1# a large number of buckets, which means we produce a lot a large temp 41*256a93a4Safresh1# files as we test, so we aggressively clean them up as we go. 42*256a93a4Safresh1 43*256a93a4Safresh1 44*256a93a4Safresh1BEGIN { 45*256a93a4Safresh1 chdir 't' if -d 't'; 46*256a93a4Safresh1 @INC = '../lib'; 47*256a93a4Safresh1 require './test.pl'; 48*256a93a4Safresh1 require Config; 49*256a93a4Safresh1 Config->import; 50*256a93a4Safresh1} 51*256a93a4Safresh1 52*256a93a4Safresh1skip_all_without_config('d_fork'); 53*256a93a4Safresh1skip_all("NO_PERL_HASH_ENV or NO_PERL_HASH_SEED_DEBUG set") 54*256a93a4Safresh1 if $Config{ccflags} =~ /-DNO_PERL_HASH_ENV\b/ 55*256a93a4Safresh1 || $Config{ccflags} =~ /-DNO_PERL_HASH_SEED_DEBUG\b/; 56*256a93a4Safresh1use strict; 57*256a93a4Safresh1use warnings; 58*256a93a4Safresh1 59*256a93a4Safresh1# enable DEBUG_RUNENV if you want to see what is being returned 60*256a93a4Safresh1# by the executed perl. 61*256a93a4Safresh1sub my_runperl { 62*256a93a4Safresh1 my ($cmd_array, $perturb, $set_seed) = @_; 63*256a93a4Safresh1 my $opts_hash= { 64*256a93a4Safresh1 PERL_HASH_SEED_DEBUG => 1, 65*256a93a4Safresh1 PERL_PERTURB_KEYS => $perturb 66*256a93a4Safresh1 }; 67*256a93a4Safresh1 $opts_hash->{PERL_HASH_SEED}= $set_seed if $set_seed; 68*256a93a4Safresh1 69*256a93a4Safresh1 my ( $out, $err ) 70*256a93a4Safresh1 = runperl_and_capture( $opts_hash, $cmd_array ); 71*256a93a4Safresh1 my @err= split /\n/, $err; 72*256a93a4Safresh1 73*256a93a4Safresh1 my $seed; 74*256a93a4Safresh1 my $mode_name; 75*256a93a4Safresh1 my $mode_digit; 76*256a93a4Safresh1 my @err_got_data; 77*256a93a4Safresh1 my @rand_bits; 78*256a93a4Safresh1 foreach my $line (@err) { 79*256a93a4Safresh1 if ($line=~/^Got.*/) { 80*256a93a4Safresh1 push @err_got_data, $line; 81*256a93a4Safresh1 } 82*256a93a4Safresh1 elsif ($line=~/^PL_hash_rand_bits=.*/) { 83*256a93a4Safresh1 push @rand_bits, $line; 84*256a93a4Safresh1 } 85*256a93a4Safresh1 elsif ($line=~/HASH_SEED = (0x[a-f0-9]+)/) { 86*256a93a4Safresh1 $seed= $1; 87*256a93a4Safresh1 $line =~ /PERTURB_KEYS = (\d) \((\w+)\)/ 88*256a93a4Safresh1 or die "Failed to extract perturb mode: $err"; 89*256a93a4Safresh1 $mode_digit = $1; 90*256a93a4Safresh1 $mode_name = $2; 91*256a93a4Safresh1 92*256a93a4Safresh1 } 93*256a93a4Safresh1 } 94*256a93a4Safresh1 if (!$seed){ 95*256a93a4Safresh1 die "Failed to extract seed: $err"; 96*256a93a4Safresh1 } 97*256a93a4Safresh1 my $err_got_data= join("\n",@err_got_data); 98*256a93a4Safresh1 return ($seed, $mode_digit, $mode_name, $out, $err_got_data, \@rand_bits); 99*256a93a4Safresh1} 100*256a93a4Safresh1 101*256a93a4Safresh1my @mode_names = ( 102*256a93a4Safresh1 'NO', # 0 103*256a93a4Safresh1 'RANDOM', # 1 104*256a93a4Safresh1 'DETERMINISTIC', # 2 105*256a93a4Safresh1); 106*256a93a4Safresh1 107*256a93a4Safresh1my $repeat = 50; # if this changes adjust the comments below. 108*256a93a4Safresh1my $min_buckets = 100_000; 109*256a93a4Safresh1my $actual_buckets = 1; 110*256a93a4Safresh1$actual_buckets *= 2 while $actual_buckets <= $min_buckets; 111*256a93a4Safresh1my $key_expr = '0..999, "aa".."zz", map { $_ x 30 } "a".."z"'; #1702 keys 112*256a93a4Safresh1my @keys = eval $key_expr 113*256a93a4Safresh1 or die "bad '$key_expr': $@"; 114*256a93a4Safresh1my $allowed_fails = 2; # Adjust this up to make the test tolerate 115*256a93a4Safresh1 # more "errors". Maybe one day we will compute 116*256a93a4Safresh1 # it from the value of $repeat, and $actual_buckets 117*256a93a4Safresh1 # and the number of @keys. 118*256a93a4Safresh1 119*256a93a4Safresh1plan tests => (4 * $repeat) # DETERMINISTIC 120*256a93a4Safresh1 + (1 * $repeat) # NO 121*256a93a4Safresh1 + 1 # RANDOM mode 122*256a93a4Safresh1 + (8 * @mode_names) # validation per mode 123*256a93a4Safresh1 + @mode_names; # all modes 124*256a93a4Safresh1 125*256a93a4Safresh1 126*256a93a4Safresh1# Note the keys(%h) = $n will cause perl to allocate the power of 2 larger 127*256a93a4Safresh1# than $n buckets, so if $n = 100_000, then $actual_buckets will be 131072. 128*256a93a4Safresh1 129*256a93a4Safresh1my @perl_args = ( 130*256a93a4Safresh1 '-I../lib', 131*256a93a4Safresh1 (is_miniperl() ? () # no Hash::Util here! 132*256a93a4Safresh1 : '-MHash::Util=hash_traversal_mask,num_buckets'), 133*256a93a4Safresh1 '-e', 134*256a93a4Safresh1 'my %h; keys(%h)=' . $min_buckets . '; ' . 135*256a93a4Safresh1 '@h{' . $key_expr . '}=(); @k=keys %h; ' . 136*256a93a4Safresh1 'print join ":", 0+@k, ' . 137*256a93a4Safresh1 (is_miniperl() ? '' : # no Hash::Util here! 138*256a93a4Safresh1 'num_buckets(%h),hash_traversal_mask(\\%h), ') . 139*256a93a4Safresh1 'join ",", @k;' 140*256a93a4Safresh1 ); 141*256a93a4Safresh1 142*256a93a4Safresh1for my $test_mode_digit (0 .. $#mode_names) { 143*256a93a4Safresh1 my $test_mode_name = $mode_names[$test_mode_digit]; 144*256a93a4Safresh1 my $descr_mode = "mode = $test_mode_name"; 145*256a93a4Safresh1 146*256a93a4Safresh1 my $print_keys= [ ($test_mode_name eq "DETERMINISTIC") 147*256a93a4Safresh1 ? "-Dh" : (), # enable hash diags 148*256a93a4Safresh1 @perl_args ]; 149*256a93a4Safresh1 150*256a93a4Safresh1 my $validated_mode= 0; 151*256a93a4Safresh1 my $random_same = 0; 152*256a93a4Safresh1 my $seed_change_same = 0; 153*256a93a4Safresh1 for my $try (1 .. $repeat) { 154*256a93a4Safresh1 155*256a93a4Safresh1 my $descr = sprintf "%s, try %2d:", $descr_mode, $try; 156*256a93a4Safresh1 157*256a93a4Safresh1 # First let perl choose the seed. We only use the $seed and $err 158*256a93a4Safresh1 # output here. We extract the seed that perl chose, which 159*256a93a4Safresh1 # hardens us against the use of different hash functions with 160*256a93a4Safresh1 # different seed sizes. Also the act of adding the PERL_HASH_SEED 161*256a93a4Safresh1 # to the environment later on will likely change the $out. 162*256a93a4Safresh1 my ( $seed, $digit, $mode ) 163*256a93a4Safresh1 = my_runperl( ['-e1'], $test_mode_name ); 164*256a93a4Safresh1 165*256a93a4Safresh1 # Now we have to run it again. 166*256a93a4Safresh1 my ( $seed1, $digit1, $mode1, $out1, $err_got_data1, $rand_bits1 ) 167*256a93a4Safresh1 = my_runperl( $print_keys, $test_mode_name, $seed ); 168*256a93a4Safresh1 169*256a93a4Safresh1 # And once more, these two should do the same thing for 170*256a93a4Safresh1 # DETERMINISTIC and NO, and be different for RANDOM. 171*256a93a4Safresh1 # We set the mode via the digit not the name here. 172*256a93a4Safresh1 my ( $seed2, $digit2, $mode2, $out2, $err_got_data2, $rand_bits2 ) 173*256a93a4Safresh1 = my_runperl( $print_keys, $test_mode_digit, $seed ); 174*256a93a4Safresh1 175*256a93a4Safresh1 if (!$validated_mode++) { 176*256a93a4Safresh1 is($digit, $test_mode_digit, 177*256a93a4Safresh1 "$descr base run set the mode digit as expected"); 178*256a93a4Safresh1 179*256a93a4Safresh1 is($mode, $test_mode_name, 180*256a93a4Safresh1 "$descr base run set the mode name as expected"); 181*256a93a4Safresh1 182*256a93a4Safresh1 is( $seed1, $seed, 183*256a93a4Safresh1 "$descr retry 1 set the seed as expected"); 184*256a93a4Safresh1 185*256a93a4Safresh1 is( $mode1, $test_mode_name, 186*256a93a4Safresh1 "$descr retry 1 set the mode by name as expected"); 187*256a93a4Safresh1 188*256a93a4Safresh1 is( $digit2, $test_mode_digit, 189*256a93a4Safresh1 "$descr retry 2 set the mode by digit as expected"); 190*256a93a4Safresh1 191*256a93a4Safresh1 is( $seed1, $seed2, 192*256a93a4Safresh1 "$descr seeds match between retries"); 193*256a93a4Safresh1 194*256a93a4Safresh1 is( $digit1, $digit2, 195*256a93a4Safresh1 "$descr mode digits match between retries"); 196*256a93a4Safresh1 197*256a93a4Safresh1 is( $mode1, $mode2, 198*256a93a4Safresh1 "$descr mode names match between retries"); 199*256a93a4Safresh1 } 200*256a93a4Safresh1 201*256a93a4Safresh1 { 202*256a93a4Safresh1 # We also test that a 1 bit change to the seed will 203*256a93a4Safresh1 # actually change the output in all modes. It should 204*256a93a4Safresh1 # most of the time. 205*256a93a4Safresh1 my $munged_seed = $seed; 206*256a93a4Safresh1 substr($munged_seed,-1)=~tr/0-9a-f/1-9a-f0/; 207*256a93a4Safresh1 if ( $munged_seed eq $seed ) { 208*256a93a4Safresh1 die "Failed to munge seed '$seed'"; 209*256a93a4Safresh1 } 210*256a93a4Safresh1 211*256a93a4Safresh1 my ( $new_seed, $new_digit, $new_mode, $new_out ) 212*256a93a4Safresh1 = my_runperl( \@perl_args, $test_mode_name, $munged_seed ); 213*256a93a4Safresh1 if ($new_seed ne $munged_seed) { 214*256a93a4Safresh1 die "panic: seed change didn't seem to propagate"; 215*256a93a4Safresh1 } 216*256a93a4Safresh1 if ( 217*256a93a4Safresh1 $new_mode ne $test_mode_name or 218*256a93a4Safresh1 $new_digit ne $test_mode_digit 219*256a93a4Safresh1 ) { 220*256a93a4Safresh1 die "panic: mode setting not as expected"; 221*256a93a4Safresh1 } 222*256a93a4Safresh1 223*256a93a4Safresh1 # The result should be different most times, but there 224*256a93a4Safresh1 # is a small chance that we got the same result, so 225*256a93a4Safresh1 # count how many times it happens and then check if it 226*256a93a4Safresh1 # exceeds $allowed_fails later. 227*256a93a4Safresh1 $seed_change_same++ if $out1 eq $new_out; 228*256a93a4Safresh1 } 229*256a93a4Safresh1 230*256a93a4Safresh1 if ( $test_mode_name eq 'RANDOM' ) { 231*256a93a4Safresh1 # The result should be different most times, but there is a 232*256a93a4Safresh1 # small chance that we get the same result, so count how 233*256a93a4Safresh1 # many times it happens and then check if it exceeds 234*256a93a4Safresh1 # $allowed_fails later. 235*256a93a4Safresh1 $random_same++ if $out1 eq $out2; 236*256a93a4Safresh1 next; 237*256a93a4Safresh1 } 238*256a93a4Safresh1 239*256a93a4Safresh1 # From this point on we are testing DETERMINISTIC and NO 240*256a93a4Safresh1 # modes only. 241*256a93a4Safresh1 242*256a93a4Safresh1 is( $out1, $out2, 243*256a93a4Safresh1 "$descr results in the same key order each time" 244*256a93a4Safresh1 ); 245*256a93a4Safresh1 246*256a93a4Safresh1 next if $test_mode_name eq "NO"; 247*256a93a4Safresh1 248*256a93a4Safresh1 # From this point on we are testing the DETERMINISTIC 249*256a93a4Safresh1 # mode only. 250*256a93a4Safresh1 251*256a93a4Safresh1 SKIP: { 252*256a93a4Safresh1 # skip these tests if we are not running in a DEBUGGING perl. 253*256a93a4Safresh1 skip "$descr not testing rand bits, not a DEBUGGING perl", 3 254*256a93a4Safresh1 if @$rand_bits1 + @$rand_bits2 == 0; 255*256a93a4Safresh1 256*256a93a4Safresh1 is ( 0+@$rand_bits1, 0+@$rand_bits2, 257*256a93a4Safresh1 "$descr same count of rand_bits entries each time"); 258*256a93a4Safresh1 259*256a93a4Safresh1 my $max_i = $#$rand_bits1 > $#$rand_bits2 260*256a93a4Safresh1 ? $#$rand_bits1 : $#$rand_bits2; 261*256a93a4Safresh1 262*256a93a4Safresh1 my $bad_idx; 263*256a93a4Safresh1 for my $i (0 .. $max_i) { 264*256a93a4Safresh1 if (($rand_bits2->[$i] // "") ne 265*256a93a4Safresh1 ($rand_bits1->[$i] // "")) 266*256a93a4Safresh1 { 267*256a93a4Safresh1 $bad_idx = $i; 268*256a93a4Safresh1 last; 269*256a93a4Safresh1 } 270*256a93a4Safresh1 } 271*256a93a4Safresh1 is($bad_idx, undef, 272*256a93a4Safresh1 "$descr bad rand bits data index should be undef"); 273*256a93a4Safresh1 if (defined $bad_idx) { 274*256a93a4Safresh1 # we use is() to see the differing data, but this test 275*256a93a4Safresh1 # is expected to fail - the description seems a little 276*256a93a4Safresh1 # odd here, but since it will always fail it makes sense 277*256a93a4Safresh1 # in context. 278*256a93a4Safresh1 is($rand_bits2->[$bad_idx],$rand_bits1->[$bad_idx], 279*256a93a4Safresh1 "$descr rand bits data is the same at idx $bad_idx"); 280*256a93a4Safresh1 } else { 281*256a93a4Safresh1 pass("$descr rand bits data is the same"); 282*256a93a4Safresh1 } 283*256a93a4Safresh1 } 284*256a93a4Safresh1 } 285*256a93a4Safresh1 continue { 286*256a93a4Safresh1 # We create a lot of big temp files so clean them up as we go. 287*256a93a4Safresh1 # This is in a continue block so we can do this cleanup after 288*256a93a4Safresh1 # each iteration even if we call next in the middle of the loop. 289*256a93a4Safresh1 unlink_tempfiles(); 290*256a93a4Safresh1 } 291*256a93a4Safresh1 292*256a93a4Safresh1 # We just finished $repeat tests, now deal with the probabilistic 293*256a93a4Safresh1 # results and ensure that we are under the $allowed_fails threshold 294*256a93a4Safresh1 295*256a93a4Safresh1 if ($test_mode_name eq "RANDOM") { 296*256a93a4Safresh1 # There is a small chance we got the same result a few times 297*256a93a4Safresh1 # even when everything is working as expected. So allow a 298*256a93a4Safresh1 # small number number of fails determined by $allowed_fails. 299*256a93a4Safresh1 ok( $random_same <= $allowed_fails, 300*256a93a4Safresh1 "$descr_mode same key order no more than $allowed_fails times") 301*256a93a4Safresh1 or diag( 302*256a93a4Safresh1 "Key order was the same $random_same/$repeat times in", 303*256a93a4Safresh1 "RANDOM mode. This test is probabilistic so if the number", 304*256a93a4Safresh1 "is low and you re-run the tests and it does not fail", 305*256a93a4Safresh1 "again then you can ignore this test fail."); 306*256a93a4Safresh1 307*256a93a4Safresh1 } 308*256a93a4Safresh1 309*256a93a4Safresh1 # There is a small chance we got the same result a few times even 310*256a93a4Safresh1 # when everything is working as expected. So allow a small number 311*256a93a4Safresh1 # of fails as determined by $allowed_fails. 312*256a93a4Safresh1 ok( $seed_change_same <= $allowed_fails, 313*256a93a4Safresh1 "$descr_mode same key order with different seed no more " . 314*256a93a4Safresh1 "than $allowed_fails times" ) 315*256a93a4Safresh1 or diag( 316*256a93a4Safresh1 "Key order was the same $random_same/$repeat times with", 317*256a93a4Safresh1 "a different seed. This test is probabilistic so if the number", 318*256a93a4Safresh1 "is low and you re-run the tests and it does not fail", 319*256a93a4Safresh1 "again then you can ignore this test fail."); 320*256a93a4Safresh1} 321