t/run/runenv_hashseed.t

*256a93a4Safresh1#!./perl
*256a93a4Safresh1
*256a93a4Safresh1# Test that setting PERL_HASH_SEED and PERL_PERTURB_KEYS in different
*256a93a4Safresh1# combinations works as expected, and that changing the values provided
*256a93a4Safresh1# produces the expected results
*256a93a4Safresh1#
*256a93a4Safresh1# We do this by first executing Perl with a given PERL_PERTURB_KEYS
*256a93a4Safresh1# mode, and then extract the randomly chosen PERL_HASH_SEED it ran under
*256a93a4Safresh1# from its debug output which was printed to STDERR, and then use it for
*256a93a4Safresh1# further tests. This allows the tests to be robust to the choice of hash
*256a93a4Safresh1# function and seed sizes that might be in use in the perl being tested.
*256a93a4Safresh1# We do not ask perl to output any keys on this run, as our subsequent
*256a93a4Safresh1# runs will use different environment variables (specifically
*256a93a4Safresh1# PERL_HASH_SEED) which will change any key order results we see.
*256a93a4Safresh1#
*256a93a4Safresh1# We then execute perl a further three times and ask perl to build a
*256a93a4Safresh1# hash with a specific number of buckets and a specific set of keys. We
*256a93a4Safresh1# then have perl print the raw keys to STDOUT.
*256a93a4Safresh1#
*256a93a4Safresh1# For two of these three runs we supply the same seed, and both of those
*256a93a4Safresh1# times we supply the same perturb mode, but in different ways, once as
*256a93a4Safresh1# a name and once as a digit. The debug output should be identical in
*256a93a4Safresh1# both cases regardless of mode. For PERL_PERTURB_KEYS mode 0=NO, and
*256a93a4Safresh1# 2=DETERMINISTIC the key order should match. For mode 1=RANDOM the key
*256a93a4Safresh1# order should differ the vast majority of the time, however the test is
*256a93a4Safresh1# probabilistic and occasionally may result in the same key order.
*256a93a4Safresh1#
*256a93a4Safresh1# The third run we supply a different seed (its last hex digit changed), but
*256a93a4Safresh1# with the same PERL_PERTURB_KEYS mode. In this case we expect the key
*256a93a4Safresh1# order to differ for all three modes, but again the test is
*256a93a4Safresh1# probabilistic and we may get the same key order in a small percentage
*256a93a4Safresh1# of the times we try this.
*256a93a4Safresh1#
*256a93a4Safresh1# To address the probabilistic nature of these tests we run them
*256a93a4Safresh1# multiple times and count how many times we get the same key order.
*256a93a4Safresh1# Most times this should be zero, but occasionally it might be higher.
*256a93a4Safresh1# Therefore we use a threshold $allowed_fails to determine how many
*256a93a4Safresh1# times the key order may be unchanged before we consider the tests
*256a93a4Safresh1# actually failed. We also use a largish number of keys in a hash with
*256a93a4Safresh1# a large number of buckets, which means we produce a lot of large temp
*256a93a4Safresh1# files as we test, so we aggressively clean them up as we go.
*256a93a4Safresh1
*256a93a4Safresh1
# Bootstrap: move into t/ when it exists, restrict @INC to ../lib, load
# the shared test helpers from ./test.pl and import %Config.
BEGIN {
    -d 't' and chdir 't';
    @INC = ('../lib');
    require './test.pl';
    require Config;
    Config->import();
}

# Skip the whole file unless the d_fork config entry is set.
skip_all_without_config('d_fork');

# Skip as well when perl was compiled with either define named in the
# skip message below; the test relies on the hash environment variables
# and on the seed debug output.
if ( $Config{ccflags} =~ /-DNO_PERL_HASH_(?:ENV|SEED_DEBUG)\b/ ) {
    skip_all("NO_PERL_HASH_ENV or NO_PERL_HASH_SEED_DEBUG set");
}
*256a93a4Safresh1use strict;
*256a93a4Safresh1use warnings;
*256a93a4Safresh1
# my_runperl( \@cmd_array, $perturb [, $set_seed] )
#
# Runs a child perl through runperl_and_capture() with
# PERL_HASH_SEED_DEBUG=1 and PERL_PERTURB_KEYS=$perturb in its
# environment hash; PERL_HASH_SEED is added only when $set_seed is true.
# The child's STDERR is then scanned line by line.
#
# Returns the list:
#   $seed        - the 0x... value from the "HASH_SEED = " line
#   $mode_digit  - digit from "PERTURB_KEYS = <digit> (<name>)"
#   $mode_name   - name from the same line
#   $out         - the child's STDOUT, untouched
#   $got         - all STDERR lines starting with "Got", joined by "\n"
#   \@rand_bits  - all STDERR lines starting with "PL_hash_rand_bits="
#
# Dies when no seed is found, or when the seed line carries no perturb
# mode.
#
# NOTE(review): the earlier comment here said to enable DEBUG_RUNENV to
# see what the executed perl returns; nothing in this file defines it,
# so confirm where (if anywhere) it is honoured.
sub my_runperl {
    my ($cmd_array, $perturb, $set_seed) = @_;

    my %child_env = (
        PERL_HASH_SEED_DEBUG => 1,
        PERL_PERTURB_KEYS    => $perturb,
    );
    if ($set_seed) {
        $child_env{PERL_HASH_SEED} = $set_seed;
    }

    my ( $stdout, $stderr )
        = runperl_and_capture( \%child_env, $cmd_array );

    my ( $seed, $mode_digit, $mode_name );
    my ( @got_lines, @rand_bits );

    LINE:
    for my $line ( split /\n/, $stderr ) {
        if ( $line =~ /^Got.*/ ) {
            push @got_lines, $line;
            next LINE;
        }
        if ( $line =~ /^PL_hash_rand_bits=.*/ ) {
            push @rand_bits, $line;
            next LINE;
        }
        next LINE unless $line =~ /HASH_SEED = (0x[a-f0-9]+)/;

        # The seed line is expected to report the perturb mode too.
        $seed = $1;
        ( $mode_digit, $mode_name )
            = $line =~ /PERTURB_KEYS = (\d) \((\w+)\)/
            or die "Failed to extract perturb mode: $stderr";
    }

    die "Failed to extract seed: $stderr"
        unless $seed;

    return (
        $seed,
        $mode_digit,
        $mode_name,
        $stdout,
        join( "\n", @got_lines ),
        \@rand_bits,
    );
}
*256a93a4Safresh1
# PERL_PERTURB_KEYS mode names; the array index is the mode digit.
my @mode_names = qw(NO RANDOM DETERMINISTIC);    # 0, 1, 2

# Number of tries per mode; if this changes adjust the comments below.
my $repeat = 50;

# Bucket count requested in the child perl, and the first power of two
# strictly greater than it.
my $min_buckets    = 100_000;
my $actual_buckets = 1;
$actual_buckets <<= 1 until $actual_buckets > $min_buckets;

# Expression the child evaluates to get its hash keys (1702 keys). It is
# evaluated here once as well so that a broken expression dies early.
my $key_expr = '0..999, "aa".."zz", map { $_ x 30 } "a".."z"';
my @keys     = eval $key_expr;
die "bad '$key_expr': $@"
    unless @keys;

# How many "same key order" results are tolerated per probabilistic
# check. Adjust this up to make the test tolerate more "errors". Maybe
# one day it will be computed from $repeat, $actual_buckets and the
# number of @keys.
my $allowed_fails = 2;

plan(
    tests => do {
        my $deterministic = 4 * $repeat;        # DETERMINISTIC
        my $no_perturb    = 1 * $repeat;        # NO
        my $random        = 1;                  # RANDOM mode
        my $validation    = 8 * @mode_names;    # validation per mode
        my $every_mode    = scalar @mode_names; # all modes
        $deterministic + $no_perturb + $random + $validation + $every_mode;
    }
);
*256a93a4Safresh1
# Command line for the child perl that builds the hash and prints its keys.
#
# Note the keys(%h) = $n will cause perl to allocate the power of 2 larger
# than $n buckets, so if $n = 100_000, then $actual_buckets will be 131072.
#
# The child prints, joined by ":", the key count, then (unless running
# under miniperl, where Hash::Util is not available) num_buckets and
# hash_traversal_mask for the hash, then the keys joined by ",".
my @perl_args = do {
    my $mini = is_miniperl();    # no Hash::Util here!

    my @load_hash_util = $mini
        ? ()
        : ('-MHash::Util=hash_traversal_mask,num_buckets');

    my $bucket_report = $mini
        ? ''
        : 'num_buckets(%h),hash_traversal_mask(\\%h), ';

    my $program = join '',
        'my %h; keys(%h)=', $min_buckets, '; ',
        '@h{', $key_expr, '}=(); @k=keys %h; ',
        'print join ":", 0+@k, ',
        $bucket_report,
        'join ",", @k;';

    ( '-I../lib', @load_hash_util, '-e', $program );
};
*256a93a4Safresh1
# Main driver. For every PERL_PERTURB_KEYS mode we make $repeat tries;
# each try runs the child perl four times:
#   1. with no seed given, to learn which seed perl picked;
#   2. with that seed, mode given by name;
#   3. with that seed, mode given by digit;
#   4. with a modified seed, mode given by name.
# Runs 2 and 3 must agree for NO and DETERMINISTIC, and should usually
# differ for RANDOM. Run 4 should usually differ from run 2 in every
# mode. The "usually" cases are counted and compared against
# $allowed_fails once the tries for the mode are done.
for my $test_mode_digit (0 .. $#mode_names) {
    my $test_mode_name = $mode_names[$test_mode_digit];
    my $descr_mode = "mode = $test_mode_name";

    # Only the DETERMINISTIC runs pass -Dh; presumably that is what makes
    # a DEBUGGING perl emit the PL_hash_rand_bits lines compared below.
    my $print_keys= [ ($test_mode_name eq "DETERMINISTIC")
                      ? "-Dh" : (), # enable hash diags
                      @perl_args ];

    my $validated_mode= 0;     # the 8 validation tests run once per mode
    my $random_same = 0;       # RANDOM only: tries where run 2 eq run 3
    my $seed_change_same = 0;  # tries where run 2 eq run 4
    for my $try (1 .. $repeat) {

        my $descr = sprintf "%s, try %2d:", $descr_mode, $try;

        # First let perl choose the seed. We only use the seed and the
        # reported mode here. We extract the seed that perl chose, which
        # hardens us against the use of different hash functions with
        # different seed sizes. Also the act of adding the PERL_HASH_SEED
        # to the environment later on will likely change the key order,
        # so no keys are printed on this run.
        my ( $seed, $digit, $mode )
            = my_runperl( ['-e1'], $test_mode_name );

        # Now we have to run it again, this time with the seed fixed.
        # The fifth return value (the "Got" lines) is not needed here.
        my ( $seed1, $digit1, $mode1, $out1, undef, $rand_bits1 )
            = my_runperl( $print_keys, $test_mode_name, $seed );

        # And once more, these two should do the same thing for
        # DETERMINISTIC and NO, and be different for RANDOM.
        # We set the mode via the digit not the name here.
        my ( $seed2, $digit2, $mode2, $out2, undef, $rand_bits2 )
            = my_runperl( $print_keys, $test_mode_digit, $seed );

        if (!$validated_mode++) {
            is($digit, $test_mode_digit,
                "$descr base run set the mode digit as expected");

            is($mode, $test_mode_name,
                "$descr base run set the mode name as expected");

            is( $seed1, $seed,
                "$descr retry 1 set the seed as expected");

            is( $mode1, $test_mode_name,
                "$descr retry 1 set the mode by name as expected");

            is( $digit2, $test_mode_digit,
                "$descr retry 2 set the mode by digit as expected");

            is( $seed1, $seed2,
                "$descr seeds match between retries");

            is( $digit1, $digit2,
                "$descr mode digits match between retries");

            is( $mode1, $mode2,
                "$descr mode names match between retries");
        }

        {
            # We also test that a small change to the seed will
            # actually change the output in all modes. It should
            # most of the time. The tr below rotates the last hex
            # digit to the next one (f wraps around to 0).
            my $munged_seed = $seed;
            substr($munged_seed,-1)=~tr/0-9a-f/1-9a-f0/;
            if ( $munged_seed eq $seed ) {
                die "Failed to munge seed '$seed'";
            }

            my ( $new_seed, $new_digit, $new_mode, $new_out )
                = my_runperl( \@perl_args, $test_mode_name, $munged_seed );
            if ($new_seed ne $munged_seed) {
                die "panic: seed change didn't seem to propagate";
            }
            if (
                $new_mode  ne $test_mode_name or
                $new_digit ne $test_mode_digit
            ) {
                die "panic: mode setting not as expected";
            }

            # The result should be different most times, but there
            # is a small chance that we got the same result, so
            # count how many times it happens and then check if it
            # exceeds $allowed_fails later.
            $seed_change_same++ if $out1 eq $new_out;
        }

        if ( $test_mode_name eq 'RANDOM' ) {
            # The result should be different most times, but there is a
            # small chance that we get the same result, so count how
            # many times it happens and then check if it exceeds
            # $allowed_fails later.
            $random_same++ if $out1 eq $out2;
            next;
        }

        # From this point on we are testing DETERMINISTIC and NO
        # modes only.

        is( $out1, $out2,
            "$descr results in the same key order each time"
        );

        next if $test_mode_name eq "NO";

        # From this point on we are testing the DETERMINISTIC
        # mode only.

        SKIP: {
            # skip these tests if we are not running in a DEBUGGING perl.
            # The skip count of 3 must match the number of tests below.
            skip "$descr not testing rand bits, not a DEBUGGING perl", 3
                if @$rand_bits1 + @$rand_bits2 == 0;

            is ( 0+@$rand_bits1, 0+@$rand_bits2,
                "$descr same count of rand_bits entries each time");

            # Walk up to the longer of the two lists so that a missing
            # trailing entry also counts as a difference.
            my $max_i = $#$rand_bits1 > $#$rand_bits2
                      ? $#$rand_bits1 : $#$rand_bits2;

            my $bad_idx;
            for my $i (0 .. $max_i) {
                if (($rand_bits2->[$i] // "") ne
                    ($rand_bits1->[$i] // ""))
                {
                    $bad_idx = $i;
                    last;
                }
            }
            is($bad_idx, undef,
                "$descr bad rand bits data index should be undef");
            if (defined $bad_idx) {
                # we use is() to see the differing data, but this test
                # is expected to fail - the description seems a little
                # odd here, but since it will always fail it makes sense
                # in context.
                is($rand_bits2->[$bad_idx],$rand_bits1->[$bad_idx],
                    "$descr rand bits data is the same at idx $bad_idx");
            } else {
                pass("$descr rand bits data is the same");
            }
        }
    }
    continue {
        # We create a lot of big temp files so clean them up as we go.
        # This is in a continue block so we can do this cleanup after
        # each iteration even if we call next in the middle of the loop.
        unlink_tempfiles();
    }

    # We just finished $repeat tests, now deal with the probabilistic
    # results and ensure that we are under the $allowed_fails threshold

    if ($test_mode_name eq "RANDOM") {
        # There is a small chance we got the same result a few times
        # even when everything is working as expected. So allow a
        # small number of fails determined by $allowed_fails.
        ok( $random_same <= $allowed_fails,
            "$descr_mode same key order no more than $allowed_fails times")
            or diag(
                "Key order was the same $random_same/$repeat times in",
                "RANDOM mode. This test is probabilistic so if the number",
                "is low and you re-run the tests and it does not fail",
                "again then you can ignore this test fail.");

    }

    # There is a small chance we got the same result a few times even
    # when everything is working as expected. So allow a small number
    # of fails as determined by $allowed_fails. The diagnostic reports
    # $seed_change_same, the counter this check is actually based on.
    ok( $seed_change_same <= $allowed_fails,
        "$descr_mode same key order with different seed no more " .
        "than $allowed_fails times" )
        or diag(
            "Key order was the same $seed_change_same/$repeat times with",
            "a different seed. This test is probabilistic so if the number",
            "is low and you re-run the tests and it does not fail",
            "again then you can ignore this test fail.");
}