xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/t/op/hash.t (revision 0:68f95e015346)
1*0Sstevel@tonic-gate#!./perl -w
2*0Sstevel@tonic-gate
3*0Sstevel@tonic-gateBEGIN {
4*0Sstevel@tonic-gate    chdir 't' if -d 't';
5*0Sstevel@tonic-gate    @INC = '../lib';
6*0Sstevel@tonic-gate    require './test.pl';
7*0Sstevel@tonic-gate}
8*0Sstevel@tonic-gate
9*0Sstevel@tonic-gateuse strict;
10*0Sstevel@tonic-gate
11*0Sstevel@tonic-gateplan tests => 5;
12*0Sstevel@tonic-gate
13*0Sstevel@tonic-gatemy %h;
14*0Sstevel@tonic-gate
15*0Sstevel@tonic-gateok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on");
16*0Sstevel@tonic-gate
17*0Sstevel@tonic-gateforeach (1..10) {
18*0Sstevel@tonic-gate  $h{"\0"x$_}++;
19*0Sstevel@tonic-gate}
20*0Sstevel@tonic-gate
21*0Sstevel@tonic-gateok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash");
22*0Sstevel@tonic-gate
23*0Sstevel@tonic-gateforeach (11..20) {
24*0Sstevel@tonic-gate  $h{"\0"x$_}++;
25*0Sstevel@tonic-gate}
26*0Sstevel@tonic-gate
27*0Sstevel@tonic-gateok (Internals::HvREHASH(%h), "20 entries triggers rehash");
28*0Sstevel@tonic-gate
29*0Sstevel@tonic-gate
30*0Sstevel@tonic-gate
31*0Sstevel@tonic-gate
32*0Sstevel@tonic-gate# second part using an emulation of the PERL_HASH in perl, mounting an
33*0Sstevel@tonic-gate# attack on a prepopulated hash. This is also useful if you need normal
34*0Sstevel@tonic-gate# keys which don't contain \0 -- suitable for stashes
35*0Sstevel@tonic-gate
36*0Sstevel@tonic-gateuse constant MASK_U32  => 2**32;
37*0Sstevel@tonic-gateuse constant HASH_SEED => 0;
38*0Sstevel@tonic-gateuse constant THRESHOLD => 14;
39*0Sstevel@tonic-gateuse constant START     => "a";
40*0Sstevel@tonic-gate
41*0Sstevel@tonic-gate# some initial hash data
42*0Sstevel@tonic-gatemy %h2 = map {$_ => 1} 'a'..'cc';
43*0Sstevel@tonic-gate
44*0Sstevel@tonic-gateok (!Internals::HvREHASH(%h2),
45*0Sstevel@tonic-gate    "starting with pre-populated non-pathalogical hash (rehash flag if off)");
46*0Sstevel@tonic-gate
47*0Sstevel@tonic-gatemy @keys = get_keys(\%h2);
48*0Sstevel@tonic-gate$h2{$_}++ for @keys;
49*0Sstevel@tonic-gateok (Internals::HvREHASH(%h2),
50*0Sstevel@tonic-gate    scalar(@keys) . " colliding into the same bucket keys are triggerring rehash");
51*0Sstevel@tonic-gate
52*0Sstevel@tonic-gatesub get_keys {
53*0Sstevel@tonic-gate    my $hr = shift;
54*0Sstevel@tonic-gate
55*0Sstevel@tonic-gate    # the minimum of bits required to mount the attack on a hash
56*0Sstevel@tonic-gate    my $min_bits = log(THRESHOLD)/log(2);
57*0Sstevel@tonic-gate
58*0Sstevel@tonic-gate    # if the hash has already been populated with a significant amount
59*0Sstevel@tonic-gate    # of entries the number of mask bits can be higher
60*0Sstevel@tonic-gate    my $keys = scalar keys %$hr;
61*0Sstevel@tonic-gate    my $bits = $keys ? log($keys)/log(2) : 0;
62*0Sstevel@tonic-gate    $bits = $min_bits if $min_bits > $bits;
63*0Sstevel@tonic-gate
64*0Sstevel@tonic-gate    $bits = int($bits) < $bits ? int($bits) + 1 : int($bits);
65*0Sstevel@tonic-gate    # need to add 2 bits to cover the internal split cases
66*0Sstevel@tonic-gate    $bits += 2;
67*0Sstevel@tonic-gate    my $mask = 2**$bits-1;
68*0Sstevel@tonic-gate    print "# using mask: $mask ($bits)\n";
69*0Sstevel@tonic-gate
70*0Sstevel@tonic-gate    my @keys;
71*0Sstevel@tonic-gate    my $s = START;
72*0Sstevel@tonic-gate    my $c = 0;
73*0Sstevel@tonic-gate    # get 2 keys on top of the THRESHOLD
74*0Sstevel@tonic-gate    my $hash;
75*0Sstevel@tonic-gate    while (@keys < THRESHOLD+2) {
76*0Sstevel@tonic-gate        # next if exists $hash->{$s};
77*0Sstevel@tonic-gate        $hash = hash($s);
78*0Sstevel@tonic-gate        next unless ($hash & $mask) == 0;
79*0Sstevel@tonic-gate        $c++;
80*0Sstevel@tonic-gate        printf "# %2d: %5s, %10s\n", $c, $s, $hash;
81*0Sstevel@tonic-gate        push @keys, $s;
82*0Sstevel@tonic-gate    } continue {
83*0Sstevel@tonic-gate        $s++;
84*0Sstevel@tonic-gate    }
85*0Sstevel@tonic-gate
86*0Sstevel@tonic-gate    return @keys;
87*0Sstevel@tonic-gate}
88*0Sstevel@tonic-gate
89*0Sstevel@tonic-gate
90*0Sstevel@tonic-gate# trying to provide the fastest equivalent of C macro's PERL_HASH in
91*0Sstevel@tonic-gate# Perl - the main complication is that it uses U32 integer, which we
92*0Sstevel@tonic-gate# can't do it perl, without doing some tricks
93*0Sstevel@tonic-gatesub hash {
94*0Sstevel@tonic-gate    my $s = shift;
95*0Sstevel@tonic-gate    my @c = split //, $s;
96*0Sstevel@tonic-gate    my $u = HASH_SEED;
97*0Sstevel@tonic-gate    for (@c) {
98*0Sstevel@tonic-gate        # (A % M) + (B % M) == (A + B) % M
99*0Sstevel@tonic-gate        # This works because '+' produces a NV, which is big enough to hold
100*0Sstevel@tonic-gate        # the intermidiate result. We only need the % before any "^" and "&"
101*0Sstevel@tonic-gate        # to get the result in the range for an I32.
102*0Sstevel@tonic-gate        # and << doesn't work on NV, so using 1 << 10
103*0Sstevel@tonic-gate        $u += ord;
104*0Sstevel@tonic-gate        $u += $u * (1 << 10); $u %= MASK_U32;
105*0Sstevel@tonic-gate        $u ^= $u >> 6;
106*0Sstevel@tonic-gate    }
107*0Sstevel@tonic-gate    $u += $u << 3;  $u %= MASK_U32;
108*0Sstevel@tonic-gate    $u ^= $u >> 11; $u %= MASK_U32;
109*0Sstevel@tonic-gate    $u += $u << 15; $u %= MASK_U32;
110*0Sstevel@tonic-gate    $u;
111*0Sstevel@tonic-gate}
112