xref: /llvm-project/libc/test/src/__support/hash_test.cpp (revision e399a317ef63d05991f6b15f74247eee6bfc5279)
1 //===-- Unittests for hash ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "src/__support/CPP/new.h"
10 #include "src/__support/hash.h"
11 #include "src/stdlib/rand.h"
12 #include "src/stdlib/srand.h"
13 #include "src/string/memset.h"
14 #include "test/UnitTest/Test.h"
15 
16 template <class T> struct AlignedMemory {
17   T *data;
18   size_t offset;
19   std::align_val_t alignment;
AlignedMemoryAlignedMemory20   AlignedMemory(size_t size, size_t alignment, size_t offset)
21       : offset(offset), alignment{alignment} {
22     size_t sz = size * sizeof(T);
23     size_t aligned = sz + ((-sz) & (alignment - 1)) + alignment;
24     LIBC_NAMESPACE::AllocChecker ac;
25     data = static_cast<T *>(operator new(aligned, this->alignment, ac));
26     data += offset % alignment;
27   }
~AlignedMemoryAlignedMemory28   ~AlignedMemory() { operator delete(data - offset, alignment); }
29 };
30 
31 size_t sizes[] = {0, 1, 23, 59, 1024, 5261};
32 uint8_t values[] = {0, 1, 23, 59, 102, 255};
33 
34 // Hash value should not change with different alignments.
TEST(LlvmLibcHashTest,SanityCheck)35 TEST(LlvmLibcHashTest, SanityCheck) {
36   for (size_t sz : sizes) {
37     for (uint8_t val : values) {
38       uint64_t hash;
39       {
40         AlignedMemory<char> mem(sz, 64, 0);
41         LIBC_NAMESPACE::memset(mem.data, val, sz);
42         LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef};
43         state.update(mem.data, sz);
44         hash = state.finish();
45       }
46       for (size_t offset = 1; offset < 64; ++offset) {
47         AlignedMemory<char> mem(sz, 64, offset);
48         LIBC_NAMESPACE::memset(mem.data, val, sz);
49         LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef};
50         state.update(mem.data, sz);
51         ASSERT_EQ(hash, state.finish());
52       }
53     }
54   }
55 }
56 
popcnt(uint64_t x)57 static inline size_t popcnt(uint64_t x) {
58   size_t count = 0;
59   while (x) {
60     count += x & 1;
61     x >>= 1;
62   }
63   return count;
64 }
65 
66 // Mutate a single bit in a rather large input. The hash should change
67 // significantly. At least one fifth of the bits should not match.
TEST(LlvmLibcHashTest,Avalanche)68 TEST(LlvmLibcHashTest, Avalanche) {
69   for (size_t sz : sizes) {
70     for (uint8_t val : values) {
71       uint64_t hash;
72       AlignedMemory<char> mem(sz, 64, 0);
73       LIBC_NAMESPACE::memset(mem.data, val, sz);
74       {
75         LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890};
76         state.update(mem.data, sz);
77         hash = state.finish();
78       }
79       for (size_t i = 0; i < sz; ++i) {
80         for (size_t j = 0; j < 8; ++j) {
81           uint8_t mask = 1 << j;
82           mem.data[i] ^= mask;
83           {
84             LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890};
85             state.update(mem.data, sz);
86             uint64_t new_hash = state.finish();
87             ASSERT_GE(popcnt(hash ^ new_hash), size_t{13});
88           }
89           mem.data[i] ^= mask;
90         }
91       }
92     }
93   }
94 }
95 
96 // Hash a random sequence of input. The LSB should be uniform enough such that
97 // values spread across the entire range.
TEST(LlvmLibcHashTest,UniformLSB)98 TEST(LlvmLibcHashTest, UniformLSB) {
99   LIBC_NAMESPACE::srand(0xffffffff);
100   for (size_t sz : sizes) {
101     AlignedMemory<size_t> counters(sz, sizeof(size_t), 0);
102     LIBC_NAMESPACE::memset(counters.data, 0, sz * sizeof(size_t));
103     for (size_t i = 0; i < 200 * sz; ++i) {
104       int randomness[8] = {LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
105                            LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
106                            LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
107                            LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand()};
108       {
109         LIBC_NAMESPACE::internal::HashState state{0x1a2b3c4d5e6f7a8b};
110         state.update(randomness, sizeof(randomness));
111         uint64_t hash = state.finish();
112         counters.data[hash % sz]++;
113       }
114     }
115     for (size_t i = 0; i < sz; ++i) {
116       ASSERT_GE(counters.data[i], size_t{140});
117       ASSERT_LE(counters.data[i], size_t{260});
118     }
119   }
120 }
121 
122 // Hash a low entropy sequence. The MSB should be uniform enough such that
123 // there is no significant bias even if the value range is small.
124 // Top 7 bits is examined because it will be used as a secondary key in
125 // the hash table.
TEST(LlvmLibcHashTest,UniformMSB)126 TEST(LlvmLibcHashTest, UniformMSB) {
127   size_t sz = 1 << 7;
128   AlignedMemory<size_t> counters(sz, sizeof(size_t), 0);
129   LIBC_NAMESPACE::memset(counters.data, 0, sz * sizeof(size_t));
130   for (size_t i = 0; i < 200 * sz; ++i) {
131     LIBC_NAMESPACE::internal::HashState state{0xa1b2c3d4e5f6a7b8};
132     state.update(&i, sizeof(i));
133     uint64_t hash = state.finish();
134     counters.data[hash >> 57]++;
135   }
136   for (size_t i = 0; i < sz; ++i) {
137     ASSERT_GE(counters.data[i], size_t{140});
138     ASSERT_LE(counters.data[i], size_t{260});
139   }
140 }
141