xref: /llvm-project/llvm/unittests/ADT/HashingTest.cpp (revision becfda3e40831b06814f18a119db36183f2476b4)
1 //===- llvm/unittest/ADT/HashingTest.cpp ----------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Hashing.h unit tests.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "gtest/gtest.h"
15 #include "llvm/ADT/Hashing.h"
16 #include "llvm/Support/DataTypes.h"
17 #include <deque>
18 #include <list>
19 #include <map>
20 #include <vector>
21 
22 namespace llvm {
23 
24 // Helper for test code to print hash codes.
25 void PrintTo(const hash_code &code, std::ostream *os) {
26   *os << static_cast<size_t>(code);
27 }
28 
29 // Fake an object that is recognized as hashable data to test super large
30 // objects.
31 struct LargeTestInteger { uint64_t arr[8]; };
32 
33 namespace hashing {
34 namespace detail {
35 template <> struct is_hashable_data<LargeTestInteger> : true_type {};
36 } // namespace detail
37 } // namespace hashing
38 
39 } // namespace llvm
40 
41 using namespace llvm;
42 
43 namespace {
44 
45 TEST(HashingTest, HashValueBasicTest) {
46   int x = 42, y = 43, c = 'x';
47   void *p = 0;
48   uint64_t i = 71;
49   const unsigned ci = 71;
50   volatile int vi = 71;
51   const volatile int cvi = 71;
52   uintptr_t addr = reinterpret_cast<uintptr_t>(&y);
53   EXPECT_EQ(hash_value(42), hash_value(x));
54   EXPECT_NE(hash_value(42), hash_value(y));
55   EXPECT_NE(hash_value(42), hash_value(p));
56   EXPECT_EQ(hash_value(71), hash_value(i));
57   EXPECT_EQ(hash_value(71), hash_value(ci));
58   EXPECT_EQ(hash_value(71), hash_value(vi));
59   EXPECT_EQ(hash_value(71), hash_value(cvi));
60   EXPECT_EQ(hash_value(c), hash_value('x'));
61   EXPECT_EQ(hash_value('4'), hash_value('0' + 4));
62   EXPECT_EQ(hash_value(addr), hash_value(&y));
63 
64   EXPECT_EQ(hash_combine(42, 43), hash_value(std::make_pair(42, 43)));
65   EXPECT_NE(hash_combine(43, 42), hash_value(std::make_pair(42, 43)));
66   EXPECT_NE(hash_combine(42, 43), hash_value(std::make_pair(42ull, 43ull)));
67   EXPECT_NE(hash_combine(42, 43), hash_value(std::make_pair(42, 43ull)));
68   EXPECT_NE(hash_combine(42, 43), hash_value(std::make_pair(42ull, 43)));
69 
70   // Note that pairs are implicitly flattened to a direct sequence of data and
71   // hashed efficiently as a consequence.
72   EXPECT_EQ(hash_combine(42, 43, 44),
73             hash_value(std::make_pair(42, std::make_pair(43, 44))));
74   EXPECT_EQ(hash_value(std::make_pair(42, std::make_pair(43, 44))),
75             hash_value(std::make_pair(std::make_pair(42, 43), 44)));
76 }
77 
78 template <typename T, size_t N> T *begin(T (&arr)[N]) { return arr; }
79 template <typename T, size_t N> T *end(T (&arr)[N]) { return arr + N; }
80 
81 // Provide a dummy, hashable type designed for easy verification: its hash is
82 // the same as its value.
83 struct HashableDummy { size_t value; };
84 hash_code hash_value(HashableDummy dummy) { return dummy.value; }
85 
86 TEST(HashingTest, HashCombineRangeBasicTest) {
87   // Leave this uninitialized in the hope that valgrind will catch bad reads.
88   int dummy;
89   hash_code dummy_hash = hash_combine_range(&dummy, &dummy);
90   EXPECT_NE(hash_code(0), dummy_hash);
91 
92   const int arr1[] = { 1, 2, 3 };
93   hash_code arr1_hash = hash_combine_range(begin(arr1), end(arr1));
94   EXPECT_NE(dummy_hash, arr1_hash);
95   EXPECT_EQ(arr1_hash, hash_combine_range(begin(arr1), end(arr1)));
96 
97   const std::vector<int> vec(begin(arr1), end(arr1));
98   EXPECT_EQ(arr1_hash, hash_combine_range(vec.begin(), vec.end()));
99 
100   const std::list<int> list(begin(arr1), end(arr1));
101   EXPECT_EQ(arr1_hash, hash_combine_range(list.begin(), list.end()));
102 
103   const std::deque<int> deque(begin(arr1), end(arr1));
104   EXPECT_EQ(arr1_hash, hash_combine_range(deque.begin(), deque.end()));
105 
106   const int arr2[] = { 3, 2, 1 };
107   hash_code arr2_hash = hash_combine_range(begin(arr2), end(arr2));
108   EXPECT_NE(dummy_hash, arr2_hash);
109   EXPECT_NE(arr1_hash, arr2_hash);
110 
111   const int arr3[] = { 1, 1, 2, 3 };
112   hash_code arr3_hash = hash_combine_range(begin(arr3), end(arr3));
113   EXPECT_NE(dummy_hash, arr3_hash);
114   EXPECT_NE(arr1_hash, arr3_hash);
115 
116   const int arr4[] = { 1, 2, 3, 3 };
117   hash_code arr4_hash = hash_combine_range(begin(arr4), end(arr4));
118   EXPECT_NE(dummy_hash, arr4_hash);
119   EXPECT_NE(arr1_hash, arr4_hash);
120 
121   const size_t arr5[] = { 1, 2, 3 };
122   const HashableDummy d_arr5[] = { {1}, {2}, {3} };
123   hash_code arr5_hash = hash_combine_range(begin(arr5), end(arr5));
124   hash_code d_arr5_hash = hash_combine_range(begin(d_arr5), end(d_arr5));
125   EXPECT_EQ(arr5_hash, d_arr5_hash);
126 }
127 
128 TEST(HashingTest, HashCombineRangeLengthDiff) {
129   // Test that as only the length varies, we compute different hash codes for
130   // sequences.
131   std::map<size_t, size_t> code_to_size;
132   std::vector<char> all_one_c(256, '\xff');
133   for (unsigned Idx = 1, Size = all_one_c.size(); Idx < Size; ++Idx) {
134     hash_code code = hash_combine_range(&all_one_c[0], &all_one_c[0] + Idx);
135     std::map<size_t, size_t>::iterator
136       I = code_to_size.insert(std::make_pair(code, Idx)).first;
137     EXPECT_EQ(Idx, I->second);
138   }
139   code_to_size.clear();
140   std::vector<char> all_zero_c(256, '\0');
141   for (unsigned Idx = 1, Size = all_zero_c.size(); Idx < Size; ++Idx) {
142     hash_code code = hash_combine_range(&all_zero_c[0], &all_zero_c[0] + Idx);
143     std::map<size_t, size_t>::iterator
144       I = code_to_size.insert(std::make_pair(code, Idx)).first;
145     EXPECT_EQ(Idx, I->second);
146   }
147   code_to_size.clear();
148   std::vector<unsigned> all_one_int(512, -1);
149   for (unsigned Idx = 1, Size = all_one_int.size(); Idx < Size; ++Idx) {
150     hash_code code = hash_combine_range(&all_one_int[0], &all_one_int[0] + Idx);
151     std::map<size_t, size_t>::iterator
152       I = code_to_size.insert(std::make_pair(code, Idx)).first;
153     EXPECT_EQ(Idx, I->second);
154   }
155   code_to_size.clear();
156   std::vector<unsigned> all_zero_int(512, 0);
157   for (unsigned Idx = 1, Size = all_zero_int.size(); Idx < Size; ++Idx) {
158     hash_code code = hash_combine_range(&all_zero_int[0], &all_zero_int[0] + Idx);
159     std::map<size_t, size_t>::iterator
160       I = code_to_size.insert(std::make_pair(code, Idx)).first;
161     EXPECT_EQ(Idx, I->second);
162   }
163 }
164 
165 TEST(HashingTest, HashCombineRangeGoldenTest) {
166   struct { const char *s; uint64_t hash; } golden_data[] = {
167 #if SIZE_MAX == UINT64_MAX
168     { "a",                                0xaeb6f9d5517c61f8ULL },
169     { "ab",                               0x7ab1edb96be496b4ULL },
170     { "abc",                              0xe38e60bf19c71a3fULL },
171     { "abcde",                            0xd24461a66de97f6eULL },
172     { "abcdefgh",                         0x4ef872ec411dec9dULL },
173     { "abcdefghijklm",                    0xe8a865539f4eadfeULL },
174     { "abcdefghijklmnopqrstu",            0x261cdf85faaf4e79ULL },
175     { "abcdefghijklmnopqrstuvwxyzabcdef", 0x43ba70e4198e3b2aULL },
176     { "abcdefghijklmnopqrstuvwxyzabcdef"
177       "abcdefghijklmnopqrstuvwxyzghijkl"
178       "abcdefghijklmnopqrstuvwxyzmnopqr"
179       "abcdefghijklmnopqrstuvwxyzstuvwx"
180       "abcdefghijklmnopqrstuvwxyzyzabcd", 0xdcd57fb2afdf72beULL },
181     { "a",                                0xaeb6f9d5517c61f8ULL },
182     { "aa",                               0xf2b3b69a9736a1ebULL },
183     { "aaa",                              0xf752eb6f07b1cafeULL },
184     { "aaaaa",                            0x812bd21e1236954cULL },
185     { "aaaaaaaa",                         0xff07a2cff08ac587ULL },
186     { "aaaaaaaaaaaaa",                    0x84ac949d54d704ecULL },
187     { "aaaaaaaaaaaaaaaaaaaaa",            0xcb2c8fb6be8f5648ULL },
188     { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xcc40ab7f164091b6ULL },
189     { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
190       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
191       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
192       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
193       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xc58e174c1e78ffe9ULL },
194     { "z",                                0x1ba160d7e8f8785cULL },
195     { "zz",                               0x2c5c03172f1285d7ULL },
196     { "zzz",                              0x9d2c4f4b507a2ac3ULL },
197     { "zzzzz",                            0x0f03b9031735693aULL },
198     { "zzzzzzzz",                         0xe674147c8582c08eULL },
199     { "zzzzzzzzzzzzz",                    0x3162d9fa6938db83ULL },
200     { "zzzzzzzzzzzzzzzzzzzzz",            0x37b9a549e013620cULL },
201     { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x8921470aff885016ULL },
202     { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
203       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
204       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
205       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
206       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0xf60fdcd9beb08441ULL },
207     { "a",                                0xaeb6f9d5517c61f8ULL },
208     { "ab",                               0x7ab1edb96be496b4ULL },
209     { "aba",                              0x3edb049950884d0aULL },
210     { "ababa",                            0x8f2de9e73a97714bULL },
211     { "abababab",                         0xee14a29ddf0ce54cULL },
212     { "ababababababa",                    0x38b3ddaada2d52b4ULL },
213     { "ababababababababababa",            0xd3665364219f2b85ULL },
214     { "abababababababababababababababab", 0xa75cd6afbf1bc972ULL },
215     { "abababababababababababababababab"
216       "abababababababababababababababab"
217       "abababababababababababababababab"
218       "abababababababababababababababab"
219       "abababababababababababababababab", 0x840192d129f7a22bULL }
220 #elif SIZE_MAX == UINT32_MAX
221     { "a",                                0x000000004605f745ULL },
222     { "ab",                               0x00000000d5f06301ULL },
223     { "abc",                              0x00000000559fe1eeULL },
224     { "abcde",                            0x00000000424028d7ULL },
225     { "abcdefgh",                         0x000000007bb119f8ULL },
226     { "abcdefghijklm",                    0x00000000edbca513ULL },
227     { "abcdefghijklmnopqrstu",            0x000000007c15712eULL },
228     { "abcdefghijklmnopqrstuvwxyzabcdef", 0x000000000b3aad66ULL },
229     { "abcdefghijklmnopqrstuvwxyzabcdef"
230       "abcdefghijklmnopqrstuvwxyzghijkl"
231       "abcdefghijklmnopqrstuvwxyzmnopqr"
232       "abcdefghijklmnopqrstuvwxyzstuvwx"
233       "abcdefghijklmnopqrstuvwxyzyzabcd", 0x000000008c758c8bULL },
234     { "a",                                0x000000004605f745ULL },
235     { "aa",                               0x00000000dc0a52daULL },
236     { "aaa",                              0x00000000b309274fULL },
237     { "aaaaa",                            0x00000000203b5ef6ULL },
238     { "aaaaaaaa",                         0x00000000a429e18fULL },
239     { "aaaaaaaaaaaaa",                    0x000000008662070bULL },
240     { "aaaaaaaaaaaaaaaaaaaaa",            0x000000003f11151cULL },
241     { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0x000000008600fe20ULL },
242     { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
243       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
244       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
245       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
246       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0x000000004e0e0804ULL },
247     { "z",                                0x00000000c5e405e9ULL },
248     { "zz",                               0x00000000a8d8a2c6ULL },
249     { "zzz",                              0x00000000fc2af672ULL },
250     { "zzzzz",                            0x0000000047d9efe6ULL },
251     { "zzzzzzzz",                         0x0000000080d77794ULL },
252     { "zzzzzzzzzzzzz",                    0x00000000405f93adULL },
253     { "zzzzzzzzzzzzzzzzzzzzz",            0x00000000fc72838dULL },
254     { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x000000007ce160f1ULL },
255     { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
256       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
257       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
258       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
259       "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x00000000aed9ed1bULL },
260     { "a",                                0x000000004605f745ULL },
261     { "ab",                               0x00000000d5f06301ULL },
262     { "aba",                              0x00000000a85cd91bULL },
263     { "ababa",                            0x000000009e3bb52eULL },
264     { "abababab",                         0x000000002709b3b9ULL },
265     { "ababababababa",                    0x000000003a234174ULL },
266     { "ababababababababababa",            0x000000005c63e5ceULL },
267     { "abababababababababababababababab", 0x0000000013f74334ULL },
268     { "abababababababababababababababab"
269       "abababababababababababababababab"
270       "abababababababababababababababab"
271       "abababababababababababababababab"
272       "abababababababababababababababab", 0x00000000c1a6f135ULL },
273 #else
274 #error This test only supports 64-bit and 32-bit systems.
275 #endif
276   };
277   for (unsigned i = 0; i < sizeof(golden_data)/sizeof(*golden_data); ++i) {
278     StringRef str = golden_data[i].s;
279     hash_code hash = hash_combine_range(str.begin(), str.end());
280 #if 0 // Enable this to generate paste-able text for the above structure.
281     std::string member_str = "\"" + str.str() + "\",";
282     fprintf(stderr, " { %-35s 0x%016llxULL },\n",
283             member_str.c_str(), static_cast<uint64_t>(hash));
284 #endif
285     EXPECT_EQ(static_cast<size_t>(golden_data[i].hash),
286               static_cast<size_t>(hash));
287   }
288 }
289 
290 TEST(HashingTest, HashCombineBasicTest) {
291   // Hashing a sequence of homogenous types matches range hashing.
292   const int i1 = 42, i2 = 43, i3 = 123, i4 = 999, i5 = 0, i6 = 79;
293   const int arr1[] = { i1, i2, i3, i4, i5, i6 };
294   EXPECT_EQ(hash_combine_range(arr1, arr1 + 1), hash_combine(i1));
295   EXPECT_EQ(hash_combine_range(arr1, arr1 + 2), hash_combine(i1, i2));
296   EXPECT_EQ(hash_combine_range(arr1, arr1 + 3), hash_combine(i1, i2, i3));
297   EXPECT_EQ(hash_combine_range(arr1, arr1 + 4), hash_combine(i1, i2, i3, i4));
298   EXPECT_EQ(hash_combine_range(arr1, arr1 + 5),
299             hash_combine(i1, i2, i3, i4, i5));
300   EXPECT_EQ(hash_combine_range(arr1, arr1 + 6),
301             hash_combine(i1, i2, i3, i4, i5, i6));
302 
303   // Hashing a sequence of heterogenous types which *happen* to all produce the
304   // same data for hashing produces the same as a range-based hash of the
305   // fundamental values.
306   const size_t s1 = 1024, s2 = 8888, s3 = 9000000;
307   const HashableDummy d1 = { 1024 }, d2 = { 8888 }, d3 = { 9000000 };
308   const size_t arr2[] = { s1, s2, s3 };
309   EXPECT_EQ(hash_combine_range(begin(arr2), end(arr2)),
310             hash_combine(s1, s2, s3));
311   EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(s1, s2, d3));
312   EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(s1, d2, s3));
313   EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(d1, s2, s3));
314   EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(d1, d2, s3));
315   EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(d1, d2, d3));
316 
317   // Permuting values causes hashes to change.
318   EXPECT_NE(hash_combine(i1, i1, i1), hash_combine(i1, i1, i2));
319   EXPECT_NE(hash_combine(i1, i1, i1), hash_combine(i1, i2, i1));
320   EXPECT_NE(hash_combine(i1, i1, i1), hash_combine(i2, i1, i1));
321   EXPECT_NE(hash_combine(i1, i1, i1), hash_combine(i2, i2, i1));
322   EXPECT_NE(hash_combine(i1, i1, i1), hash_combine(i2, i2, i2));
323   EXPECT_NE(hash_combine(i2, i1, i1), hash_combine(i1, i1, i2));
324   EXPECT_NE(hash_combine(i1, i1, i2), hash_combine(i1, i2, i1));
325   EXPECT_NE(hash_combine(i1, i2, i1), hash_combine(i2, i1, i1));
326 
327   // Changing type w/o changing value causes hashes to change.
328   EXPECT_NE(hash_combine(i1, i2, i3), hash_combine((char)i1, i2, i3));
329   EXPECT_NE(hash_combine(i1, i2, i3), hash_combine(i1, (char)i2, i3));
330   EXPECT_NE(hash_combine(i1, i2, i3), hash_combine(i1, i2, (char)i3));
331 
332   // This is array of uint64, but it should have the exact same byte pattern as
333   // an array of LargeTestIntegers.
334   const uint64_t bigarr[] = {
335     0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL,
336     0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, 0xafafafafededededULL,
337     0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL,
338     0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL,
339     0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, 0xafafafafededededULL,
340     0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL,
341     0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL,
342     0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, 0xafafafafededededULL,
343     0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL
344   };
345   // Hash a preposterously large integer, both aligned with the buffer and
346   // misaligned.
347   const LargeTestInteger li = { {
348     0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL,
349     0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, 0xafafafafededededULL,
350     0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL
351   } };
352   // Rotate the storage from 'li'.
353   const LargeTestInteger l2 = { {
354     0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL, 0xdeadbeafdeadbeefULL,
355     0xfefefefededededeULL, 0xafafafafededededULL, 0xffffeeeeddddccccULL,
356     0xaaaacbcbffffababULL, 0xaaaaaaaaababababULL
357   } };
358   const LargeTestInteger l3 = { {
359     0xccddeeffeeddccbbULL, 0xdeadbeafdeadbeefULL, 0xfefefefededededeULL,
360     0xafafafafededededULL, 0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL,
361     0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL
362   } };
363   EXPECT_EQ(hash_combine_range(begin(bigarr), end(bigarr)),
364             hash_combine(li, li, li));
365   EXPECT_EQ(hash_combine_range(bigarr, bigarr + 9),
366             hash_combine(bigarr[0], l2));
367   EXPECT_EQ(hash_combine_range(bigarr, bigarr + 10),
368             hash_combine(bigarr[0], bigarr[1], l3));
369   EXPECT_EQ(hash_combine_range(bigarr, bigarr + 17),
370             hash_combine(li, bigarr[0], l2));
371   EXPECT_EQ(hash_combine_range(bigarr, bigarr + 18),
372             hash_combine(li, bigarr[0], bigarr[1], l3));
373   EXPECT_EQ(hash_combine_range(bigarr, bigarr + 18),
374             hash_combine(bigarr[0], l2, bigarr[9], l3));
375   EXPECT_EQ(hash_combine_range(bigarr, bigarr + 20),
376             hash_combine(bigarr[0], l2, bigarr[9], l3, bigarr[18], bigarr[19]));
377 }
378 
379 }
380