xref: /llvm-project/llvm/unittests/ADT/TrieRawHashMapTest.cpp (revision b510cdb895b9188e5819c4c85a6dab22a4d14385)
1 //===- TrieRawHashMapTest.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/TrieRawHashMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/Support/Endian.h"
12 #include "llvm/Support/SHA1.h"
13 #include "gtest/gtest.h"
14 
15 using namespace llvm;
16 
17 namespace llvm {
18 class TrieRawHashMapTestHelper {
19 public:
20   TrieRawHashMapTestHelper() = default;
21 
22   void setTrie(ThreadSafeTrieRawHashMapBase *T) { Trie = T; }
23 
24   ThreadSafeTrieRawHashMapBase::PointerBase getRoot() const {
25     return Trie->getRoot();
26   }
27   unsigned getStartBit(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
28     return Trie->getStartBit(P);
29   }
30   unsigned getNumBits(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
31     return Trie->getNumBits(P);
32   }
33   unsigned getNumSlotUsed(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
34     return Trie->getNumSlotUsed(P);
35   }
36   unsigned getNumTries() const { return Trie->getNumTries(); }
37   std::string
38   getTriePrefixAsString(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
39     return Trie->getTriePrefixAsString(P);
40   }
41   ThreadSafeTrieRawHashMapBase::PointerBase
42   getNextTrie(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
43     return Trie->getNextTrie(P);
44   }
45 
46 private:
47   ThreadSafeTrieRawHashMapBase *Trie = nullptr;
48 };
49 } // namespace llvm
50 
51 namespace {
52 template <typename DataType, size_t HashSize = sizeof(uint64_t)>
53 class SimpleTrieHashMapTest : public TrieRawHashMapTestHelper,
54                               public ::testing::Test {
55 public:
56   using NumType = DataType;
57   using HashType = std::array<uint8_t, HashSize>;
58   using TrieType = ThreadSafeTrieRawHashMap<DataType, sizeof(HashType)>;
59 
60   TrieType &createTrie(size_t RootBits, size_t SubtrieBits) {
61     auto &Ret = Trie.emplace(RootBits, SubtrieBits);
62     TrieRawHashMapTestHelper::setTrie(&Ret);
63     return Ret;
64   }
65 
66   void destroyTrie() { Trie.reset(); }
67   ~SimpleTrieHashMapTest() { destroyTrie(); }
68 
69   // Use the number itself as hash to test the pathological case.
70   static HashType hash(uint64_t Num) {
71     uint64_t HashN =
72         llvm::support::endian::byte_swap(Num, llvm::endianness::big);
73     HashType Hash;
74     memcpy(&Hash[0], &HashN, sizeof(HashType));
75     return Hash;
76   };
77 
78 private:
79   std::optional<TrieType> Trie;
80 };
81 
82 using SmallNodeTrieTest = SimpleTrieHashMapTest<uint64_t>;
83 
84 TEST_F(SmallNodeTrieTest, TrieAllocation) {
85   NumType Numbers[] = {
86       0x0, std::numeric_limits<NumType>::max(),      0x1, 0x2,
87       0x3, std::numeric_limits<NumType>::max() - 1u,
88   };
89 
90   unsigned ExpectedTries[] = {
91       1,       // Allocate Root.
92       1,       // Both on the root.
93       64,      // 0 and 1 sinks all the way down.
94       64,      // no new allocation needed.
95       65,      // need a new node between 2 and 3.
96       65 + 63, // 63 new allocation to sink two big numbers all the way.
97   };
98 
99   const char *ExpectedPrefix[] = {
100       "", // Root.
101       "", // Root.
102       "00000000000000[0000000]",
103       "00000000000000[0000000]",
104       "00000000000000[0000001]",
105       "ffffffffffffff[1111111]",
106   };
107 
108   // Use root and subtrie sizes of 1 so this gets sunk quite deep.
109   auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1);
110 
111   for (unsigned I = 0; I < 6; ++I) {
112     // Lookup first to exercise hint code for deep tries.
113     TrieType::pointer Lookup = Trie.find(hash(Numbers[I]));
114     EXPECT_FALSE(Lookup);
115 
116     Trie.insert(Lookup, TrieType::value_type(hash(Numbers[I]), Numbers[I]));
117     EXPECT_EQ(getNumTries(), ExpectedTries[I]);
118     EXPECT_EQ(getTriePrefixAsString(getNextTrie(getRoot())), ExpectedPrefix[I]);
119   }
120 }
121 
122 TEST_F(SmallNodeTrieTest, TrieStructure) {
123   NumType Numbers[] = {
124       // Three numbers that will nest deeply to test (1) sinking subtries and
125       // (2) deep, non-trivial hints.
126       std::numeric_limits<NumType>::max(),
127       std::numeric_limits<NumType>::max() - 2u,
128       std::numeric_limits<NumType>::max() - 3u,
129       // One number to stay at the top-level.
130       0x37,
131   };
132 
133   // Use root and subtrie sizes of 1 so this gets sunk quite deep.
134   auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1);
135 
136   for (NumType N : Numbers) {
137     // Lookup first to exercise hint code for deep tries.
138     TrieType::pointer Lookup = Trie.find(hash(N));
139     EXPECT_FALSE(Lookup);
140 
141     Trie.insert(Lookup, TrieType::value_type(hash(N), N));
142   }
143   for (NumType N : Numbers) {
144     TrieType::pointer Lookup = Trie.find(hash(N));
145     EXPECT_TRUE(Lookup);
146     if (!Lookup)
147       continue;
148     EXPECT_EQ(hash(N), Lookup->Hash);
149     EXPECT_EQ(N, Lookup->Data);
150 
151     // Confirm a subsequent insertion fails to overwrite by trying to insert a
152     // bad value.
153     auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1));
154     EXPECT_EQ(N, Result->Data);
155   }
156 
157   // Check the trie so we can confirm the structure is correct. Each subtrie
158   // should have 2 slots. The root's index=0 should have the content for
159   // 0x37 directly, and index=1 should be a linked-list of subtries, finally
160   // ending with content for (max-2) and (max-3).
161   //
162   // Note: This structure is not exhaustive (too expensive to update tests),
163   // but it does test that the dump format is somewhat readable and that the
164   // basic structure is correct.
165   //
166   // Note: This test requires that the trie reads bytes starting from index 0
167   // of the array of uint8_t, and then reads each byte's bits from high to low.
168 
169   // Check the Trie.
170   // We should allocated a total of 64 SubTries for 64 bit hash.
171   ASSERT_EQ(getNumTries(), 64u);
172   // Check the root trie. Two slots and both are used.
173   ASSERT_EQ(getNumSlotUsed(getRoot()), 2u);
174   // Check last subtrie.
175   // Last allocated trie is the next node in the allocation chain.
176   auto LastAlloctedSubTrie = getNextTrie(getRoot());
177   ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie),
178             "ffffffffffffff[1111110]");
179   ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u);
180   ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u);
181   ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u);
182 }
183 
184 TEST_F(SmallNodeTrieTest, TrieStructureSmallFinalSubtrie) {
185   NumType Numbers[] = {
186       // Three numbers that will nest deeply to test (1) sinking subtries and
187       // (2) deep, non-trivial hints.
188       std::numeric_limits<NumType>::max(),
189       std::numeric_limits<NumType>::max() - 2u,
190       std::numeric_limits<NumType>::max() - 3u,
191       // One number to stay at the top-level.
192       0x37,
193   };
194 
195   // Use subtrie size of 5 to avoid hitting 64 evenly, making the final subtrie
196   // small.
197   auto &Trie = createTrie(/*RootBits=*/8, /*SubtrieBits=*/5);
198 
199   for (NumType N : Numbers) {
200     // Lookup first to exercise hint code for deep tries.
201     TrieType::pointer Lookup = Trie.find(hash(N));
202     EXPECT_FALSE(Lookup);
203 
204     Trie.insert(Lookup, TrieType::value_type(hash(N), N));
205   }
206   for (NumType N : Numbers) {
207     TrieType::pointer Lookup = Trie.find(hash(N));
208     ASSERT_TRUE(Lookup);
209     EXPECT_EQ(hash(N), Lookup->Hash);
210     EXPECT_EQ(N, Lookup->Data);
211 
212     // Confirm a subsequent insertion fails to overwrite by trying to insert a
213     // bad value.
214     auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1));
215     EXPECT_EQ(N, Result->Data);
216   }
217 
218   // Check the trie so we can confirm the structure is correct. The root
219   // should have 2^8=256 slots, most subtries should have 2^5=32 slots, and the
220   // deepest subtrie should have 2^1=2 slots (since (64-8)mod(5)=1).
221   // should have 2 slots. The root's index=0 should have the content for
222   // 0x37 directly, and index=1 should be a linked-list of subtries, finally
223   // ending with content for (max-2) and (max-3).
224   //
225   // Note: This structure is not exhaustive (too expensive to update tests),
226   // but it does test that the dump format is somewhat readable and that the
227   // basic structure is correct.
228   //
229   // Note: This test requires that the trie reads bytes starting from index 0
230   // of the array of uint8_t, and then reads each byte's bits from high to low.
231 
232   // Check the Trie.
233   // 64 bit hash = 8 + 5 * 11 + 1, so 1 root, 11 8bit subtrie and 1 last level
234   // subtrie, 13 total.
235   ASSERT_EQ(getNumTries(), 13u);
236   // Check the root trie. Two slots and both are used.
237   ASSERT_EQ(getNumSlotUsed(getRoot()), 2u);
238   // Check last subtrie.
239   // Last allocated trie is the next node in the allocation chain.
240   auto LastAlloctedSubTrie = getNextTrie(getRoot());
241   ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie),
242             "ffffffffffffff[1111110]");
243   ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u);
244   ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u);
245   ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u);
246 }
247 
248 TEST_F(SmallNodeTrieTest, TrieDestructionLoop) {
249   // Test destroying large Trie. Make sure there is no recursion that can
250   // overflow the stack.
251 
252   // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate.
253   auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1);
254 
255   // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug
256   // builds.
257   static constexpr uint64_t MaxN = 100000;
258   for (uint64_t N = 0; N != MaxN; ++N) {
259     HashType Hash = hash(N);
260     Trie.insert(TrieType::pointer(), TrieType::value_type(Hash, NumType{N}));
261   }
262 
263   // Destroy tries. If destruction is recursive and MaxN is high enough, these
264   // will both fail.
265   destroyTrie();
266 }
267 
268 struct NumWithDestructorT {
269   uint64_t Num;
270   llvm::function_ref<void()> DestructorCallback;
271   ~NumWithDestructorT() { DestructorCallback(); }
272 };
273 
274 using NodeWithDestructorTrieTest = SimpleTrieHashMapTest<NumWithDestructorT>;
275 
276 TEST_F(NodeWithDestructorTrieTest, TrieDestructionLoop) {
277   // Test destroying large Trie. Make sure there is no recursion that can
278   // overflow the stack.
279 
280   // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate.
281   auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1);
282 
283   // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug
284   // builds.
285   static constexpr uint64_t MaxN = 100000;
286 
287   uint64_t DestructorCalled = 0;
288   auto DtorCallback = [&DestructorCalled]() { ++DestructorCalled; };
289   for (uint64_t N = 0; N != MaxN; ++N) {
290     HashType Hash = hash(N);
291     Trie.insert(TrieType::pointer(),
292                 TrieType::value_type(Hash, NumType{N, DtorCallback}));
293   }
294   // Reset the count after all the temporaries get destroyed.
295   DestructorCalled = 0;
296 
297   // Destroy tries. If destruction is recursive and MaxN is high enough, these
298   // will both fail.
299   destroyTrie();
300 
301   // Count the number of destructor calls during `destroyTrie()`.
302   ASSERT_EQ(DestructorCalled, MaxN);
303 }
304 
305 using NumStrNodeTrieTest = SimpleTrieHashMapTest<std::string>;
306 
307 TEST_F(NumStrNodeTrieTest, TrieInsertLazy) {
308   for (unsigned RootBits : {2, 3, 6, 10}) {
309     for (unsigned SubtrieBits : {2, 3, 4}) {
310       auto &Trie = createTrie(RootBits, SubtrieBits);
311       for (int I = 0, E = 1000; I != E; ++I) {
312         TrieType::pointer Lookup;
313         HashType H = hash(I);
314         if (I & 1)
315           Lookup = Trie.find(H);
316 
317         auto insertNum = [&](uint64_t Num) {
318           std::string S = Twine(I).str();
319           auto Hash = hash(Num);
320           return Trie.insertLazy(
321               Hash, [&](TrieType::LazyValueConstructor C) { C(std::move(S)); });
322         };
323         auto S1 = insertNum(I);
324         // The address of the Data should be the same.
325         EXPECT_EQ(&S1->Data, &insertNum(I)->Data);
326 
327         auto insertStr = [&](std::string S) {
328           int Num = std::stoi(S);
329           return insertNum(Num);
330         };
331         std::string S2 = S1->Data;
332         // The address of the Data should be the same.
333         EXPECT_EQ(&S1->Data, &insertStr(S2)->Data);
334       }
335       for (int I = 0, E = 1000; I != E; ++I) {
336         std::string S = Twine(I).str();
337         TrieType::pointer Lookup = Trie.find(hash(I));
338         EXPECT_TRUE(Lookup);
339         if (!Lookup)
340           continue;
341         EXPECT_EQ(S, Lookup->Data);
342       }
343     }
344   }
345 }
346 } // end anonymous namespace
347