1 //===- TrieRawHashMapTest.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ADT/TrieRawHashMap.h" 10 #include "llvm/ADT/Twine.h" 11 #include "llvm/Support/Endian.h" 12 #include "llvm/Support/SHA1.h" 13 #include "gtest/gtest.h" 14 15 using namespace llvm; 16 17 namespace llvm { 18 class TrieRawHashMapTestHelper { 19 public: 20 TrieRawHashMapTestHelper() = default; 21 22 void setTrie(ThreadSafeTrieRawHashMapBase *T) { Trie = T; } 23 24 ThreadSafeTrieRawHashMapBase::PointerBase getRoot() const { 25 return Trie->getRoot(); 26 } 27 unsigned getStartBit(ThreadSafeTrieRawHashMapBase::PointerBase P) const { 28 return Trie->getStartBit(P); 29 } 30 unsigned getNumBits(ThreadSafeTrieRawHashMapBase::PointerBase P) const { 31 return Trie->getNumBits(P); 32 } 33 unsigned getNumSlotUsed(ThreadSafeTrieRawHashMapBase::PointerBase P) const { 34 return Trie->getNumSlotUsed(P); 35 } 36 unsigned getNumTries() const { return Trie->getNumTries(); } 37 std::string 38 getTriePrefixAsString(ThreadSafeTrieRawHashMapBase::PointerBase P) const { 39 return Trie->getTriePrefixAsString(P); 40 } 41 ThreadSafeTrieRawHashMapBase::PointerBase 42 getNextTrie(ThreadSafeTrieRawHashMapBase::PointerBase P) const { 43 return Trie->getNextTrie(P); 44 } 45 46 private: 47 ThreadSafeTrieRawHashMapBase *Trie = nullptr; 48 }; 49 } // namespace llvm 50 51 namespace { 52 template <typename DataType, size_t HashSize = sizeof(uint64_t)> 53 class SimpleTrieHashMapTest : public TrieRawHashMapTestHelper, 54 public ::testing::Test { 55 public: 56 using NumType = DataType; 57 using HashType = std::array<uint8_t, HashSize>; 58 using TrieType = ThreadSafeTrieRawHashMap<DataType, sizeof(HashType)>; 59 60 TrieType &createTrie(size_t RootBits, size_t SubtrieBits) { 61 auto &Ret = Trie.emplace(RootBits, SubtrieBits); 62 TrieRawHashMapTestHelper::setTrie(&Ret); 63 return Ret; 64 } 65 66 void destroyTrie() { Trie.reset(); } 67 ~SimpleTrieHashMapTest() { destroyTrie(); } 68 69 // Use the number itself as hash to test the pathological case. 70 static HashType hash(uint64_t Num) { 71 uint64_t HashN = 72 llvm::support::endian::byte_swap(Num, llvm::endianness::big); 73 HashType Hash; 74 memcpy(&Hash[0], &HashN, sizeof(HashType)); 75 return Hash; 76 }; 77 78 private: 79 std::optional<TrieType> Trie; 80 }; 81 82 using SmallNodeTrieTest = SimpleTrieHashMapTest<uint64_t>; 83 84 TEST_F(SmallNodeTrieTest, TrieAllocation) { 85 NumType Numbers[] = { 86 0x0, std::numeric_limits<NumType>::max(), 0x1, 0x2, 87 0x3, std::numeric_limits<NumType>::max() - 1u, 88 }; 89 90 unsigned ExpectedTries[] = { 91 1, // Allocate Root. 92 1, // Both on the root. 93 64, // 0 and 1 sinks all the way down. 94 64, // no new allocation needed. 95 65, // need a new node between 2 and 3. 96 65 + 63, // 63 new allocation to sink two big numbers all the way. 97 }; 98 99 const char *ExpectedPrefix[] = { 100 "", // Root. 101 "", // Root. 102 "00000000000000[0000000]", 103 "00000000000000[0000000]", 104 "00000000000000[0000001]", 105 "ffffffffffffff[1111111]", 106 }; 107 108 // Use root and subtrie sizes of 1 so this gets sunk quite deep. 109 auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1); 110 111 for (unsigned I = 0; I < 6; ++I) { 112 // Lookup first to exercise hint code for deep tries. 113 TrieType::pointer Lookup = Trie.find(hash(Numbers[I])); 114 EXPECT_FALSE(Lookup); 115 116 Trie.insert(Lookup, TrieType::value_type(hash(Numbers[I]), Numbers[I])); 117 EXPECT_EQ(getNumTries(), ExpectedTries[I]); 118 EXPECT_EQ(getTriePrefixAsString(getNextTrie(getRoot())), ExpectedPrefix[I]); 119 } 120 } 121 122 TEST_F(SmallNodeTrieTest, TrieStructure) { 123 NumType Numbers[] = { 124 // Three numbers that will nest deeply to test (1) sinking subtries and 125 // (2) deep, non-trivial hints. 126 std::numeric_limits<NumType>::max(), 127 std::numeric_limits<NumType>::max() - 2u, 128 std::numeric_limits<NumType>::max() - 3u, 129 // One number to stay at the top-level. 130 0x37, 131 }; 132 133 // Use root and subtrie sizes of 1 so this gets sunk quite deep. 134 auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1); 135 136 for (NumType N : Numbers) { 137 // Lookup first to exercise hint code for deep tries. 138 TrieType::pointer Lookup = Trie.find(hash(N)); 139 EXPECT_FALSE(Lookup); 140 141 Trie.insert(Lookup, TrieType::value_type(hash(N), N)); 142 } 143 for (NumType N : Numbers) { 144 TrieType::pointer Lookup = Trie.find(hash(N)); 145 EXPECT_TRUE(Lookup); 146 if (!Lookup) 147 continue; 148 EXPECT_EQ(hash(N), Lookup->Hash); 149 EXPECT_EQ(N, Lookup->Data); 150 151 // Confirm a subsequent insertion fails to overwrite by trying to insert a 152 // bad value. 153 auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1)); 154 EXPECT_EQ(N, Result->Data); 155 } 156 157 // Check the trie so we can confirm the structure is correct. Each subtrie 158 // should have 2 slots. The root's index=0 should have the content for 159 // 0x37 directly, and index=1 should be a linked-list of subtries, finally 160 // ending with content for (max-2) and (max-3). 161 // 162 // Note: This structure is not exhaustive (too expensive to update tests), 163 // but it does test that the dump format is somewhat readable and that the 164 // basic structure is correct. 165 // 166 // Note: This test requires that the trie reads bytes starting from index 0 167 // of the array of uint8_t, and then reads each byte's bits from high to low. 168 169 // Check the Trie. 170 // We should allocated a total of 64 SubTries for 64 bit hash. 171 ASSERT_EQ(getNumTries(), 64u); 172 // Check the root trie. Two slots and both are used. 173 ASSERT_EQ(getNumSlotUsed(getRoot()), 2u); 174 // Check last subtrie. 175 // Last allocated trie is the next node in the allocation chain. 176 auto LastAlloctedSubTrie = getNextTrie(getRoot()); 177 ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie), 178 "ffffffffffffff[1111110]"); 179 ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u); 180 ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u); 181 ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u); 182 } 183 184 TEST_F(SmallNodeTrieTest, TrieStructureSmallFinalSubtrie) { 185 NumType Numbers[] = { 186 // Three numbers that will nest deeply to test (1) sinking subtries and 187 // (2) deep, non-trivial hints. 188 std::numeric_limits<NumType>::max(), 189 std::numeric_limits<NumType>::max() - 2u, 190 std::numeric_limits<NumType>::max() - 3u, 191 // One number to stay at the top-level. 192 0x37, 193 }; 194 195 // Use subtrie size of 5 to avoid hitting 64 evenly, making the final subtrie 196 // small. 197 auto &Trie = createTrie(/*RootBits=*/8, /*SubtrieBits=*/5); 198 199 for (NumType N : Numbers) { 200 // Lookup first to exercise hint code for deep tries. 201 TrieType::pointer Lookup = Trie.find(hash(N)); 202 EXPECT_FALSE(Lookup); 203 204 Trie.insert(Lookup, TrieType::value_type(hash(N), N)); 205 } 206 for (NumType N : Numbers) { 207 TrieType::pointer Lookup = Trie.find(hash(N)); 208 ASSERT_TRUE(Lookup); 209 EXPECT_EQ(hash(N), Lookup->Hash); 210 EXPECT_EQ(N, Lookup->Data); 211 212 // Confirm a subsequent insertion fails to overwrite by trying to insert a 213 // bad value. 214 auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1)); 215 EXPECT_EQ(N, Result->Data); 216 } 217 218 // Check the trie so we can confirm the structure is correct. The root 219 // should have 2^8=256 slots, most subtries should have 2^5=32 slots, and the 220 // deepest subtrie should have 2^1=2 slots (since (64-8)mod(5)=1). 221 // should have 2 slots. The root's index=0 should have the content for 222 // 0x37 directly, and index=1 should be a linked-list of subtries, finally 223 // ending with content for (max-2) and (max-3). 224 // 225 // Note: This structure is not exhaustive (too expensive to update tests), 226 // but it does test that the dump format is somewhat readable and that the 227 // basic structure is correct. 228 // 229 // Note: This test requires that the trie reads bytes starting from index 0 230 // of the array of uint8_t, and then reads each byte's bits from high to low. 231 232 // Check the Trie. 233 // 64 bit hash = 8 + 5 * 11 + 1, so 1 root, 11 8bit subtrie and 1 last level 234 // subtrie, 13 total. 235 ASSERT_EQ(getNumTries(), 13u); 236 // Check the root trie. Two slots and both are used. 237 ASSERT_EQ(getNumSlotUsed(getRoot()), 2u); 238 // Check last subtrie. 239 // Last allocated trie is the next node in the allocation chain. 240 auto LastAlloctedSubTrie = getNextTrie(getRoot()); 241 ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie), 242 "ffffffffffffff[1111110]"); 243 ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u); 244 ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u); 245 ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u); 246 } 247 248 TEST_F(SmallNodeTrieTest, TrieDestructionLoop) { 249 // Test destroying large Trie. Make sure there is no recursion that can 250 // overflow the stack. 251 252 // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate. 253 auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1); 254 255 // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug 256 // builds. 257 static constexpr uint64_t MaxN = 100000; 258 for (uint64_t N = 0; N != MaxN; ++N) { 259 HashType Hash = hash(N); 260 Trie.insert(TrieType::pointer(), TrieType::value_type(Hash, NumType{N})); 261 } 262 263 // Destroy tries. If destruction is recursive and MaxN is high enough, these 264 // will both fail. 265 destroyTrie(); 266 } 267 268 struct NumWithDestructorT { 269 uint64_t Num; 270 llvm::function_ref<void()> DestructorCallback; 271 ~NumWithDestructorT() { DestructorCallback(); } 272 }; 273 274 using NodeWithDestructorTrieTest = SimpleTrieHashMapTest<NumWithDestructorT>; 275 276 TEST_F(NodeWithDestructorTrieTest, TrieDestructionLoop) { 277 // Test destroying large Trie. Make sure there is no recursion that can 278 // overflow the stack. 279 280 // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate. 281 auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1); 282 283 // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug 284 // builds. 285 static constexpr uint64_t MaxN = 100000; 286 287 uint64_t DestructorCalled = 0; 288 auto DtorCallback = [&DestructorCalled]() { ++DestructorCalled; }; 289 for (uint64_t N = 0; N != MaxN; ++N) { 290 HashType Hash = hash(N); 291 Trie.insert(TrieType::pointer(), 292 TrieType::value_type(Hash, NumType{N, DtorCallback})); 293 } 294 // Reset the count after all the temporaries get destroyed. 295 DestructorCalled = 0; 296 297 // Destroy tries. If destruction is recursive and MaxN is high enough, these 298 // will both fail. 299 destroyTrie(); 300 301 // Count the number of destructor calls during `destroyTrie()`. 302 ASSERT_EQ(DestructorCalled, MaxN); 303 } 304 305 using NumStrNodeTrieTest = SimpleTrieHashMapTest<std::string>; 306 307 TEST_F(NumStrNodeTrieTest, TrieInsertLazy) { 308 for (unsigned RootBits : {2, 3, 6, 10}) { 309 for (unsigned SubtrieBits : {2, 3, 4}) { 310 auto &Trie = createTrie(RootBits, SubtrieBits); 311 for (int I = 0, E = 1000; I != E; ++I) { 312 TrieType::pointer Lookup; 313 HashType H = hash(I); 314 if (I & 1) 315 Lookup = Trie.find(H); 316 317 auto insertNum = [&](uint64_t Num) { 318 std::string S = Twine(I).str(); 319 auto Hash = hash(Num); 320 return Trie.insertLazy( 321 Hash, [&](TrieType::LazyValueConstructor C) { C(std::move(S)); }); 322 }; 323 auto S1 = insertNum(I); 324 // The address of the Data should be the same. 325 EXPECT_EQ(&S1->Data, &insertNum(I)->Data); 326 327 auto insertStr = [&](std::string S) { 328 int Num = std::stoi(S); 329 return insertNum(Num); 330 }; 331 std::string S2 = S1->Data; 332 // The address of the Data should be the same. 333 EXPECT_EQ(&S1->Data, &insertStr(S2)->Data); 334 } 335 for (int I = 0, E = 1000; I != E; ++I) { 336 std::string S = Twine(I).str(); 337 TrieType::pointer Lookup = Trie.find(hash(I)); 338 EXPECT_TRUE(Lookup); 339 if (!Lookup) 340 continue; 341 EXPECT_EQ(S, Lookup->Data); 342 } 343 } 344 } 345 } 346 } // end anonymous namespace 347