xref: /llvm-project/llvm/lib/Support/TrieRawHashMap.cpp (revision 03948882d3bac33cf71a47df1c7ee0f87aad9fc2)
1b510cdb8SSteven Wu //===- TrieRawHashMap.cpp -------------------------------------------------===//
2b510cdb8SSteven Wu //
3b510cdb8SSteven Wu // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b510cdb8SSteven Wu // See https://llvm.org/LICENSE.txt for license information.
5b510cdb8SSteven Wu // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b510cdb8SSteven Wu //
7b510cdb8SSteven Wu //===----------------------------------------------------------------------===//
8b510cdb8SSteven Wu 
9b510cdb8SSteven Wu #include "llvm/ADT/TrieRawHashMap.h"
10b510cdb8SSteven Wu #include "llvm/ADT/LazyAtomicPointer.h"
11b510cdb8SSteven Wu #include "llvm/ADT/StringExtras.h"
12b510cdb8SSteven Wu #include "llvm/ADT/TrieHashIndexGenerator.h"
13b510cdb8SSteven Wu #include "llvm/Support/Allocator.h"
14b510cdb8SSteven Wu #include "llvm/Support/Casting.h"
15b510cdb8SSteven Wu #include "llvm/Support/Debug.h"
16b510cdb8SSteven Wu #include "llvm/Support/ThreadSafeAllocator.h"
17b510cdb8SSteven Wu #include "llvm/Support/TrailingObjects.h"
18b510cdb8SSteven Wu #include "llvm/Support/raw_ostream.h"
19b510cdb8SSteven Wu #include <memory>
20b510cdb8SSteven Wu 
21b510cdb8SSteven Wu using namespace llvm;
22b510cdb8SSteven Wu 
23b510cdb8SSteven Wu namespace {
24b510cdb8SSteven Wu struct TrieNode {
25b510cdb8SSteven Wu   const bool IsSubtrie = false;
26b510cdb8SSteven Wu 
27b510cdb8SSteven Wu   TrieNode(bool IsSubtrie) : IsSubtrie(IsSubtrie) {}
28b510cdb8SSteven Wu 
29b510cdb8SSteven Wu   static void *operator new(size_t Size) { return ::operator new(Size); }
30b510cdb8SSteven Wu   void operator delete(void *Ptr) { ::operator delete(Ptr); }
31b510cdb8SSteven Wu };
32b510cdb8SSteven Wu 
33b510cdb8SSteven Wu struct TrieContent final : public TrieNode {
34b510cdb8SSteven Wu   const uint8_t ContentOffset;
35b510cdb8SSteven Wu   const uint8_t HashSize;
36b510cdb8SSteven Wu   const uint8_t HashOffset;
37b510cdb8SSteven Wu 
38b510cdb8SSteven Wu   void *getValuePointer() const {
39b510cdb8SSteven Wu     auto *Content = reinterpret_cast<const uint8_t *>(this) + ContentOffset;
40b510cdb8SSteven Wu     return const_cast<uint8_t *>(Content);
41b510cdb8SSteven Wu   }
42b510cdb8SSteven Wu 
43b510cdb8SSteven Wu   ArrayRef<uint8_t> getHash() const {
44b510cdb8SSteven Wu     auto *Begin = reinterpret_cast<const uint8_t *>(this) + HashOffset;
45b510cdb8SSteven Wu     return ArrayRef(Begin, Begin + HashSize);
46b510cdb8SSteven Wu   }
47b510cdb8SSteven Wu 
48b510cdb8SSteven Wu   TrieContent(size_t ContentOffset, size_t HashSize, size_t HashOffset)
49b510cdb8SSteven Wu       : TrieNode(/*IsSubtrie=*/false), ContentOffset(ContentOffset),
50b510cdb8SSteven Wu         HashSize(HashSize), HashOffset(HashOffset) {}
51b510cdb8SSteven Wu 
52b510cdb8SSteven Wu   static bool classof(const TrieNode *TN) { return !TN->IsSubtrie; }
53b510cdb8SSteven Wu };
54b510cdb8SSteven Wu 
55b510cdb8SSteven Wu static_assert(sizeof(TrieContent) ==
56b510cdb8SSteven Wu                   ThreadSafeTrieRawHashMapBase::TrieContentBaseSize,
57b510cdb8SSteven Wu               "Check header assumption!");
58b510cdb8SSteven Wu 
59b510cdb8SSteven Wu class TrieSubtrie final
60b510cdb8SSteven Wu     : public TrieNode,
61b510cdb8SSteven Wu       private TrailingObjects<TrieSubtrie, LazyAtomicPointer<TrieNode>> {
62b510cdb8SSteven Wu public:
63b510cdb8SSteven Wu   using Slot = LazyAtomicPointer<TrieNode>;
64b510cdb8SSteven Wu 
65b510cdb8SSteven Wu   Slot &get(size_t I) { return getTrailingObjects<Slot>()[I]; }
66b510cdb8SSteven Wu   TrieNode *load(size_t I) { return get(I).load(); }
67b510cdb8SSteven Wu 
68b510cdb8SSteven Wu   unsigned size() const { return Size; }
69b510cdb8SSteven Wu 
70b510cdb8SSteven Wu   TrieSubtrie *
71b510cdb8SSteven Wu   sink(size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI,
72b510cdb8SSteven Wu        function_ref<TrieSubtrie *(std::unique_ptr<TrieSubtrie>)> Saver);
73b510cdb8SSteven Wu 
74b510cdb8SSteven Wu   static std::unique_ptr<TrieSubtrie> create(size_t StartBit, size_t NumBits);
75b510cdb8SSteven Wu 
76b510cdb8SSteven Wu   explicit TrieSubtrie(size_t StartBit, size_t NumBits);
77b510cdb8SSteven Wu 
78b510cdb8SSteven Wu   static bool classof(const TrieNode *TN) { return TN->IsSubtrie; }
79b510cdb8SSteven Wu 
80b510cdb8SSteven Wu   static constexpr size_t sizeToAlloc(unsigned NumBits) {
81b510cdb8SSteven Wu     assert(NumBits < 20 && "Tries should have fewer than ~1M slots");
82*03948882SSimon Pilgrim     unsigned Count = 1u << NumBits;
83b510cdb8SSteven Wu     return totalSizeToAlloc<LazyAtomicPointer<TrieNode>>(Count);
84b510cdb8SSteven Wu   }
85b510cdb8SSteven Wu 
86b510cdb8SSteven Wu private:
87b510cdb8SSteven Wu   // FIXME: Use a bitset to speed up access:
88b510cdb8SSteven Wu   //
89b510cdb8SSteven Wu   //     std::array<std::atomic<uint64_t>, NumSlots/64> IsSet;
90b510cdb8SSteven Wu   //
91b510cdb8SSteven Wu   // This will avoid needing to visit sparsely filled slots in
92b510cdb8SSteven Wu   // \a ThreadSafeTrieRawHashMapBase::destroyImpl() when there's a non-trivial
93b510cdb8SSteven Wu   // destructor.
94b510cdb8SSteven Wu   //
95b510cdb8SSteven Wu   // It would also greatly speed up iteration, if we add that some day, and
96b510cdb8SSteven Wu   // allow get() to return one level sooner.
97b510cdb8SSteven Wu   //
98b510cdb8SSteven Wu   // This would be the algorithm for updating IsSet (after updating Slots):
99b510cdb8SSteven Wu   //
100b510cdb8SSteven Wu   //     std::atomic<uint64_t> &Bits = IsSet[I.High];
101b510cdb8SSteven Wu   //     const uint64_t NewBit = 1ULL << I.Low;
102b510cdb8SSteven Wu   //     uint64_t Old = 0;
103b510cdb8SSteven Wu   //     while (!Bits.compare_exchange_weak(Old, Old | NewBit))
104b510cdb8SSteven Wu   //       ;
105b510cdb8SSteven Wu 
106b510cdb8SSteven Wu   // For debugging.
107b510cdb8SSteven Wu   unsigned StartBit = 0;
108b510cdb8SSteven Wu   unsigned NumBits = 0;
109b510cdb8SSteven Wu   unsigned Size = 0;
110b510cdb8SSteven Wu   friend class llvm::ThreadSafeTrieRawHashMapBase;
111b510cdb8SSteven Wu   friend class TrailingObjects;
112b510cdb8SSteven Wu 
113b510cdb8SSteven Wu public:
114b510cdb8SSteven Wu   /// Linked list for ownership of tries. The pointer is owned by TrieSubtrie.
115b510cdb8SSteven Wu   std::atomic<TrieSubtrie *> Next;
116b510cdb8SSteven Wu };
117b510cdb8SSteven Wu } // end namespace
118b510cdb8SSteven Wu 
119b510cdb8SSteven Wu std::unique_ptr<TrieSubtrie> TrieSubtrie::create(size_t StartBit,
120b510cdb8SSteven Wu                                                  size_t NumBits) {
121b510cdb8SSteven Wu   void *Memory = ::operator new(sizeToAlloc(NumBits));
122b510cdb8SSteven Wu   TrieSubtrie *S = ::new (Memory) TrieSubtrie(StartBit, NumBits);
123b510cdb8SSteven Wu   return std::unique_ptr<TrieSubtrie>(S);
124b510cdb8SSteven Wu }
125b510cdb8SSteven Wu 
126b510cdb8SSteven Wu TrieSubtrie::TrieSubtrie(size_t StartBit, size_t NumBits)
127b510cdb8SSteven Wu     : TrieNode(true), StartBit(StartBit), NumBits(NumBits), Size(1u << NumBits),
128b510cdb8SSteven Wu       Next(nullptr) {
129b510cdb8SSteven Wu   for (unsigned I = 0; I < Size; ++I)
130b510cdb8SSteven Wu     new (&get(I)) Slot(nullptr);
131b510cdb8SSteven Wu 
132b510cdb8SSteven Wu   static_assert(
133b510cdb8SSteven Wu       std::is_trivially_destructible<LazyAtomicPointer<TrieNode>>::value,
134b510cdb8SSteven Wu       "Expected no work in destructor for TrieNode");
135b510cdb8SSteven Wu }
136b510cdb8SSteven Wu 
137b510cdb8SSteven Wu // Sink the nodes down sub-trie when the object being inserted collides with
138b510cdb8SSteven Wu // the index of existing object in the trie. In this case, a new sub-trie needs
139b510cdb8SSteven Wu // to be allocated to hold existing object.
140b510cdb8SSteven Wu TrieSubtrie *TrieSubtrie::sink(
141b510cdb8SSteven Wu     size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI,
142b510cdb8SSteven Wu     function_ref<TrieSubtrie *(std::unique_ptr<TrieSubtrie>)> Saver) {
143b510cdb8SSteven Wu   // Create a new sub-trie that points to the existing object with the new
144b510cdb8SSteven Wu   // index for the next level.
145b510cdb8SSteven Wu   assert(NumSubtrieBits > 0);
146b510cdb8SSteven Wu   std::unique_ptr<TrieSubtrie> S = create(StartBit + NumBits, NumSubtrieBits);
147b510cdb8SSteven Wu 
148b510cdb8SSteven Wu   assert(NewI < Size);
149b510cdb8SSteven Wu   S->get(NewI).store(&Content);
150b510cdb8SSteven Wu 
151b510cdb8SSteven Wu   // Using compare_exchange to atomically add back the new sub-trie to the trie
152b510cdb8SSteven Wu   // in the place of the exsiting object.
153b510cdb8SSteven Wu   TrieNode *ExistingNode = &Content;
154b510cdb8SSteven Wu   assert(I < Size);
155b510cdb8SSteven Wu   if (get(I).compare_exchange_strong(ExistingNode, S.get()))
156b510cdb8SSteven Wu     return Saver(std::move(S));
157b510cdb8SSteven Wu 
158b510cdb8SSteven Wu   // Another thread created a subtrie already. Return it and let "S" be
159b510cdb8SSteven Wu   // destructed.
160b510cdb8SSteven Wu   return cast<TrieSubtrie>(ExistingNode);
161b510cdb8SSteven Wu }
162b510cdb8SSteven Wu 
163b510cdb8SSteven Wu class ThreadSafeTrieRawHashMapBase::ImplType final
164b510cdb8SSteven Wu     : private TrailingObjects<ThreadSafeTrieRawHashMapBase::ImplType,
165b510cdb8SSteven Wu                               TrieSubtrie> {
166b510cdb8SSteven Wu public:
167b510cdb8SSteven Wu   static std::unique_ptr<ImplType> create(size_t StartBit, size_t NumBits) {
168b510cdb8SSteven Wu     size_t Size = sizeof(ImplType) + TrieSubtrie::sizeToAlloc(NumBits);
169b510cdb8SSteven Wu     void *Memory = ::operator new(Size);
170b510cdb8SSteven Wu     ImplType *Impl = ::new (Memory) ImplType(StartBit, NumBits);
171b510cdb8SSteven Wu     return std::unique_ptr<ImplType>(Impl);
172b510cdb8SSteven Wu   }
173b510cdb8SSteven Wu 
174b510cdb8SSteven Wu   // Save the Subtrie into the ownship list of the trie structure in a
175b510cdb8SSteven Wu   // thread-safe way. The ownership transfer is done by compare_exchange the
176b510cdb8SSteven Wu   // pointer value inside the unique_ptr.
177b510cdb8SSteven Wu   TrieSubtrie *save(std::unique_ptr<TrieSubtrie> S) {
178b510cdb8SSteven Wu     assert(!S->Next && "Expected S to a freshly-constructed leaf");
179b510cdb8SSteven Wu 
180b510cdb8SSteven Wu     TrieSubtrie *CurrentHead = nullptr;
181b510cdb8SSteven Wu     // Add ownership of "S" to front of the list, so that Root -> S ->
182b510cdb8SSteven Wu     // Root.Next. This works by repeatedly setting S->Next to a candidate value
183b510cdb8SSteven Wu     // of Root.Next (initially nullptr), then setting Root.Next to S once the
184b510cdb8SSteven Wu     // candidate matches reality.
185b510cdb8SSteven Wu     while (!getRoot()->Next.compare_exchange_weak(CurrentHead, S.get()))
186b510cdb8SSteven Wu       S->Next.exchange(CurrentHead);
187b510cdb8SSteven Wu 
188b510cdb8SSteven Wu     // Ownership transferred to subtrie successfully. Release the unique_ptr.
189b510cdb8SSteven Wu     return S.release();
190b510cdb8SSteven Wu   }
191b510cdb8SSteven Wu 
192b510cdb8SSteven Wu   // Get the root which is the trailing object.
193b510cdb8SSteven Wu   TrieSubtrie *getRoot() { return getTrailingObjects<TrieSubtrie>(); }
194b510cdb8SSteven Wu 
195b510cdb8SSteven Wu   static void *operator new(size_t Size) { return ::operator new(Size); }
196b510cdb8SSteven Wu   void operator delete(void *Ptr) { ::operator delete(Ptr); }
197b510cdb8SSteven Wu 
198b510cdb8SSteven Wu   /// FIXME: This should take a function that allocates and constructs the
199b510cdb8SSteven Wu   /// content lazily (taking the hash as a separate parameter), in case of
200b510cdb8SSteven Wu   /// collision.
201b510cdb8SSteven Wu   ThreadSafeAllocator<BumpPtrAllocator> ContentAlloc;
202b510cdb8SSteven Wu 
203b510cdb8SSteven Wu private:
204b510cdb8SSteven Wu   friend class TrailingObjects;
205b510cdb8SSteven Wu 
206b510cdb8SSteven Wu   ImplType(size_t StartBit, size_t NumBits) {
207b510cdb8SSteven Wu     ::new (getRoot()) TrieSubtrie(StartBit, NumBits);
208b510cdb8SSteven Wu   }
209b510cdb8SSteven Wu };
210b510cdb8SSteven Wu 
211b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::ImplType &
212b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::getOrCreateImpl() {
213b510cdb8SSteven Wu   if (ImplType *Impl = ImplPtr.load())
214b510cdb8SSteven Wu     return *Impl;
215b510cdb8SSteven Wu 
216b510cdb8SSteven Wu   // Create a new ImplType and store it if another thread doesn't do so first.
217b510cdb8SSteven Wu   // If another thread wins this one is destroyed locally.
218b510cdb8SSteven Wu   std::unique_ptr<ImplType> Impl = ImplType::create(0, NumRootBits);
219b510cdb8SSteven Wu   ImplType *ExistingImpl = nullptr;
220b510cdb8SSteven Wu 
221b510cdb8SSteven Wu   // If the ownership transferred succesfully, release unique_ptr and return
222b510cdb8SSteven Wu   // the pointer to the new ImplType.
223b510cdb8SSteven Wu   if (ImplPtr.compare_exchange_strong(ExistingImpl, Impl.get()))
224b510cdb8SSteven Wu     return *Impl.release();
225b510cdb8SSteven Wu 
226b510cdb8SSteven Wu   // Already created, return the existing ImplType.
227b510cdb8SSteven Wu   return *ExistingImpl;
228b510cdb8SSteven Wu }
229b510cdb8SSteven Wu 
230b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::PointerBase
231b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::find(ArrayRef<uint8_t> Hash) const {
232b510cdb8SSteven Wu   assert(!Hash.empty() && "Uninitialized hash");
233b510cdb8SSteven Wu 
234b510cdb8SSteven Wu   ImplType *Impl = ImplPtr.load();
235b510cdb8SSteven Wu   if (!Impl)
236b510cdb8SSteven Wu     return PointerBase();
237b510cdb8SSteven Wu 
238b510cdb8SSteven Wu   TrieSubtrie *S = Impl->getRoot();
239b510cdb8SSteven Wu   TrieHashIndexGenerator IndexGen{NumRootBits, NumSubtrieBits, Hash};
240b510cdb8SSteven Wu   size_t Index = IndexGen.next();
241b510cdb8SSteven Wu   while (Index != IndexGen.end()) {
242b510cdb8SSteven Wu     // Try to set the content.
243b510cdb8SSteven Wu     TrieNode *Existing = S->get(Index);
244b510cdb8SSteven Wu     if (!Existing)
245b510cdb8SSteven Wu       return PointerBase(S, Index, *IndexGen.StartBit);
246b510cdb8SSteven Wu 
247b510cdb8SSteven Wu     // Check for an exact match.
248b510cdb8SSteven Wu     if (auto *ExistingContent = dyn_cast<TrieContent>(Existing))
249b510cdb8SSteven Wu       return ExistingContent->getHash() == Hash
250b510cdb8SSteven Wu                  ? PointerBase(ExistingContent->getValuePointer())
251b510cdb8SSteven Wu                  : PointerBase(S, Index, *IndexGen.StartBit);
252b510cdb8SSteven Wu 
253b510cdb8SSteven Wu     Index = IndexGen.next();
254b510cdb8SSteven Wu     S = cast<TrieSubtrie>(Existing);
255b510cdb8SSteven Wu   }
256b510cdb8SSteven Wu   llvm_unreachable("failed to locate the node after consuming all hash bytes");
257b510cdb8SSteven Wu }
258b510cdb8SSteven Wu 
259b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::PointerBase ThreadSafeTrieRawHashMapBase::insert(
260b510cdb8SSteven Wu     PointerBase Hint, ArrayRef<uint8_t> Hash,
261b510cdb8SSteven Wu     function_ref<const uint8_t *(void *Mem, ArrayRef<uint8_t> Hash)>
262b510cdb8SSteven Wu         Constructor) {
263b510cdb8SSteven Wu   assert(!Hash.empty() && "Uninitialized hash");
264b510cdb8SSteven Wu 
265b510cdb8SSteven Wu   ImplType &Impl = getOrCreateImpl();
266b510cdb8SSteven Wu   TrieSubtrie *S = Impl.getRoot();
267b510cdb8SSteven Wu   TrieHashIndexGenerator IndexGen{NumRootBits, NumSubtrieBits, Hash};
268b510cdb8SSteven Wu   size_t Index;
269b510cdb8SSteven Wu   if (Hint.isHint()) {
270b510cdb8SSteven Wu     S = static_cast<TrieSubtrie *>(Hint.P);
271b510cdb8SSteven Wu     Index = IndexGen.hint(Hint.I, Hint.B);
272b510cdb8SSteven Wu   } else {
273b510cdb8SSteven Wu     Index = IndexGen.next();
274b510cdb8SSteven Wu   }
275b510cdb8SSteven Wu 
276b510cdb8SSteven Wu   while (Index != IndexGen.end()) {
277b510cdb8SSteven Wu     // Load the node from the slot, allocating and calling the constructor if
278b510cdb8SSteven Wu     // the slot is empty.
279b510cdb8SSteven Wu     bool Generated = false;
280b510cdb8SSteven Wu     TrieNode &Existing = S->get(Index).loadOrGenerate([&]() {
281b510cdb8SSteven Wu       Generated = true;
282b510cdb8SSteven Wu 
283b510cdb8SSteven Wu       // Construct the value itself at the tail.
284b510cdb8SSteven Wu       uint8_t *Memory = reinterpret_cast<uint8_t *>(
285b510cdb8SSteven Wu           Impl.ContentAlloc.Allocate(ContentAllocSize, ContentAllocAlign));
286b510cdb8SSteven Wu       const uint8_t *HashStorage = Constructor(Memory + ContentOffset, Hash);
287b510cdb8SSteven Wu 
288b510cdb8SSteven Wu       // Construct the TrieContent header, passing in the offset to the hash.
289b510cdb8SSteven Wu       TrieContent *Content = ::new (Memory)
290b510cdb8SSteven Wu           TrieContent(ContentOffset, Hash.size(), HashStorage - Memory);
291b510cdb8SSteven Wu       assert(Hash == Content->getHash() && "Hash not properly initialized");
292b510cdb8SSteven Wu       return Content;
293b510cdb8SSteven Wu     });
294b510cdb8SSteven Wu     // If we just generated it, return it!
295b510cdb8SSteven Wu     if (Generated)
296b510cdb8SSteven Wu       return PointerBase(cast<TrieContent>(Existing).getValuePointer());
297b510cdb8SSteven Wu 
298b510cdb8SSteven Wu     if (auto *ST = dyn_cast<TrieSubtrie>(&Existing)) {
299b510cdb8SSteven Wu       S = ST;
300b510cdb8SSteven Wu       Index = IndexGen.next();
301b510cdb8SSteven Wu       continue;
302b510cdb8SSteven Wu     }
303b510cdb8SSteven Wu 
304b510cdb8SSteven Wu     // Return the existing content if it's an exact match!
305b510cdb8SSteven Wu     auto &ExistingContent = cast<TrieContent>(Existing);
306b510cdb8SSteven Wu     if (ExistingContent.getHash() == Hash)
307b510cdb8SSteven Wu       return PointerBase(ExistingContent.getValuePointer());
308b510cdb8SSteven Wu 
309b510cdb8SSteven Wu     // Sink the existing content as long as the indexes match.
310b510cdb8SSteven Wu     size_t NextIndex = IndexGen.next();
311b510cdb8SSteven Wu     while (NextIndex != IndexGen.end()) {
312b510cdb8SSteven Wu       size_t NewIndexForExistingContent =
313b510cdb8SSteven Wu           IndexGen.getCollidingBits(ExistingContent.getHash());
314b510cdb8SSteven Wu       S = S->sink(Index, ExistingContent, IndexGen.getNumBits(),
315b510cdb8SSteven Wu                   NewIndexForExistingContent,
316b510cdb8SSteven Wu                   [&Impl](std::unique_ptr<TrieSubtrie> S) {
317b510cdb8SSteven Wu                     return Impl.save(std::move(S));
318b510cdb8SSteven Wu                   });
319b510cdb8SSteven Wu       Index = NextIndex;
320b510cdb8SSteven Wu 
321b510cdb8SSteven Wu       // Found the difference.
322b510cdb8SSteven Wu       if (NextIndex != NewIndexForExistingContent)
323b510cdb8SSteven Wu         break;
324b510cdb8SSteven Wu 
325b510cdb8SSteven Wu       NextIndex = IndexGen.next();
326b510cdb8SSteven Wu     }
327b510cdb8SSteven Wu   }
328b510cdb8SSteven Wu   llvm_unreachable("failed to insert the node after consuming all hash bytes");
329b510cdb8SSteven Wu }
330b510cdb8SSteven Wu 
331b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::ThreadSafeTrieRawHashMapBase(
332b510cdb8SSteven Wu     size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset,
333b510cdb8SSteven Wu     std::optional<size_t> NumRootBits, std::optional<size_t> NumSubtrieBits)
334b510cdb8SSteven Wu     : ContentAllocSize(ContentAllocSize), ContentAllocAlign(ContentAllocAlign),
335b510cdb8SSteven Wu       ContentOffset(ContentOffset),
336b510cdb8SSteven Wu       NumRootBits(NumRootBits ? *NumRootBits : DefaultNumRootBits),
337b510cdb8SSteven Wu       NumSubtrieBits(NumSubtrieBits ? *NumSubtrieBits : DefaultNumSubtrieBits),
338b510cdb8SSteven Wu       ImplPtr(nullptr) {
339b510cdb8SSteven Wu   // Assertion checks for reasonable configuration. The settings below are not
340b510cdb8SSteven Wu   // hard limits on most platforms, but a reasonable configuration should fall
341b510cdb8SSteven Wu   // within those limits.
342b510cdb8SSteven Wu   assert((!NumRootBits || *NumRootBits < 20) &&
343b510cdb8SSteven Wu          "Root should have fewer than ~1M slots");
344b510cdb8SSteven Wu   assert((!NumSubtrieBits || *NumSubtrieBits < 10) &&
345b510cdb8SSteven Wu          "Subtries should have fewer than ~1K slots");
346b510cdb8SSteven Wu }
347b510cdb8SSteven Wu 
348b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::ThreadSafeTrieRawHashMapBase(
349b510cdb8SSteven Wu     ThreadSafeTrieRawHashMapBase &&RHS)
350b510cdb8SSteven Wu     : ContentAllocSize(RHS.ContentAllocSize),
351b510cdb8SSteven Wu       ContentAllocAlign(RHS.ContentAllocAlign),
352b510cdb8SSteven Wu       ContentOffset(RHS.ContentOffset), NumRootBits(RHS.NumRootBits),
353b510cdb8SSteven Wu       NumSubtrieBits(RHS.NumSubtrieBits) {
354b510cdb8SSteven Wu   // Steal the root from RHS.
355b510cdb8SSteven Wu   ImplPtr = RHS.ImplPtr.exchange(nullptr);
356b510cdb8SSteven Wu }
357b510cdb8SSteven Wu 
358b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::~ThreadSafeTrieRawHashMapBase() {
359b510cdb8SSteven Wu   assert(!ImplPtr.load() && "Expected subclass to call destroyImpl()");
360b510cdb8SSteven Wu }
361b510cdb8SSteven Wu 
362b510cdb8SSteven Wu void ThreadSafeTrieRawHashMapBase::destroyImpl(
363b510cdb8SSteven Wu     function_ref<void(void *)> Destructor) {
364b510cdb8SSteven Wu   std::unique_ptr<ImplType> Impl(ImplPtr.exchange(nullptr));
365b510cdb8SSteven Wu   if (!Impl)
366b510cdb8SSteven Wu     return;
367b510cdb8SSteven Wu 
368b510cdb8SSteven Wu   // Destroy content nodes throughout trie. Avoid destroying any subtries since
369b510cdb8SSteven Wu   // we need TrieNode::classof() to find the content nodes.
370b510cdb8SSteven Wu   //
371b510cdb8SSteven Wu   // FIXME: Once we have bitsets (see FIXME in TrieSubtrie class), use them
372b510cdb8SSteven Wu   // facilitate sparse iteration here.
373b510cdb8SSteven Wu   if (Destructor)
374b510cdb8SSteven Wu     for (TrieSubtrie *Trie = Impl->getRoot(); Trie; Trie = Trie->Next.load())
375b510cdb8SSteven Wu       for (unsigned I = 0; I < Trie->size(); ++I)
376b510cdb8SSteven Wu         if (auto *Content = dyn_cast_or_null<TrieContent>(Trie->load(I)))
377b510cdb8SSteven Wu           Destructor(Content->getValuePointer());
378b510cdb8SSteven Wu 
379b510cdb8SSteven Wu   // Destroy the subtries. Incidentally, this destroys them in the reverse order
380b510cdb8SSteven Wu   // of saving.
381b510cdb8SSteven Wu   TrieSubtrie *Trie = Impl->getRoot()->Next;
382b510cdb8SSteven Wu   while (Trie) {
383b510cdb8SSteven Wu     TrieSubtrie *Next = Trie->Next.exchange(nullptr);
384b510cdb8SSteven Wu     delete Trie;
385b510cdb8SSteven Wu     Trie = Next;
386b510cdb8SSteven Wu   }
387b510cdb8SSteven Wu }
388b510cdb8SSteven Wu 
389b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::PointerBase
390b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::getRoot() const {
391b510cdb8SSteven Wu   ImplType *Impl = ImplPtr.load();
392b510cdb8SSteven Wu   if (!Impl)
393b510cdb8SSteven Wu     return PointerBase();
394b510cdb8SSteven Wu   return PointerBase(Impl->getRoot());
395b510cdb8SSteven Wu }
396b510cdb8SSteven Wu 
397b510cdb8SSteven Wu unsigned ThreadSafeTrieRawHashMapBase::getStartBit(
398b510cdb8SSteven Wu     ThreadSafeTrieRawHashMapBase::PointerBase P) const {
399b510cdb8SSteven Wu   assert(!P.isHint() && "Not a valid trie");
400b510cdb8SSteven Wu   if (!P.P)
401b510cdb8SSteven Wu     return 0;
402b510cdb8SSteven Wu   if (auto *S = dyn_cast<TrieSubtrie>((TrieNode *)P.P))
403b510cdb8SSteven Wu     return S->StartBit;
404b510cdb8SSteven Wu   return 0;
405b510cdb8SSteven Wu }
406b510cdb8SSteven Wu 
407b510cdb8SSteven Wu unsigned ThreadSafeTrieRawHashMapBase::getNumBits(
408b510cdb8SSteven Wu     ThreadSafeTrieRawHashMapBase::PointerBase P) const {
409b510cdb8SSteven Wu   assert(!P.isHint() && "Not a valid trie");
410b510cdb8SSteven Wu   if (!P.P)
411b510cdb8SSteven Wu     return 0;
412b510cdb8SSteven Wu   if (auto *S = dyn_cast<TrieSubtrie>((TrieNode *)P.P))
413b510cdb8SSteven Wu     return S->NumBits;
414b510cdb8SSteven Wu   return 0;
415b510cdb8SSteven Wu }
416b510cdb8SSteven Wu 
417b510cdb8SSteven Wu unsigned ThreadSafeTrieRawHashMapBase::getNumSlotUsed(
418b510cdb8SSteven Wu     ThreadSafeTrieRawHashMapBase::PointerBase P) const {
419b510cdb8SSteven Wu   assert(!P.isHint() && "Not a valid trie");
420b510cdb8SSteven Wu   if (!P.P)
421b510cdb8SSteven Wu     return 0;
422b510cdb8SSteven Wu   auto *S = dyn_cast<TrieSubtrie>((TrieNode *)P.P);
423b510cdb8SSteven Wu   if (!S)
424b510cdb8SSteven Wu     return 0;
425b510cdb8SSteven Wu   unsigned Num = 0;
426b510cdb8SSteven Wu   for (unsigned I = 0, E = S->size(); I < E; ++I)
427ba8d9ce8SSteven Wu     if (S->load(I))
428b510cdb8SSteven Wu       ++Num;
429b510cdb8SSteven Wu   return Num;
430b510cdb8SSteven Wu }
431b510cdb8SSteven Wu 
432b510cdb8SSteven Wu std::string ThreadSafeTrieRawHashMapBase::getTriePrefixAsString(
433b510cdb8SSteven Wu     ThreadSafeTrieRawHashMapBase::PointerBase P) const {
434b510cdb8SSteven Wu   assert(!P.isHint() && "Not a valid trie");
435b510cdb8SSteven Wu   if (!P.P)
436b510cdb8SSteven Wu     return "";
437b510cdb8SSteven Wu 
438b510cdb8SSteven Wu   auto *S = dyn_cast<TrieSubtrie>((TrieNode *)P.P);
439b510cdb8SSteven Wu   if (!S || !S->IsSubtrie)
440b510cdb8SSteven Wu     return "";
441b510cdb8SSteven Wu 
442b510cdb8SSteven Wu   // Find a TrieContent node which has hash stored. Depth search following the
443b510cdb8SSteven Wu   // first used slot until a TrieContent node is found.
444b510cdb8SSteven Wu   TrieSubtrie *Current = S;
445b510cdb8SSteven Wu   TrieContent *Node = nullptr;
446b510cdb8SSteven Wu   while (Current) {
447b510cdb8SSteven Wu     TrieSubtrie *Next = nullptr;
448b510cdb8SSteven Wu     // Find first used slot in the trie.
449b510cdb8SSteven Wu     for (unsigned I = 0, E = Current->size(); I < E; ++I) {
450b510cdb8SSteven Wu       auto *S = Current->load(I);
451b510cdb8SSteven Wu       if (!S)
452b510cdb8SSteven Wu         continue;
453b510cdb8SSteven Wu 
454b510cdb8SSteven Wu       if (auto *Content = dyn_cast<TrieContent>(S))
455b510cdb8SSteven Wu         Node = Content;
456b510cdb8SSteven Wu       else if (auto *Sub = dyn_cast<TrieSubtrie>(S))
457b510cdb8SSteven Wu         Next = Sub;
458b510cdb8SSteven Wu       break;
459b510cdb8SSteven Wu     }
460b510cdb8SSteven Wu 
461b510cdb8SSteven Wu     // Found the node.
462b510cdb8SSteven Wu     if (Node)
463b510cdb8SSteven Wu       break;
464b510cdb8SSteven Wu 
465b510cdb8SSteven Wu     // Continue to the next level if the node is not found.
466b510cdb8SSteven Wu     Current = Next;
467b510cdb8SSteven Wu   }
468b510cdb8SSteven Wu 
469b510cdb8SSteven Wu   assert(Node && "malformed trie, cannot find TrieContent on leaf node");
470b510cdb8SSteven Wu   // The prefix for the current trie is the first `StartBit` of the content
471b510cdb8SSteven Wu   // stored underneath this subtrie.
472b510cdb8SSteven Wu   std::string Str;
473b510cdb8SSteven Wu   raw_string_ostream SS(Str);
474b510cdb8SSteven Wu 
475b510cdb8SSteven Wu   unsigned StartFullBytes = (S->StartBit + 1) / 8 - 1;
476b510cdb8SSteven Wu   SS << toHex(toStringRef(Node->getHash()).take_front(StartFullBytes),
477b510cdb8SSteven Wu               /*LowerCase=*/true);
478b510cdb8SSteven Wu 
479b510cdb8SSteven Wu   // For the part of the prefix that doesn't fill a byte, print raw bit values.
480b510cdb8SSteven Wu   std::string Bits;
481b510cdb8SSteven Wu   for (unsigned I = StartFullBytes * 8, E = S->StartBit; I < E; ++I) {
482b510cdb8SSteven Wu     unsigned Index = I / 8;
483b510cdb8SSteven Wu     unsigned Offset = 7 - I % 8;
484b510cdb8SSteven Wu     Bits.push_back('0' + ((Node->getHash()[Index] >> Offset) & 1));
485b510cdb8SSteven Wu   }
486b510cdb8SSteven Wu 
487b510cdb8SSteven Wu   if (!Bits.empty())
488b510cdb8SSteven Wu     SS << "[" << Bits << "]";
489b510cdb8SSteven Wu 
490b510cdb8SSteven Wu   return SS.str();
491b510cdb8SSteven Wu }
492b510cdb8SSteven Wu 
493b510cdb8SSteven Wu unsigned ThreadSafeTrieRawHashMapBase::getNumTries() const {
494b510cdb8SSteven Wu   ImplType *Impl = ImplPtr.load();
495b510cdb8SSteven Wu   if (!Impl)
496b510cdb8SSteven Wu     return 0;
497b510cdb8SSteven Wu   unsigned Num = 0;
498b510cdb8SSteven Wu   for (TrieSubtrie *Trie = Impl->getRoot(); Trie; Trie = Trie->Next.load())
499b510cdb8SSteven Wu     ++Num;
500b510cdb8SSteven Wu   return Num;
501b510cdb8SSteven Wu }
502b510cdb8SSteven Wu 
503b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::PointerBase
504b510cdb8SSteven Wu ThreadSafeTrieRawHashMapBase::getNextTrie(
505b510cdb8SSteven Wu     ThreadSafeTrieRawHashMapBase::PointerBase P) const {
506b510cdb8SSteven Wu   assert(!P.isHint() && "Not a valid trie");
507b510cdb8SSteven Wu   if (!P.P)
508b510cdb8SSteven Wu     return PointerBase();
509b510cdb8SSteven Wu   auto *S = dyn_cast<TrieSubtrie>((TrieNode *)P.P);
510b510cdb8SSteven Wu   if (!S)
511b510cdb8SSteven Wu     return PointerBase();
512b510cdb8SSteven Wu   if (auto *E = S->Next.load())
513b510cdb8SSteven Wu     return PointerBase(E);
514b510cdb8SSteven Wu   return PointerBase();
515b510cdb8SSteven Wu }
516