xref: /llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h (revision a8a494faab8af60754c4647dbb7b24bc86a80aab)
1 //===- llvm/ADT/SmallPtrSet.h - 'Normally small' pointer set ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the SmallPtrSet class.  See the doxygen comment for
11 /// SmallPtrSetImplBase for more details on the algorithm used.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_ADT_SMALLPTRSET_H
16 #define LLVM_ADT_SMALLPTRSET_H
17 
18 #include "llvm/ADT/EpochTracker.h"
19 #include "llvm/Support/MathExtras.h"
20 #include "llvm/Support/ReverseIteration.h"
21 #include "llvm/Support/type_traits.h"
22 #include <algorithm>
23 #include <cassert>
24 #include <cstddef>
25 #include <cstdlib>
26 #include <cstring>
27 #include <initializer_list>
28 #include <iterator>
29 #include <limits>
30 #include <utility>
31 
32 namespace llvm {
33 
34 /// SmallPtrSetImplBase - This is the common code shared among all the
35 /// SmallPtrSet<>'s, which is almost everything.  SmallPtrSet has two modes, one
36 /// for small and one for large sets.
37 ///
38 /// Small sets use an array of pointers allocated in the SmallPtrSet object,
39 /// which is treated as a simple array of pointers.  When a pointer is added to
40 /// the set, the array is scanned to see if the element already exists, if not
41 /// the element is 'pushed back' onto the array.  If we run out of space in the
42 /// array, we grow into the 'large set' case.  SmallSet should be used when the
43 /// sets are often small.  In this case, no memory allocation is used, and only
44 /// light-weight and cache-efficient scanning is used.
45 ///
46 /// Large sets use a classic exponentially-probed hash table.  Empty buckets are
47 /// represented with an illegal pointer value (-1) to allow null pointers to be
48 /// inserted.  Tombstones are represented with another illegal pointer value
49 /// (-2), to allow deletion.  The hash table is resized when the table is 3/4 or
50 /// more.  When this happens, the table is doubled in size.
51 ///
52 class SmallPtrSetImplBase : public DebugEpochBase {
53   friend class SmallPtrSetIteratorImpl;
54 
55 protected:
56   /// The current set of buckets, in either small or big representation.
57   const void **CurArray;
58   /// CurArraySize - The allocated size of CurArray, always a power of two.
59   unsigned CurArraySize;
60 
61   /// Number of elements in CurArray that contain a value or are a tombstone.
62   /// If small, all these elements are at the beginning of CurArray and the rest
63   /// is uninitialized.
64   unsigned NumNonEmpty;
65   /// Number of tombstones in CurArray.
66   unsigned NumTombstones;
67   /// Whether the set is in small representation.
68   bool IsSmall;
69 
70   // Helpers to copy and move construct a SmallPtrSet.
71   SmallPtrSetImplBase(const void **SmallStorage,
72                       const SmallPtrSetImplBase &that);
73   SmallPtrSetImplBase(const void **SmallStorage, unsigned SmallSize,
74                       const void **RHSSmallStorage, SmallPtrSetImplBase &&that);
75 
76   explicit SmallPtrSetImplBase(const void **SmallStorage, unsigned SmallSize)
77       : CurArray(SmallStorage), CurArraySize(SmallSize), NumNonEmpty(0),
78         NumTombstones(0), IsSmall(true) {
79     assert(SmallSize && (SmallSize & (SmallSize-1)) == 0 &&
80            "Initial size must be a power of two!");
81   }
82 
83   ~SmallPtrSetImplBase() {
84     if (!isSmall())
85       free(CurArray);
86   }
87 
88 public:
89   using size_type = unsigned;
90 
91   SmallPtrSetImplBase &operator=(const SmallPtrSetImplBase &) = delete;
92 
93   [[nodiscard]] bool empty() const { return size() == 0; }
94   size_type size() const { return NumNonEmpty - NumTombstones; }
95   size_type capacity() const { return CurArraySize; }
96 
97   void clear() {
98     incrementEpoch();
99     // If the capacity of the array is huge, and the # elements used is small,
100     // shrink the array.
101     if (!isSmall()) {
102       if (size() * 4 < CurArraySize && CurArraySize > 32)
103         return shrink_and_clear();
104       // Fill the array with empty markers.
105       memset(CurArray, -1, CurArraySize * sizeof(void *));
106     }
107 
108     NumNonEmpty = 0;
109     NumTombstones = 0;
110   }
111 
112   void reserve(size_type NumEntries) {
113     incrementEpoch();
114     // Do nothing if we're given zero as a reservation size.
115     if (NumEntries == 0)
116       return;
117     // No need to expand if we're small and NumEntries will fit in the space.
118     if (isSmall() && NumEntries <= CurArraySize)
119       return;
120     // insert_imp_big will reallocate if stores is more than 75% full, on the
121     // /final/ insertion.
122     if (!isSmall() && ((NumEntries - 1) * 4) < (CurArraySize * 3))
123       return;
124     // We must Grow -- find the size where we'd be 75% full, then round up to
125     // the next power of two.
126     size_type NewSize = NumEntries + (NumEntries / 3);
127     NewSize = 1 << (Log2_32_Ceil(NewSize) + 1);
128     // Like insert_imp_big, always allocate at least 128 elements.
129     NewSize = std::max(128u, NewSize);
130     Grow(NewSize);
131   }
132 
133 protected:
134   static void *getTombstoneMarker() { return reinterpret_cast<void*>(-2); }
135 
136   static void *getEmptyMarker() {
137     // Note that -1 is chosen to make clear() efficiently implementable with
138     // memset and because it's not a valid pointer value.
139     return reinterpret_cast<void*>(-1);
140   }
141 
142   const void **EndPointer() const {
143     return isSmall() ? CurArray + NumNonEmpty : CurArray + CurArraySize;
144   }
145 
146   /// insert_imp - This returns true if the pointer was new to the set, false if
147   /// it was already in the set.  This is hidden from the client so that the
148   /// derived class can check that the right type of pointer is passed in.
149   std::pair<const void *const *, bool> insert_imp(const void *Ptr) {
150     if (isSmall()) {
151       // Check to see if it is already in the set.
152       for (const void **APtr = CurArray, **E = CurArray + NumNonEmpty;
153            APtr != E; ++APtr) {
154         const void *Value = *APtr;
155         if (Value == Ptr)
156           return std::make_pair(APtr, false);
157       }
158 
159       // Nope, there isn't.  If we stay small, just 'pushback' now.
160       if (NumNonEmpty < CurArraySize) {
161         CurArray[NumNonEmpty++] = Ptr;
162         incrementEpoch();
163         return std::make_pair(CurArray + (NumNonEmpty - 1), true);
164       }
165       // Otherwise, hit the big set case, which will call grow.
166     }
167     return insert_imp_big(Ptr);
168   }
169 
170   /// erase_imp - If the set contains the specified pointer, remove it and
171   /// return true, otherwise return false.  This is hidden from the client so
172   /// that the derived class can check that the right type of pointer is passed
173   /// in.
174   bool erase_imp(const void * Ptr) {
175     if (isSmall()) {
176       for (const void **APtr = CurArray, **E = CurArray + NumNonEmpty;
177            APtr != E; ++APtr) {
178         if (*APtr == Ptr) {
179           *APtr = CurArray[--NumNonEmpty];
180           incrementEpoch();
181           return true;
182         }
183       }
184       return false;
185     }
186 
187     auto *Bucket = doFind(Ptr);
188     if (!Bucket)
189       return false;
190 
191     *const_cast<const void **>(Bucket) = getTombstoneMarker();
192     NumTombstones++;
193     // Treat this consistently from an API perspective, even if we don't
194     // actually invalidate iterators here.
195     incrementEpoch();
196     return true;
197   }
198 
199   /// Returns the raw pointer needed to construct an iterator.  If element not
200   /// found, this will be EndPointer.  Otherwise, it will be a pointer to the
201   /// slot which stores Ptr;
202   const void *const * find_imp(const void * Ptr) const {
203     if (isSmall()) {
204       // Linear search for the item.
205       for (const void *const *APtr = CurArray, *const *E =
206                                                    CurArray + NumNonEmpty;
207            APtr != E; ++APtr)
208         if (*APtr == Ptr)
209           return APtr;
210       return EndPointer();
211     }
212 
213     // Big set case.
214     if (auto *Bucket = doFind(Ptr))
215       return Bucket;
216     return EndPointer();
217   }
218 
219   bool contains_imp(const void *Ptr) const {
220     if (isSmall()) {
221       // Linear search for the item.
222       const void *const *APtr = CurArray;
223       const void *const *E = CurArray + NumNonEmpty;
224       for (; APtr != E; ++APtr)
225         if (*APtr == Ptr)
226           return true;
227       return false;
228     }
229 
230     return doFind(Ptr) != nullptr;
231   }
232 
233   bool isSmall() const { return IsSmall; }
234 
235 private:
236   std::pair<const void *const *, bool> insert_imp_big(const void *Ptr);
237 
238   const void *const *doFind(const void *Ptr) const;
239   const void * const *FindBucketFor(const void *Ptr) const;
240   void shrink_and_clear();
241 
242   /// Grow - Allocate a larger backing store for the buckets and move it over.
243   void Grow(unsigned NewSize);
244 
245 protected:
246   /// swap - Swaps the elements of two sets.
247   /// Note: This method assumes that both sets have the same small size.
248   void swap(const void **SmallStorage, const void **RHSSmallStorage,
249             SmallPtrSetImplBase &RHS);
250 
251   void copyFrom(const void **SmallStorage, const SmallPtrSetImplBase &RHS);
252   void moveFrom(const void **SmallStorage, unsigned SmallSize,
253                 const void **RHSSmallStorage, SmallPtrSetImplBase &&RHS);
254 
255 private:
256   /// Code shared by moveFrom() and move constructor.
257   void moveHelper(const void **SmallStorage, unsigned SmallSize,
258                   const void **RHSSmallStorage, SmallPtrSetImplBase &&RHS);
259   /// Code shared by copyFrom() and copy constructor.
260   void copyHelper(const SmallPtrSetImplBase &RHS);
261 };
262 
263 /// SmallPtrSetIteratorImpl - This is the common base class shared between all
264 /// instances of SmallPtrSetIterator.
265 class SmallPtrSetIteratorImpl {
266 protected:
267   const void *const *Bucket;
268   const void *const *End;
269 
270 public:
271   explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E)
272     : Bucket(BP), End(E) {
273     if (shouldReverseIterate()) {
274       RetreatIfNotValid();
275       return;
276     }
277     AdvanceIfNotValid();
278   }
279 
280   bool operator==(const SmallPtrSetIteratorImpl &RHS) const {
281     return Bucket == RHS.Bucket;
282   }
283   bool operator!=(const SmallPtrSetIteratorImpl &RHS) const {
284     return Bucket != RHS.Bucket;
285   }
286 
287 protected:
288   /// AdvanceIfNotValid - If the current bucket isn't valid, advance to a bucket
289   /// that is.   This is guaranteed to stop because the end() bucket is marked
290   /// valid.
291   void AdvanceIfNotValid() {
292     assert(Bucket <= End);
293     while (Bucket != End &&
294            (*Bucket == SmallPtrSetImplBase::getEmptyMarker() ||
295             *Bucket == SmallPtrSetImplBase::getTombstoneMarker()))
296       ++Bucket;
297   }
298   void RetreatIfNotValid() {
299     assert(Bucket >= End);
300     while (Bucket != End &&
301            (Bucket[-1] == SmallPtrSetImplBase::getEmptyMarker() ||
302             Bucket[-1] == SmallPtrSetImplBase::getTombstoneMarker())) {
303       --Bucket;
304     }
305   }
306 };
307 
308 /// SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
309 template <typename PtrTy>
310 class LLVM_DEBUGEPOCHBASE_HANDLEBASE_EMPTYBASE SmallPtrSetIterator
311     : public SmallPtrSetIteratorImpl,
312       DebugEpochBase::HandleBase {
313   using PtrTraits = PointerLikeTypeTraits<PtrTy>;
314 
315 public:
316   using value_type = PtrTy;
317   using reference = PtrTy;
318   using pointer = PtrTy;
319   using difference_type = std::ptrdiff_t;
320   using iterator_category = std::forward_iterator_tag;
321 
322   explicit SmallPtrSetIterator(const void *const *BP, const void *const *E,
323                                const DebugEpochBase &Epoch)
324       : SmallPtrSetIteratorImpl(BP, E), DebugEpochBase::HandleBase(&Epoch) {}
325 
326   // Most methods are provided by the base class.
327 
328   const PtrTy operator*() const {
329     assert(isHandleInSync() && "invalid iterator access!");
330     if (shouldReverseIterate()) {
331       assert(Bucket > End);
332       return PtrTraits::getFromVoidPointer(const_cast<void *>(Bucket[-1]));
333     }
334     assert(Bucket < End);
335     return PtrTraits::getFromVoidPointer(const_cast<void*>(*Bucket));
336   }
337 
338   inline SmallPtrSetIterator& operator++() {          // Preincrement
339     assert(isHandleInSync() && "invalid iterator access!");
340     if (shouldReverseIterate()) {
341       --Bucket;
342       RetreatIfNotValid();
343       return *this;
344     }
345     ++Bucket;
346     AdvanceIfNotValid();
347     return *this;
348   }
349 
350   SmallPtrSetIterator operator++(int) {        // Postincrement
351     SmallPtrSetIterator tmp = *this;
352     ++*this;
353     return tmp;
354   }
355 };
356 
357 /// A templated base class for \c SmallPtrSet which provides the
358 /// typesafe interface that is common across all small sizes.
359 ///
360 /// This is particularly useful for passing around between interface boundaries
361 /// to avoid encoding a particular small size in the interface boundary.
362 template <typename PtrType>
363 class SmallPtrSetImpl : public SmallPtrSetImplBase {
364   using ConstPtrType = typename add_const_past_pointer<PtrType>::type;
365   using PtrTraits = PointerLikeTypeTraits<PtrType>;
366   using ConstPtrTraits = PointerLikeTypeTraits<ConstPtrType>;
367 
368 protected:
369   // Forward constructors to the base.
370   using SmallPtrSetImplBase::SmallPtrSetImplBase;
371 
372 public:
373   using iterator = SmallPtrSetIterator<PtrType>;
374   using const_iterator = SmallPtrSetIterator<PtrType>;
375   using key_type = ConstPtrType;
376   using value_type = PtrType;
377 
378   SmallPtrSetImpl(const SmallPtrSetImpl &) = delete;
379 
380   /// Inserts Ptr if and only if there is no element in the container equal to
381   /// Ptr. The bool component of the returned pair is true if and only if the
382   /// insertion takes place, and the iterator component of the pair points to
383   /// the element equal to Ptr.
384   std::pair<iterator, bool> insert(PtrType Ptr) {
385     auto p = insert_imp(PtrTraits::getAsVoidPointer(Ptr));
386     return std::make_pair(makeIterator(p.first), p.second);
387   }
388 
389   /// Insert the given pointer with an iterator hint that is ignored. This is
390   /// identical to calling insert(Ptr), but allows SmallPtrSet to be used by
391   /// std::insert_iterator and std::inserter().
392   iterator insert(iterator, PtrType Ptr) {
393     return insert(Ptr).first;
394   }
395 
396   /// Remove pointer from the set.
397   ///
398   /// Returns whether the pointer was in the set. Invalidates iterators if
399   /// true is returned. To remove elements while iterating over the set, use
400   /// remove_if() instead.
401   bool erase(PtrType Ptr) {
402     return erase_imp(PtrTraits::getAsVoidPointer(Ptr));
403   }
404 
405   /// Remove elements that match the given predicate.
406   ///
407   /// This method is a safe replacement for the following pattern, which is not
408   /// valid, because the erase() calls would invalidate the iterator:
409   ///
410   ///     for (PtrType *Ptr : Set)
411   ///       if (Pred(P))
412   ///         Set.erase(P);
413   ///
414   /// Returns whether anything was removed. It is safe to read the set inside
415   /// the predicate function. However, the predicate must not modify the set
416   /// itself, only indicate a removal by returning true.
417   template <typename UnaryPredicate>
418   bool remove_if(UnaryPredicate P) {
419     bool Removed = false;
420     if (isSmall()) {
421       const void **APtr = CurArray, **E = CurArray + NumNonEmpty;
422       while (APtr != E) {
423         PtrType Ptr = PtrTraits::getFromVoidPointer(const_cast<void *>(*APtr));
424         if (P(Ptr)) {
425           *APtr = *--E;
426           --NumNonEmpty;
427           incrementEpoch();
428           Removed = true;
429         } else {
430           ++APtr;
431         }
432       }
433       return Removed;
434     }
435 
436     for (const void **APtr = CurArray, **E = EndPointer(); APtr != E; ++APtr) {
437       const void *Value = *APtr;
438       if (Value == getTombstoneMarker() || Value == getEmptyMarker())
439         continue;
440       PtrType Ptr = PtrTraits::getFromVoidPointer(const_cast<void *>(Value));
441       if (P(Ptr)) {
442         *APtr = getTombstoneMarker();
443         ++NumTombstones;
444         incrementEpoch();
445         Removed = true;
446       }
447     }
448     return Removed;
449   }
450 
451   /// count - Return 1 if the specified pointer is in the set, 0 otherwise.
452   size_type count(ConstPtrType Ptr) const {
453     return contains_imp(ConstPtrTraits::getAsVoidPointer(Ptr));
454   }
455   iterator find(ConstPtrType Ptr) const {
456     return makeIterator(find_imp(ConstPtrTraits::getAsVoidPointer(Ptr)));
457   }
458   bool contains(ConstPtrType Ptr) const {
459     return contains_imp(ConstPtrTraits::getAsVoidPointer(Ptr));
460   }
461 
462   template <typename IterT>
463   void insert(IterT I, IterT E) {
464     for (; I != E; ++I)
465       insert(*I);
466   }
467 
468   void insert(std::initializer_list<PtrType> IL) {
469     insert(IL.begin(), IL.end());
470   }
471 
472   iterator begin() const {
473     if (shouldReverseIterate())
474       return makeIterator(EndPointer() - 1);
475     return makeIterator(CurArray);
476   }
477   iterator end() const { return makeIterator(EndPointer()); }
478 
479 private:
480   /// Create an iterator that dereferences to same place as the given pointer.
481   iterator makeIterator(const void *const *P) const {
482     if (shouldReverseIterate())
483       return iterator(P == EndPointer() ? CurArray : P + 1, CurArray, *this);
484     return iterator(P, EndPointer(), *this);
485   }
486 };
487 
488 /// Equality comparison for SmallPtrSet.
489 ///
490 /// Iterates over elements of LHS confirming that each value from LHS is also in
491 /// RHS, and that no additional values are in RHS.
492 template <typename PtrType>
493 bool operator==(const SmallPtrSetImpl<PtrType> &LHS,
494                 const SmallPtrSetImpl<PtrType> &RHS) {
495   if (LHS.size() != RHS.size())
496     return false;
497 
498   for (const auto *KV : LHS)
499     if (!RHS.count(KV))
500       return false;
501 
502   return true;
503 }
504 
505 /// Inequality comparison for SmallPtrSet.
506 ///
507 /// Equivalent to !(LHS == RHS).
508 template <typename PtrType>
509 bool operator!=(const SmallPtrSetImpl<PtrType> &LHS,
510                 const SmallPtrSetImpl<PtrType> &RHS) {
511   return !(LHS == RHS);
512 }
513 
514 /// SmallPtrSet - This class implements a set which is optimized for holding
515 /// SmallSize or less elements.  This internally rounds up SmallSize to the next
516 /// power of two if it is not already a power of two.  See the comments above
517 /// SmallPtrSetImplBase for details of the algorithm.
518 template<class PtrType, unsigned SmallSize>
519 class SmallPtrSet : public SmallPtrSetImpl<PtrType> {
520   // In small mode SmallPtrSet uses linear search for the elements, so it is
521   // not a good idea to choose this value too high. You may consider using a
522   // DenseSet<> instead if you expect many elements in the set.
523   static_assert(SmallSize <= 32, "SmallSize should be small");
524 
525   using BaseT = SmallPtrSetImpl<PtrType>;
526 
527   // A constexpr version of llvm::bit_ceil.
528   // TODO: Replace this with std::bit_ceil once C++20 is available.
529   static constexpr size_t RoundUpToPowerOfTwo(size_t X) {
530     size_t C = 1;
531     size_t CMax = C << (std::numeric_limits<size_t>::digits - 1);
532     while (C < X && C < CMax)
533       C <<= 1;
534     return C;
535   }
536 
537   // Make sure that SmallSize is a power of two, round up if not.
538   static constexpr size_t SmallSizePowTwo = RoundUpToPowerOfTwo(SmallSize);
539   /// SmallStorage - Fixed size storage used in 'small mode'.
540   const void *SmallStorage[SmallSizePowTwo];
541 
542 public:
543   SmallPtrSet() : BaseT(SmallStorage, SmallSizePowTwo) {}
544   SmallPtrSet(const SmallPtrSet &that) : BaseT(SmallStorage, that) {}
545   SmallPtrSet(SmallPtrSet &&that)
546       : BaseT(SmallStorage, SmallSizePowTwo, that.SmallStorage,
547               std::move(that)) {}
548 
549   template<typename It>
550   SmallPtrSet(It I, It E) : BaseT(SmallStorage, SmallSizePowTwo) {
551     this->insert(I, E);
552   }
553 
554   SmallPtrSet(std::initializer_list<PtrType> IL)
555       : BaseT(SmallStorage, SmallSizePowTwo) {
556     this->insert(IL.begin(), IL.end());
557   }
558 
559   SmallPtrSet<PtrType, SmallSize> &
560   operator=(const SmallPtrSet<PtrType, SmallSize> &RHS) {
561     if (&RHS != this)
562       this->copyFrom(SmallStorage, RHS);
563     return *this;
564   }
565 
566   SmallPtrSet<PtrType, SmallSize> &
567   operator=(SmallPtrSet<PtrType, SmallSize> &&RHS) {
568     if (&RHS != this)
569       this->moveFrom(SmallStorage, SmallSizePowTwo, RHS.SmallStorage,
570                      std::move(RHS));
571     return *this;
572   }
573 
574   SmallPtrSet<PtrType, SmallSize> &
575   operator=(std::initializer_list<PtrType> IL) {
576     this->clear();
577     this->insert(IL.begin(), IL.end());
578     return *this;
579   }
580 
581   /// swap - Swaps the elements of two sets.
582   void swap(SmallPtrSet<PtrType, SmallSize> &RHS) {
583     SmallPtrSetImplBase::swap(SmallStorage, RHS.SmallStorage, RHS);
584   }
585 };
586 
587 } // end namespace llvm
588 
589 namespace std {
590 
591   /// Implement std::swap in terms of SmallPtrSet swap.
592   template<class T, unsigned N>
593   inline void swap(llvm::SmallPtrSet<T, N> &LHS, llvm::SmallPtrSet<T, N> &RHS) {
594     LHS.swap(RHS);
595   }
596 
597 } // end namespace std
598 
599 #endif // LLVM_ADT_SMALLPTRSET_H
600