1 //===- llvm/ADT/SmallPtrSet.h - 'Normally small' pointer set ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines the SmallPtrSet class. See the doxygen comment for 11 /// SmallPtrSetImplBase for more details on the algorithm used. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_ADT_SMALLPTRSET_H 16 #define LLVM_ADT_SMALLPTRSET_H 17 18 #include "llvm/ADT/EpochTracker.h" 19 #include "llvm/Support/MathExtras.h" 20 #include "llvm/Support/ReverseIteration.h" 21 #include "llvm/Support/type_traits.h" 22 #include <algorithm> 23 #include <cassert> 24 #include <cstddef> 25 #include <cstdlib> 26 #include <cstring> 27 #include <initializer_list> 28 #include <iterator> 29 #include <limits> 30 #include <utility> 31 32 namespace llvm { 33 34 /// SmallPtrSetImplBase - This is the common code shared among all the 35 /// SmallPtrSet<>'s, which is almost everything. SmallPtrSet has two modes, one 36 /// for small and one for large sets. 37 /// 38 /// Small sets use an array of pointers allocated in the SmallPtrSet object, 39 /// which is treated as a simple array of pointers. When a pointer is added to 40 /// the set, the array is scanned to see if the element already exists, if not 41 /// the element is 'pushed back' onto the array. If we run out of space in the 42 /// array, we grow into the 'large set' case. SmallSet should be used when the 43 /// sets are often small. In this case, no memory allocation is used, and only 44 /// light-weight and cache-efficient scanning is used. 45 /// 46 /// Large sets use a classic exponentially-probed hash table. Empty buckets are 47 /// represented with an illegal pointer value (-1) to allow null pointers to be 48 /// inserted. Tombstones are represented with another illegal pointer value 49 /// (-2), to allow deletion. The hash table is resized when the table is 3/4 or 50 /// more. When this happens, the table is doubled in size. 51 /// 52 class SmallPtrSetImplBase : public DebugEpochBase { 53 friend class SmallPtrSetIteratorImpl; 54 55 protected: 56 /// The current set of buckets, in either small or big representation. 57 const void **CurArray; 58 /// CurArraySize - The allocated size of CurArray, always a power of two. 59 unsigned CurArraySize; 60 61 /// Number of elements in CurArray that contain a value or are a tombstone. 62 /// If small, all these elements are at the beginning of CurArray and the rest 63 /// is uninitialized. 64 unsigned NumNonEmpty; 65 /// Number of tombstones in CurArray. 66 unsigned NumTombstones; 67 /// Whether the set is in small representation. 68 bool IsSmall; 69 70 // Helpers to copy and move construct a SmallPtrSet. 71 SmallPtrSetImplBase(const void **SmallStorage, 72 const SmallPtrSetImplBase &that); 73 SmallPtrSetImplBase(const void **SmallStorage, unsigned SmallSize, 74 const void **RHSSmallStorage, SmallPtrSetImplBase &&that); 75 76 explicit SmallPtrSetImplBase(const void **SmallStorage, unsigned SmallSize) 77 : CurArray(SmallStorage), CurArraySize(SmallSize), NumNonEmpty(0), 78 NumTombstones(0), IsSmall(true) { 79 assert(SmallSize && (SmallSize & (SmallSize-1)) == 0 && 80 "Initial size must be a power of two!"); 81 } 82 83 ~SmallPtrSetImplBase() { 84 if (!isSmall()) 85 free(CurArray); 86 } 87 88 public: 89 using size_type = unsigned; 90 91 SmallPtrSetImplBase &operator=(const SmallPtrSetImplBase &) = delete; 92 93 [[nodiscard]] bool empty() const { return size() == 0; } 94 size_type size() const { return NumNonEmpty - NumTombstones; } 95 size_type capacity() const { return CurArraySize; } 96 97 void clear() { 98 incrementEpoch(); 99 // If the capacity of the array is huge, and the # elements used is small, 100 // shrink the array. 101 if (!isSmall()) { 102 if (size() * 4 < CurArraySize && CurArraySize > 32) 103 return shrink_and_clear(); 104 // Fill the array with empty markers. 105 memset(CurArray, -1, CurArraySize * sizeof(void *)); 106 } 107 108 NumNonEmpty = 0; 109 NumTombstones = 0; 110 } 111 112 void reserve(size_type NumEntries) { 113 incrementEpoch(); 114 // Do nothing if we're given zero as a reservation size. 115 if (NumEntries == 0) 116 return; 117 // No need to expand if we're small and NumEntries will fit in the space. 118 if (isSmall() && NumEntries <= CurArraySize) 119 return; 120 // insert_imp_big will reallocate if stores is more than 75% full, on the 121 // /final/ insertion. 122 if (!isSmall() && ((NumEntries - 1) * 4) < (CurArraySize * 3)) 123 return; 124 // We must Grow -- find the size where we'd be 75% full, then round up to 125 // the next power of two. 126 size_type NewSize = NumEntries + (NumEntries / 3); 127 NewSize = 1 << (Log2_32_Ceil(NewSize) + 1); 128 // Like insert_imp_big, always allocate at least 128 elements. 129 NewSize = std::max(128u, NewSize); 130 Grow(NewSize); 131 } 132 133 protected: 134 static void *getTombstoneMarker() { return reinterpret_cast<void*>(-2); } 135 136 static void *getEmptyMarker() { 137 // Note that -1 is chosen to make clear() efficiently implementable with 138 // memset and because it's not a valid pointer value. 139 return reinterpret_cast<void*>(-1); 140 } 141 142 const void **EndPointer() const { 143 return isSmall() ? CurArray + NumNonEmpty : CurArray + CurArraySize; 144 } 145 146 /// insert_imp - This returns true if the pointer was new to the set, false if 147 /// it was already in the set. This is hidden from the client so that the 148 /// derived class can check that the right type of pointer is passed in. 149 std::pair<const void *const *, bool> insert_imp(const void *Ptr) { 150 if (isSmall()) { 151 // Check to see if it is already in the set. 152 for (const void **APtr = CurArray, **E = CurArray + NumNonEmpty; 153 APtr != E; ++APtr) { 154 const void *Value = *APtr; 155 if (Value == Ptr) 156 return std::make_pair(APtr, false); 157 } 158 159 // Nope, there isn't. If we stay small, just 'pushback' now. 160 if (NumNonEmpty < CurArraySize) { 161 CurArray[NumNonEmpty++] = Ptr; 162 incrementEpoch(); 163 return std::make_pair(CurArray + (NumNonEmpty - 1), true); 164 } 165 // Otherwise, hit the big set case, which will call grow. 166 } 167 return insert_imp_big(Ptr); 168 } 169 170 /// erase_imp - If the set contains the specified pointer, remove it and 171 /// return true, otherwise return false. This is hidden from the client so 172 /// that the derived class can check that the right type of pointer is passed 173 /// in. 174 bool erase_imp(const void * Ptr) { 175 if (isSmall()) { 176 for (const void **APtr = CurArray, **E = CurArray + NumNonEmpty; 177 APtr != E; ++APtr) { 178 if (*APtr == Ptr) { 179 *APtr = CurArray[--NumNonEmpty]; 180 incrementEpoch(); 181 return true; 182 } 183 } 184 return false; 185 } 186 187 auto *Bucket = doFind(Ptr); 188 if (!Bucket) 189 return false; 190 191 *const_cast<const void **>(Bucket) = getTombstoneMarker(); 192 NumTombstones++; 193 // Treat this consistently from an API perspective, even if we don't 194 // actually invalidate iterators here. 195 incrementEpoch(); 196 return true; 197 } 198 199 /// Returns the raw pointer needed to construct an iterator. If element not 200 /// found, this will be EndPointer. Otherwise, it will be a pointer to the 201 /// slot which stores Ptr; 202 const void *const * find_imp(const void * Ptr) const { 203 if (isSmall()) { 204 // Linear search for the item. 205 for (const void *const *APtr = CurArray, *const *E = 206 CurArray + NumNonEmpty; 207 APtr != E; ++APtr) 208 if (*APtr == Ptr) 209 return APtr; 210 return EndPointer(); 211 } 212 213 // Big set case. 214 if (auto *Bucket = doFind(Ptr)) 215 return Bucket; 216 return EndPointer(); 217 } 218 219 bool contains_imp(const void *Ptr) const { 220 if (isSmall()) { 221 // Linear search for the item. 222 const void *const *APtr = CurArray; 223 const void *const *E = CurArray + NumNonEmpty; 224 for (; APtr != E; ++APtr) 225 if (*APtr == Ptr) 226 return true; 227 return false; 228 } 229 230 return doFind(Ptr) != nullptr; 231 } 232 233 bool isSmall() const { return IsSmall; } 234 235 private: 236 std::pair<const void *const *, bool> insert_imp_big(const void *Ptr); 237 238 const void *const *doFind(const void *Ptr) const; 239 const void * const *FindBucketFor(const void *Ptr) const; 240 void shrink_and_clear(); 241 242 /// Grow - Allocate a larger backing store for the buckets and move it over. 243 void Grow(unsigned NewSize); 244 245 protected: 246 /// swap - Swaps the elements of two sets. 247 /// Note: This method assumes that both sets have the same small size. 248 void swap(const void **SmallStorage, const void **RHSSmallStorage, 249 SmallPtrSetImplBase &RHS); 250 251 void copyFrom(const void **SmallStorage, const SmallPtrSetImplBase &RHS); 252 void moveFrom(const void **SmallStorage, unsigned SmallSize, 253 const void **RHSSmallStorage, SmallPtrSetImplBase &&RHS); 254 255 private: 256 /// Code shared by moveFrom() and move constructor. 257 void moveHelper(const void **SmallStorage, unsigned SmallSize, 258 const void **RHSSmallStorage, SmallPtrSetImplBase &&RHS); 259 /// Code shared by copyFrom() and copy constructor. 260 void copyHelper(const SmallPtrSetImplBase &RHS); 261 }; 262 263 /// SmallPtrSetIteratorImpl - This is the common base class shared between all 264 /// instances of SmallPtrSetIterator. 265 class SmallPtrSetIteratorImpl { 266 protected: 267 const void *const *Bucket; 268 const void *const *End; 269 270 public: 271 explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E) 272 : Bucket(BP), End(E) { 273 if (shouldReverseIterate()) { 274 RetreatIfNotValid(); 275 return; 276 } 277 AdvanceIfNotValid(); 278 } 279 280 bool operator==(const SmallPtrSetIteratorImpl &RHS) const { 281 return Bucket == RHS.Bucket; 282 } 283 bool operator!=(const SmallPtrSetIteratorImpl &RHS) const { 284 return Bucket != RHS.Bucket; 285 } 286 287 protected: 288 /// AdvanceIfNotValid - If the current bucket isn't valid, advance to a bucket 289 /// that is. This is guaranteed to stop because the end() bucket is marked 290 /// valid. 291 void AdvanceIfNotValid() { 292 assert(Bucket <= End); 293 while (Bucket != End && 294 (*Bucket == SmallPtrSetImplBase::getEmptyMarker() || 295 *Bucket == SmallPtrSetImplBase::getTombstoneMarker())) 296 ++Bucket; 297 } 298 void RetreatIfNotValid() { 299 assert(Bucket >= End); 300 while (Bucket != End && 301 (Bucket[-1] == SmallPtrSetImplBase::getEmptyMarker() || 302 Bucket[-1] == SmallPtrSetImplBase::getTombstoneMarker())) { 303 --Bucket; 304 } 305 } 306 }; 307 308 /// SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet. 309 template <typename PtrTy> 310 class LLVM_DEBUGEPOCHBASE_HANDLEBASE_EMPTYBASE SmallPtrSetIterator 311 : public SmallPtrSetIteratorImpl, 312 DebugEpochBase::HandleBase { 313 using PtrTraits = PointerLikeTypeTraits<PtrTy>; 314 315 public: 316 using value_type = PtrTy; 317 using reference = PtrTy; 318 using pointer = PtrTy; 319 using difference_type = std::ptrdiff_t; 320 using iterator_category = std::forward_iterator_tag; 321 322 explicit SmallPtrSetIterator(const void *const *BP, const void *const *E, 323 const DebugEpochBase &Epoch) 324 : SmallPtrSetIteratorImpl(BP, E), DebugEpochBase::HandleBase(&Epoch) {} 325 326 // Most methods are provided by the base class. 327 328 const PtrTy operator*() const { 329 assert(isHandleInSync() && "invalid iterator access!"); 330 if (shouldReverseIterate()) { 331 assert(Bucket > End); 332 return PtrTraits::getFromVoidPointer(const_cast<void *>(Bucket[-1])); 333 } 334 assert(Bucket < End); 335 return PtrTraits::getFromVoidPointer(const_cast<void*>(*Bucket)); 336 } 337 338 inline SmallPtrSetIterator& operator++() { // Preincrement 339 assert(isHandleInSync() && "invalid iterator access!"); 340 if (shouldReverseIterate()) { 341 --Bucket; 342 RetreatIfNotValid(); 343 return *this; 344 } 345 ++Bucket; 346 AdvanceIfNotValid(); 347 return *this; 348 } 349 350 SmallPtrSetIterator operator++(int) { // Postincrement 351 SmallPtrSetIterator tmp = *this; 352 ++*this; 353 return tmp; 354 } 355 }; 356 357 /// A templated base class for \c SmallPtrSet which provides the 358 /// typesafe interface that is common across all small sizes. 359 /// 360 /// This is particularly useful for passing around between interface boundaries 361 /// to avoid encoding a particular small size in the interface boundary. 362 template <typename PtrType> 363 class SmallPtrSetImpl : public SmallPtrSetImplBase { 364 using ConstPtrType = typename add_const_past_pointer<PtrType>::type; 365 using PtrTraits = PointerLikeTypeTraits<PtrType>; 366 using ConstPtrTraits = PointerLikeTypeTraits<ConstPtrType>; 367 368 protected: 369 // Forward constructors to the base. 370 using SmallPtrSetImplBase::SmallPtrSetImplBase; 371 372 public: 373 using iterator = SmallPtrSetIterator<PtrType>; 374 using const_iterator = SmallPtrSetIterator<PtrType>; 375 using key_type = ConstPtrType; 376 using value_type = PtrType; 377 378 SmallPtrSetImpl(const SmallPtrSetImpl &) = delete; 379 380 /// Inserts Ptr if and only if there is no element in the container equal to 381 /// Ptr. The bool component of the returned pair is true if and only if the 382 /// insertion takes place, and the iterator component of the pair points to 383 /// the element equal to Ptr. 384 std::pair<iterator, bool> insert(PtrType Ptr) { 385 auto p = insert_imp(PtrTraits::getAsVoidPointer(Ptr)); 386 return std::make_pair(makeIterator(p.first), p.second); 387 } 388 389 /// Insert the given pointer with an iterator hint that is ignored. This is 390 /// identical to calling insert(Ptr), but allows SmallPtrSet to be used by 391 /// std::insert_iterator and std::inserter(). 392 iterator insert(iterator, PtrType Ptr) { 393 return insert(Ptr).first; 394 } 395 396 /// Remove pointer from the set. 397 /// 398 /// Returns whether the pointer was in the set. Invalidates iterators if 399 /// true is returned. To remove elements while iterating over the set, use 400 /// remove_if() instead. 401 bool erase(PtrType Ptr) { 402 return erase_imp(PtrTraits::getAsVoidPointer(Ptr)); 403 } 404 405 /// Remove elements that match the given predicate. 406 /// 407 /// This method is a safe replacement for the following pattern, which is not 408 /// valid, because the erase() calls would invalidate the iterator: 409 /// 410 /// for (PtrType *Ptr : Set) 411 /// if (Pred(P)) 412 /// Set.erase(P); 413 /// 414 /// Returns whether anything was removed. It is safe to read the set inside 415 /// the predicate function. However, the predicate must not modify the set 416 /// itself, only indicate a removal by returning true. 417 template <typename UnaryPredicate> 418 bool remove_if(UnaryPredicate P) { 419 bool Removed = false; 420 if (isSmall()) { 421 const void **APtr = CurArray, **E = CurArray + NumNonEmpty; 422 while (APtr != E) { 423 PtrType Ptr = PtrTraits::getFromVoidPointer(const_cast<void *>(*APtr)); 424 if (P(Ptr)) { 425 *APtr = *--E; 426 --NumNonEmpty; 427 incrementEpoch(); 428 Removed = true; 429 } else { 430 ++APtr; 431 } 432 } 433 return Removed; 434 } 435 436 for (const void **APtr = CurArray, **E = EndPointer(); APtr != E; ++APtr) { 437 const void *Value = *APtr; 438 if (Value == getTombstoneMarker() || Value == getEmptyMarker()) 439 continue; 440 PtrType Ptr = PtrTraits::getFromVoidPointer(const_cast<void *>(Value)); 441 if (P(Ptr)) { 442 *APtr = getTombstoneMarker(); 443 ++NumTombstones; 444 incrementEpoch(); 445 Removed = true; 446 } 447 } 448 return Removed; 449 } 450 451 /// count - Return 1 if the specified pointer is in the set, 0 otherwise. 452 size_type count(ConstPtrType Ptr) const { 453 return contains_imp(ConstPtrTraits::getAsVoidPointer(Ptr)); 454 } 455 iterator find(ConstPtrType Ptr) const { 456 return makeIterator(find_imp(ConstPtrTraits::getAsVoidPointer(Ptr))); 457 } 458 bool contains(ConstPtrType Ptr) const { 459 return contains_imp(ConstPtrTraits::getAsVoidPointer(Ptr)); 460 } 461 462 template <typename IterT> 463 void insert(IterT I, IterT E) { 464 for (; I != E; ++I) 465 insert(*I); 466 } 467 468 void insert(std::initializer_list<PtrType> IL) { 469 insert(IL.begin(), IL.end()); 470 } 471 472 iterator begin() const { 473 if (shouldReverseIterate()) 474 return makeIterator(EndPointer() - 1); 475 return makeIterator(CurArray); 476 } 477 iterator end() const { return makeIterator(EndPointer()); } 478 479 private: 480 /// Create an iterator that dereferences to same place as the given pointer. 481 iterator makeIterator(const void *const *P) const { 482 if (shouldReverseIterate()) 483 return iterator(P == EndPointer() ? CurArray : P + 1, CurArray, *this); 484 return iterator(P, EndPointer(), *this); 485 } 486 }; 487 488 /// Equality comparison for SmallPtrSet. 489 /// 490 /// Iterates over elements of LHS confirming that each value from LHS is also in 491 /// RHS, and that no additional values are in RHS. 492 template <typename PtrType> 493 bool operator==(const SmallPtrSetImpl<PtrType> &LHS, 494 const SmallPtrSetImpl<PtrType> &RHS) { 495 if (LHS.size() != RHS.size()) 496 return false; 497 498 for (const auto *KV : LHS) 499 if (!RHS.count(KV)) 500 return false; 501 502 return true; 503 } 504 505 /// Inequality comparison for SmallPtrSet. 506 /// 507 /// Equivalent to !(LHS == RHS). 508 template <typename PtrType> 509 bool operator!=(const SmallPtrSetImpl<PtrType> &LHS, 510 const SmallPtrSetImpl<PtrType> &RHS) { 511 return !(LHS == RHS); 512 } 513 514 /// SmallPtrSet - This class implements a set which is optimized for holding 515 /// SmallSize or less elements. This internally rounds up SmallSize to the next 516 /// power of two if it is not already a power of two. See the comments above 517 /// SmallPtrSetImplBase for details of the algorithm. 518 template<class PtrType, unsigned SmallSize> 519 class SmallPtrSet : public SmallPtrSetImpl<PtrType> { 520 // In small mode SmallPtrSet uses linear search for the elements, so it is 521 // not a good idea to choose this value too high. You may consider using a 522 // DenseSet<> instead if you expect many elements in the set. 523 static_assert(SmallSize <= 32, "SmallSize should be small"); 524 525 using BaseT = SmallPtrSetImpl<PtrType>; 526 527 // A constexpr version of llvm::bit_ceil. 528 // TODO: Replace this with std::bit_ceil once C++20 is available. 529 static constexpr size_t RoundUpToPowerOfTwo(size_t X) { 530 size_t C = 1; 531 size_t CMax = C << (std::numeric_limits<size_t>::digits - 1); 532 while (C < X && C < CMax) 533 C <<= 1; 534 return C; 535 } 536 537 // Make sure that SmallSize is a power of two, round up if not. 538 static constexpr size_t SmallSizePowTwo = RoundUpToPowerOfTwo(SmallSize); 539 /// SmallStorage - Fixed size storage used in 'small mode'. 540 const void *SmallStorage[SmallSizePowTwo]; 541 542 public: 543 SmallPtrSet() : BaseT(SmallStorage, SmallSizePowTwo) {} 544 SmallPtrSet(const SmallPtrSet &that) : BaseT(SmallStorage, that) {} 545 SmallPtrSet(SmallPtrSet &&that) 546 : BaseT(SmallStorage, SmallSizePowTwo, that.SmallStorage, 547 std::move(that)) {} 548 549 template<typename It> 550 SmallPtrSet(It I, It E) : BaseT(SmallStorage, SmallSizePowTwo) { 551 this->insert(I, E); 552 } 553 554 SmallPtrSet(std::initializer_list<PtrType> IL) 555 : BaseT(SmallStorage, SmallSizePowTwo) { 556 this->insert(IL.begin(), IL.end()); 557 } 558 559 SmallPtrSet<PtrType, SmallSize> & 560 operator=(const SmallPtrSet<PtrType, SmallSize> &RHS) { 561 if (&RHS != this) 562 this->copyFrom(SmallStorage, RHS); 563 return *this; 564 } 565 566 SmallPtrSet<PtrType, SmallSize> & 567 operator=(SmallPtrSet<PtrType, SmallSize> &&RHS) { 568 if (&RHS != this) 569 this->moveFrom(SmallStorage, SmallSizePowTwo, RHS.SmallStorage, 570 std::move(RHS)); 571 return *this; 572 } 573 574 SmallPtrSet<PtrType, SmallSize> & 575 operator=(std::initializer_list<PtrType> IL) { 576 this->clear(); 577 this->insert(IL.begin(), IL.end()); 578 return *this; 579 } 580 581 /// swap - Swaps the elements of two sets. 582 void swap(SmallPtrSet<PtrType, SmallSize> &RHS) { 583 SmallPtrSetImplBase::swap(SmallStorage, RHS.SmallStorage, RHS); 584 } 585 }; 586 587 } // end namespace llvm 588 589 namespace std { 590 591 /// Implement std::swap in terms of SmallPtrSet swap. 592 template<class T, unsigned N> 593 inline void swap(llvm::SmallPtrSet<T, N> &LHS, llvm::SmallPtrSet<T, N> &RHS) { 594 LHS.swap(RHS); 595 } 596 597 } // end namespace std 598 599 #endif // LLVM_ADT_SMALLPTRSET_H 600