1 //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a hash set that can be used to remove duplication of 10 // nodes in a graph. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/FoldingSet.h" 15 #include "llvm/ADT/Hashing.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/Support/Allocator.h" 18 #include "llvm/Support/ErrorHandling.h" 19 #include "llvm/Support/Host.h" 20 #include "llvm/Support/MathExtras.h" 21 #include <cassert> 22 #include <cstring> 23 using namespace llvm; 24 25 //===----------------------------------------------------------------------===// 26 // FoldingSetNodeIDRef Implementation 27 28 /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, 29 /// used to lookup the node in the FoldingSetBase. 30 unsigned FoldingSetNodeIDRef::ComputeHash() const { 31 return static_cast<unsigned>(hash_combine_range(Data, Data+Size)); 32 } 33 34 bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { 35 if (Size != RHS.Size) return false; 36 return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0; 37 } 38 39 /// Used to compare the "ordering" of two nodes as defined by the 40 /// profiled bits and their ordering defined by memcmp(). 41 bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { 42 if (Size != RHS.Size) 43 return Size < RHS.Size; 44 return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0; 45 } 46 47 //===----------------------------------------------------------------------===// 48 // FoldingSetNodeID Implementation 49 50 /// Add* - Add various data types to Bit data. 51 /// 52 void FoldingSetNodeID::AddString(StringRef String) { 53 unsigned Size = String.size(); 54 55 unsigned NumInserts = 1 + divideCeil(Size, 4); 56 Bits.reserve(Bits.size() + NumInserts); 57 58 Bits.push_back(Size); 59 if (!Size) return; 60 61 unsigned Units = Size / 4; 62 unsigned Pos = 0; 63 const unsigned *Base = (const unsigned*) String.data(); 64 65 // If the string is aligned do a bulk transfer. 66 if (!((intptr_t)Base & 3)) { 67 Bits.append(Base, Base + Units); 68 Pos = (Units + 1) * 4; 69 } else { 70 // Otherwise do it the hard way. 71 // To be compatible with above bulk transfer, we need to take endianness 72 // into account. 73 static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost, 74 "Unexpected host endianness"); 75 if (sys::IsBigEndianHost) { 76 for (Pos += 4; Pos <= Size; Pos += 4) { 77 unsigned V = ((unsigned char)String[Pos - 4] << 24) | 78 ((unsigned char)String[Pos - 3] << 16) | 79 ((unsigned char)String[Pos - 2] << 8) | 80 (unsigned char)String[Pos - 1]; 81 Bits.push_back(V); 82 } 83 } else { // Little-endian host 84 for (Pos += 4; Pos <= Size; Pos += 4) { 85 unsigned V = ((unsigned char)String[Pos - 1] << 24) | 86 ((unsigned char)String[Pos - 2] << 16) | 87 ((unsigned char)String[Pos - 3] << 8) | 88 (unsigned char)String[Pos - 4]; 89 Bits.push_back(V); 90 } 91 } 92 } 93 94 // With the leftover bits. 95 unsigned V = 0; 96 // Pos will have overshot size by 4 - #bytes left over. 97 // No need to take endianness into account here - this is always executed. 98 switch (Pos - Size) { 99 case 1: V = (V << 8) | (unsigned char)String[Size - 3]; LLVM_FALLTHROUGH; 100 case 2: V = (V << 8) | (unsigned char)String[Size - 2]; LLVM_FALLTHROUGH; 101 case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; 102 default: return; // Nothing left. 103 } 104 105 Bits.push_back(V); 106 } 107 108 // AddNodeID - Adds the Bit data of another ID to *this. 109 void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { 110 Bits.append(ID.Bits.begin(), ID.Bits.end()); 111 } 112 113 /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to 114 /// lookup the node in the FoldingSetBase. 115 unsigned FoldingSetNodeID::ComputeHash() const { 116 return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); 117 } 118 119 /// operator== - Used to compare two nodes to each other. 120 /// 121 bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const { 122 return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); 123 } 124 125 /// operator== - Used to compare two nodes to each other. 126 /// 127 bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { 128 return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; 129 } 130 131 /// Used to compare the "ordering" of two nodes as defined by the 132 /// profiled bits and their ordering defined by memcmp(). 133 bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const { 134 return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); 135 } 136 137 bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const { 138 return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS; 139 } 140 141 /// Intern - Copy this node's data to a memory region allocated from the 142 /// given allocator and return a FoldingSetNodeIDRef describing the 143 /// interned data. 144 FoldingSetNodeIDRef 145 FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { 146 unsigned *New = Allocator.Allocate<unsigned>(Bits.size()); 147 std::uninitialized_copy(Bits.begin(), Bits.end(), New); 148 return FoldingSetNodeIDRef(New, Bits.size()); 149 } 150 151 //===----------------------------------------------------------------------===// 152 /// Helper functions for FoldingSetBase. 153 154 /// GetNextPtr - In order to save space, each bucket is a 155 /// singly-linked-list. In order to make deletion more efficient, we make 156 /// the list circular, so we can delete a node without computing its hash. 157 /// The problem with this is that the start of the hash buckets are not 158 /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: 159 /// use GetBucketPtr when this happens. 160 static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) { 161 // The low bit is set if this is the pointer back to the bucket. 162 if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) 163 return nullptr; 164 165 return static_cast<FoldingSetBase::Node*>(NextInBucketPtr); 166 } 167 168 169 /// testing. 170 static void **GetBucketPtr(void *NextInBucketPtr) { 171 intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr); 172 assert((Ptr & 1) && "Not a bucket pointer"); 173 return reinterpret_cast<void**>(Ptr & ~intptr_t(1)); 174 } 175 176 /// GetBucketFor - Hash the specified node ID and return the hash bucket for 177 /// the specified ID. 178 static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) { 179 // NumBuckets is always a power of 2. 180 unsigned BucketNum = Hash & (NumBuckets-1); 181 return Buckets + BucketNum; 182 } 183 184 /// AllocateBuckets - Allocated initialized bucket memory. 185 static void **AllocateBuckets(unsigned NumBuckets) { 186 void **Buckets = static_cast<void**>(safe_calloc(NumBuckets + 1, 187 sizeof(void*))); 188 // Set the very last bucket to be a non-null "pointer". 189 Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 190 return Buckets; 191 } 192 193 //===----------------------------------------------------------------------===// 194 // FoldingSetBase Implementation 195 196 FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) { 197 assert(5 < Log2InitSize && Log2InitSize < 32 && 198 "Initial hash table size out of range"); 199 NumBuckets = 1 << Log2InitSize; 200 Buckets = AllocateBuckets(NumBuckets); 201 NumNodes = 0; 202 } 203 204 FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg) 205 : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) { 206 Arg.Buckets = nullptr; 207 Arg.NumBuckets = 0; 208 Arg.NumNodes = 0; 209 } 210 211 FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) { 212 free(Buckets); // This may be null if the set is in a moved-from state. 213 Buckets = RHS.Buckets; 214 NumBuckets = RHS.NumBuckets; 215 NumNodes = RHS.NumNodes; 216 RHS.Buckets = nullptr; 217 RHS.NumBuckets = 0; 218 RHS.NumNodes = 0; 219 return *this; 220 } 221 222 FoldingSetBase::~FoldingSetBase() { 223 free(Buckets); 224 } 225 226 void FoldingSetBase::clear() { 227 // Set all but the last bucket to null pointers. 228 memset(Buckets, 0, NumBuckets*sizeof(void*)); 229 230 // Set the very last bucket to be a non-null "pointer". 231 Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 232 233 // Reset the node count to zero. 234 NumNodes = 0; 235 } 236 237 void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount, 238 const FoldingSetInfo &Info) { 239 assert((NewBucketCount > NumBuckets) && 240 "Can't shrink a folding set with GrowBucketCount"); 241 assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!"); 242 void **OldBuckets = Buckets; 243 unsigned OldNumBuckets = NumBuckets; 244 245 // Clear out new buckets. 246 Buckets = AllocateBuckets(NewBucketCount); 247 // Set NumBuckets only if allocation of new buckets was successful. 248 NumBuckets = NewBucketCount; 249 NumNodes = 0; 250 251 // Walk the old buckets, rehashing nodes into their new place. 252 FoldingSetNodeID TempID; 253 for (unsigned i = 0; i != OldNumBuckets; ++i) { 254 void *Probe = OldBuckets[i]; 255 if (!Probe) continue; 256 while (Node *NodeInBucket = GetNextPtr(Probe)) { 257 // Figure out the next link, remove NodeInBucket from the old link. 258 Probe = NodeInBucket->getNextInBucket(); 259 NodeInBucket->SetNextInBucket(nullptr); 260 261 // Insert the node into the new bucket, after recomputing the hash. 262 InsertNode(NodeInBucket, 263 GetBucketFor(Info.ComputeNodeHash(this, NodeInBucket, TempID), 264 Buckets, NumBuckets), 265 Info); 266 TempID.clear(); 267 } 268 } 269 270 free(OldBuckets); 271 } 272 273 /// GrowHashTable - Double the size of the hash table and rehash everything. 274 /// 275 void FoldingSetBase::GrowHashTable(const FoldingSetInfo &Info) { 276 GrowBucketCount(NumBuckets * 2, Info); 277 } 278 279 void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) { 280 // This will give us somewhere between EltCount / 2 and 281 // EltCount buckets. This puts us in the load factor 282 // range of 1.0 - 2.0. 283 if(EltCount < capacity()) 284 return; 285 GrowBucketCount(PowerOf2Floor(EltCount), Info); 286 } 287 288 /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, 289 /// return it. If not, return the insertion token that will make insertion 290 /// faster. 291 FoldingSetBase::Node *FoldingSetBase::FindNodeOrInsertPos( 292 const FoldingSetNodeID &ID, void *&InsertPos, const FoldingSetInfo &Info) { 293 unsigned IDHash = ID.ComputeHash(); 294 void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); 295 void *Probe = *Bucket; 296 297 InsertPos = nullptr; 298 299 FoldingSetNodeID TempID; 300 while (Node *NodeInBucket = GetNextPtr(Probe)) { 301 if (Info.NodeEquals(this, NodeInBucket, ID, IDHash, TempID)) 302 return NodeInBucket; 303 TempID.clear(); 304 305 Probe = NodeInBucket->getNextInBucket(); 306 } 307 308 // Didn't find the node, return null with the bucket as the InsertPos. 309 InsertPos = Bucket; 310 return nullptr; 311 } 312 313 /// InsertNode - Insert the specified node into the folding set, knowing that it 314 /// is not already in the map. InsertPos must be obtained from 315 /// FindNodeOrInsertPos. 316 void FoldingSetBase::InsertNode(Node *N, void *InsertPos, 317 const FoldingSetInfo &Info) { 318 assert(!N->getNextInBucket()); 319 // Do we need to grow the hashtable? 320 if (NumNodes+1 > capacity()) { 321 GrowHashTable(Info); 322 FoldingSetNodeID TempID; 323 InsertPos = GetBucketFor(Info.ComputeNodeHash(this, N, TempID), Buckets, 324 NumBuckets); 325 } 326 327 ++NumNodes; 328 329 /// The insert position is actually a bucket pointer. 330 void **Bucket = static_cast<void**>(InsertPos); 331 332 void *Next = *Bucket; 333 334 // If this is the first insertion into this bucket, its next pointer will be 335 // null. Pretend as if it pointed to itself, setting the low bit to indicate 336 // that it is a pointer to the bucket. 337 if (!Next) 338 Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); 339 340 // Set the node's next pointer, and make the bucket point to the node. 341 N->SetNextInBucket(Next); 342 *Bucket = N; 343 } 344 345 /// RemoveNode - Remove a node from the folding set, returning true if one was 346 /// removed or false if the node was not in the folding set. 347 bool FoldingSetBase::RemoveNode(Node *N) { 348 // Because each bucket is a circular list, we don't need to compute N's hash 349 // to remove it. 350 void *Ptr = N->getNextInBucket(); 351 if (!Ptr) return false; // Not in folding set. 352 353 --NumNodes; 354 N->SetNextInBucket(nullptr); 355 356 // Remember what N originally pointed to, either a bucket or another node. 357 void *NodeNextPtr = Ptr; 358 359 // Chase around the list until we find the node (or bucket) which points to N. 360 while (true) { 361 if (Node *NodeInBucket = GetNextPtr(Ptr)) { 362 // Advance pointer. 363 Ptr = NodeInBucket->getNextInBucket(); 364 365 // We found a node that points to N, change it to point to N's next node, 366 // removing N from the list. 367 if (Ptr == N) { 368 NodeInBucket->SetNextInBucket(NodeNextPtr); 369 return true; 370 } 371 } else { 372 void **Bucket = GetBucketPtr(Ptr); 373 Ptr = *Bucket; 374 375 // If we found that the bucket points to N, update the bucket to point to 376 // whatever is next. 377 if (Ptr == N) { 378 *Bucket = NodeNextPtr; 379 return true; 380 } 381 } 382 } 383 } 384 385 /// GetOrInsertNode - If there is an existing simple Node exactly 386 /// equal to the specified node, return it. Otherwise, insert 'N' and it 387 /// instead. 388 FoldingSetBase::Node * 389 FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N, 390 const FoldingSetInfo &Info) { 391 FoldingSetNodeID ID; 392 Info.GetNodeProfile(this, N, ID); 393 void *IP; 394 if (Node *E = FindNodeOrInsertPos(ID, IP, Info)) 395 return E; 396 InsertNode(N, IP, Info); 397 return N; 398 } 399 400 //===----------------------------------------------------------------------===// 401 // FoldingSetIteratorImpl Implementation 402 403 FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { 404 // Skip to the first non-null non-self-cycle bucket. 405 while (*Bucket != reinterpret_cast<void*>(-1) && 406 (!*Bucket || !GetNextPtr(*Bucket))) 407 ++Bucket; 408 409 NodePtr = static_cast<FoldingSetNode*>(*Bucket); 410 } 411 412 void FoldingSetIteratorImpl::advance() { 413 // If there is another link within this bucket, go to it. 414 void *Probe = NodePtr->getNextInBucket(); 415 416 if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) 417 NodePtr = NextNodeInBucket; 418 else { 419 // Otherwise, this is the last link in this bucket. 420 void **Bucket = GetBucketPtr(Probe); 421 422 // Skip to the next non-null non-self-cycle bucket. 423 do { 424 ++Bucket; 425 } while (*Bucket != reinterpret_cast<void*>(-1) && 426 (!*Bucket || !GetNextPtr(*Bucket))); 427 428 NodePtr = static_cast<FoldingSetNode*>(*Bucket); 429 } 430 } 431 432 //===----------------------------------------------------------------------===// 433 // FoldingSetBucketIteratorImpl Implementation 434 435 FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { 436 Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket; 437 } 438