xref: /llvm-project/llvm/lib/Support/StringMap.cpp (revision 431502a675edc407791fecde0cf96b0d658b789a)
1 //===--- StringMap.cpp - String Hash table map implementation -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the StringMap class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/StringMap.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/Support/Compiler.h"
17 #include "llvm/Support/MathExtras.h"
18 #include <cassert>
19 
20 using namespace llvm;
21 
22 /// Returns the number of buckets to allocate to ensure that the DenseMap can
23 /// accommodate \p NumEntries without need to grow().
24 static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
25   // Ensure that "NumEntries * 4 < NumBuckets * 3"
26   if (NumEntries == 0)
27     return 0;
28   // +1 is required because of the strict equality.
29   // For example if NumEntries is 48, we need to return 401.
30   return NextPowerOf2(NumEntries * 4 / 3 + 1);
31 }
32 
33 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
34   ItemSize = itemSize;
35 
36   // If a size is specified, initialize the table with that many buckets.
37   if (InitSize) {
38     // The table will grow when the number of entries reach 3/4 of the number of
39     // buckets. To guarantee that "InitSize" number of entries can be inserted
40     // in the table without growing, we allocate just what is needed here.
41     init(getMinBucketToReserveForEntries(InitSize));
42     return;
43   }
44 
45   // Otherwise, initialize it with zero buckets to avoid the allocation.
46   TheTable = nullptr;
47   NumBuckets = 0;
48   NumItems = 0;
49   NumTombstones = 0;
50 }
51 
52 void StringMapImpl::init(unsigned InitSize) {
53   assert((InitSize & (InitSize-1)) == 0 &&
54          "Init Size must be a power of 2 or zero!");
55 
56   unsigned NewNumBuckets = InitSize ? InitSize : 16;
57   NumItems = 0;
58   NumTombstones = 0;
59 
60   TheTable = static_cast<StringMapEntryBase **>(
61       std::calloc(NewNumBuckets+1,
62                   sizeof(StringMapEntryBase **) + sizeof(unsigned)));
63   if (TheTable == nullptr)
64     report_bad_alloc_error("Allocation of StringMap table failed.");
65 
66   // Set the member only if TheTable was successfully allocated
67   NumBuckets = NewNumBuckets;
68 
69   // Allocate one extra bucket, set it to look filled so the iterators stop at
70   // end.
71   TheTable[NumBuckets] = (StringMapEntryBase*)2;
72 }
73 
74 /// LookupBucketFor - Look up the bucket that the specified string should end
75 /// up in.  If it already exists as a key in the map, the Item pointer for the
76 /// specified bucket will be non-null.  Otherwise, it will be null.  In either
77 /// case, the FullHashValue field of the bucket will be set to the hash value
78 /// of the string.
79 unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
80   unsigned HTSize = NumBuckets;
81   if (HTSize == 0) {  // Hash table unallocated so far?
82     init(16);
83     HTSize = NumBuckets;
84   }
85   unsigned FullHashValue = HashString(Name);
86   unsigned BucketNo = FullHashValue & (HTSize-1);
87   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
88 
89   unsigned ProbeAmt = 1;
90   int FirstTombstone = -1;
91   while (true) {
92     StringMapEntryBase *BucketItem = TheTable[BucketNo];
93     // If we found an empty bucket, this key isn't in the table yet, return it.
94     if (LLVM_LIKELY(!BucketItem)) {
95       // If we found a tombstone, we want to reuse the tombstone instead of an
96       // empty bucket.  This reduces probing.
97       if (FirstTombstone != -1) {
98         HashTable[FirstTombstone] = FullHashValue;
99         return FirstTombstone;
100       }
101 
102       HashTable[BucketNo] = FullHashValue;
103       return BucketNo;
104     }
105 
106     if (BucketItem == getTombstoneVal()) {
107       // Skip over tombstones.  However, remember the first one we see.
108       if (FirstTombstone == -1) FirstTombstone = BucketNo;
109     } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
110       // If the full hash value matches, check deeply for a match.  The common
111       // case here is that we are only looking at the buckets (for item info
112       // being non-null and for the full hash value) not at the items.  This
113       // is important for cache locality.
114 
115       // Do the comparison like this because Name isn't necessarily
116       // null-terminated!
117       char *ItemStr = (char*)BucketItem+ItemSize;
118       if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
119         // We found a match!
120         return BucketNo;
121       }
122     }
123 
124     // Okay, we didn't find the item.  Probe to the next bucket.
125     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
126 
127     // Use quadratic probing, it has fewer clumping artifacts than linear
128     // probing and has good cache behavior in the common case.
129     ++ProbeAmt;
130   }
131 }
132 
133 /// FindKey - Look up the bucket that contains the specified key. If it exists
134 /// in the map, return the bucket number of the key.  Otherwise return -1.
135 /// This does not modify the map.
136 int StringMapImpl::FindKey(StringRef Key) const {
137   unsigned HTSize = NumBuckets;
138   if (HTSize == 0) return -1;  // Really empty table?
139   unsigned FullHashValue = HashString(Key);
140   unsigned BucketNo = FullHashValue & (HTSize-1);
141   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
142 
143   unsigned ProbeAmt = 1;
144   while (true) {
145     StringMapEntryBase *BucketItem = TheTable[BucketNo];
146     // If we found an empty bucket, this key isn't in the table yet, return.
147     if (LLVM_LIKELY(!BucketItem))
148       return -1;
149 
150     if (BucketItem == getTombstoneVal()) {
151       // Ignore tombstones.
152     } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
153       // If the full hash value matches, check deeply for a match.  The common
154       // case here is that we are only looking at the buckets (for item info
155       // being non-null and for the full hash value) not at the items.  This
156       // is important for cache locality.
157 
158       // Do the comparison like this because NameStart isn't necessarily
159       // null-terminated!
160       char *ItemStr = (char*)BucketItem+ItemSize;
161       if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
162         // We found a match!
163         return BucketNo;
164       }
165     }
166 
167     // Okay, we didn't find the item.  Probe to the next bucket.
168     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
169 
170     // Use quadratic probing, it has fewer clumping artifacts than linear
171     // probing and has good cache behavior in the common case.
172     ++ProbeAmt;
173   }
174 }
175 
176 /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
177 /// delete it.  This aborts if the value isn't in the table.
178 void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
179   const char *VStr = (char*)V + ItemSize;
180   StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
181   (void)V2;
182   assert(V == V2 && "Didn't find key?");
183 }
184 
185 /// RemoveKey - Remove the StringMapEntry for the specified key from the
186 /// table, returning it.  If the key is not in the table, this returns null.
187 StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
188   int Bucket = FindKey(Key);
189   if (Bucket == -1) return nullptr;
190 
191   StringMapEntryBase *Result = TheTable[Bucket];
192   TheTable[Bucket] = getTombstoneVal();
193   --NumItems;
194   ++NumTombstones;
195   assert(NumItems + NumTombstones <= NumBuckets);
196 
197   return Result;
198 }
199 
200 /// RehashTable - Grow the table, redistributing values into the buckets with
201 /// the appropriate mod-of-hashtable-size.
202 unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
203   unsigned NewSize;
204   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
205 
206   // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
207   // the buckets are empty (meaning that many are filled with tombstones),
208   // grow/rehash the table.
209   if (LLVM_UNLIKELY(NumItems * 4 > NumBuckets * 3)) {
210     NewSize = NumBuckets*2;
211   } else if (LLVM_UNLIKELY(NumBuckets - (NumItems + NumTombstones) <=
212                            NumBuckets / 8)) {
213     NewSize = NumBuckets;
214   } else {
215     return BucketNo;
216   }
217 
218   unsigned NewBucketNo = BucketNo;
219   // Allocate one extra bucket which will always be non-empty.  This allows the
220   // iterators to stop at end.
221   StringMapEntryBase **NewTableArray = static_cast<StringMapEntryBase **>(
222       std::calloc(NewSize+1, sizeof(StringMapEntryBase *) + sizeof(unsigned)));
223   if (NewTableArray == nullptr)
224     report_bad_alloc_error("Allocation of StringMap hash table failed.");
225 
226   unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
227   NewTableArray[NewSize] = (StringMapEntryBase*)2;
228 
229   // Rehash all the items into their new buckets.  Luckily :) we already have
230   // the hash values available, so we don't have to rehash any strings.
231   for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
232     StringMapEntryBase *Bucket = TheTable[I];
233     if (Bucket && Bucket != getTombstoneVal()) {
234       // Fast case, bucket available.
235       unsigned FullHash = HashTable[I];
236       unsigned NewBucket = FullHash & (NewSize-1);
237       if (!NewTableArray[NewBucket]) {
238         NewTableArray[FullHash & (NewSize-1)] = Bucket;
239         NewHashArray[FullHash & (NewSize-1)] = FullHash;
240         if (I == BucketNo)
241           NewBucketNo = NewBucket;
242         continue;
243       }
244 
245       // Otherwise probe for a spot.
246       unsigned ProbeSize = 1;
247       do {
248         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
249       } while (NewTableArray[NewBucket]);
250 
251       // Finally found a slot.  Fill it in.
252       NewTableArray[NewBucket] = Bucket;
253       NewHashArray[NewBucket] = FullHash;
254       if (I == BucketNo)
255         NewBucketNo = NewBucket;
256     }
257   }
258 
259   free(TheTable);
260 
261   TheTable = NewTableArray;
262   NumBuckets = NewSize;
263   NumTombstones = 0;
264   return NewBucketNo;
265 }
266