xref: /llvm-project/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp (revision 6b6b8c4fb9de06aecef40b8aadcc92dfbfec2e33)
1*6b6b8c4fSAdrian McCarthy //===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===//
2*6b6b8c4fSAdrian McCarthy //
3*6b6b8c4fSAdrian McCarthy //                     The LLVM Compiler Infrastructure
4*6b6b8c4fSAdrian McCarthy //
5*6b6b8c4fSAdrian McCarthy // This file is distributed under the University of Illinois Open Source
6*6b6b8c4fSAdrian McCarthy // License. See LICENSE.TXT for details.
7*6b6b8c4fSAdrian McCarthy //
8*6b6b8c4fSAdrian McCarthy //===----------------------------------------------------------------------===//
9*6b6b8c4fSAdrian McCarthy 
10*6b6b8c4fSAdrian McCarthy #include "llvm/DebugInfo/PDB/Native/HashTable.h"
11*6b6b8c4fSAdrian McCarthy 
12*6b6b8c4fSAdrian McCarthy #include "llvm/ADT/Optional.h"
13*6b6b8c4fSAdrian McCarthy #include "llvm/ADT/SparseBitVector.h"
14*6b6b8c4fSAdrian McCarthy #include "llvm/DebugInfo/PDB/Native/RawError.h"
15*6b6b8c4fSAdrian McCarthy 
16*6b6b8c4fSAdrian McCarthy #include <assert.h>
17*6b6b8c4fSAdrian McCarthy 
18*6b6b8c4fSAdrian McCarthy using namespace llvm;
19*6b6b8c4fSAdrian McCarthy using namespace llvm::pdb;
20*6b6b8c4fSAdrian McCarthy 
21*6b6b8c4fSAdrian McCarthy HashTable::HashTable() : HashTable(8) {}
22*6b6b8c4fSAdrian McCarthy 
23*6b6b8c4fSAdrian McCarthy HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); }
24*6b6b8c4fSAdrian McCarthy 
25*6b6b8c4fSAdrian McCarthy Error HashTable::load(msf::StreamReader &Stream) {
26*6b6b8c4fSAdrian McCarthy   const Header *H;
27*6b6b8c4fSAdrian McCarthy   if (auto EC = Stream.readObject(H))
28*6b6b8c4fSAdrian McCarthy     return EC;
29*6b6b8c4fSAdrian McCarthy   if (H->Capacity == 0)
30*6b6b8c4fSAdrian McCarthy     return make_error<RawError>(raw_error_code::corrupt_file,
31*6b6b8c4fSAdrian McCarthy                                 "Invalid Hash Table Capacity");
32*6b6b8c4fSAdrian McCarthy   if (H->Size > maxLoad(H->Capacity))
33*6b6b8c4fSAdrian McCarthy     return make_error<RawError>(raw_error_code::corrupt_file,
34*6b6b8c4fSAdrian McCarthy                                 "Invalid Hash Table Size");
35*6b6b8c4fSAdrian McCarthy 
36*6b6b8c4fSAdrian McCarthy   Buckets.resize(H->Capacity);
37*6b6b8c4fSAdrian McCarthy 
38*6b6b8c4fSAdrian McCarthy   if (auto EC = readSparseBitVector(Stream, Present))
39*6b6b8c4fSAdrian McCarthy     return EC;
40*6b6b8c4fSAdrian McCarthy   if (Present.count() != H->Size)
41*6b6b8c4fSAdrian McCarthy     return make_error<RawError>(raw_error_code::corrupt_file,
42*6b6b8c4fSAdrian McCarthy                                 "Present bit vector does not match size!");
43*6b6b8c4fSAdrian McCarthy 
44*6b6b8c4fSAdrian McCarthy   if (auto EC = readSparseBitVector(Stream, Deleted))
45*6b6b8c4fSAdrian McCarthy     return EC;
46*6b6b8c4fSAdrian McCarthy   if (Present.intersects(Deleted))
47*6b6b8c4fSAdrian McCarthy     return make_error<RawError>(raw_error_code::corrupt_file,
48*6b6b8c4fSAdrian McCarthy                                 "Present bit vector interesects deleted!");
49*6b6b8c4fSAdrian McCarthy 
50*6b6b8c4fSAdrian McCarthy   for (uint32_t P : Present) {
51*6b6b8c4fSAdrian McCarthy     if (auto EC = Stream.readInteger(Buckets[P].first))
52*6b6b8c4fSAdrian McCarthy       return EC;
53*6b6b8c4fSAdrian McCarthy     if (auto EC = Stream.readInteger(Buckets[P].second))
54*6b6b8c4fSAdrian McCarthy       return EC;
55*6b6b8c4fSAdrian McCarthy   }
56*6b6b8c4fSAdrian McCarthy 
57*6b6b8c4fSAdrian McCarthy   return Error::success();
58*6b6b8c4fSAdrian McCarthy }
59*6b6b8c4fSAdrian McCarthy 
60*6b6b8c4fSAdrian McCarthy uint32_t HashTable::calculateSerializedLength() const {
61*6b6b8c4fSAdrian McCarthy   uint32_t Size = sizeof(Header);
62*6b6b8c4fSAdrian McCarthy 
63*6b6b8c4fSAdrian McCarthy   int NumBitsP = Present.find_last() + 1;
64*6b6b8c4fSAdrian McCarthy   int NumBitsD = Deleted.find_last() + 1;
65*6b6b8c4fSAdrian McCarthy 
66*6b6b8c4fSAdrian McCarthy   // Present bit set number of words, followed by that many actual words.
67*6b6b8c4fSAdrian McCarthy   Size += sizeof(uint32_t);
68*6b6b8c4fSAdrian McCarthy   Size += alignTo(NumBitsP, sizeof(uint32_t));
69*6b6b8c4fSAdrian McCarthy 
70*6b6b8c4fSAdrian McCarthy   // Deleted bit set number of words, followed by that many actual words.
71*6b6b8c4fSAdrian McCarthy   Size += sizeof(uint32_t);
72*6b6b8c4fSAdrian McCarthy   Size += alignTo(NumBitsD, sizeof(uint32_t));
73*6b6b8c4fSAdrian McCarthy 
74*6b6b8c4fSAdrian McCarthy   // One (Key, Value) pair for each entry Present.
75*6b6b8c4fSAdrian McCarthy   Size += 2 * sizeof(uint32_t) * size();
76*6b6b8c4fSAdrian McCarthy 
77*6b6b8c4fSAdrian McCarthy   return Size;
78*6b6b8c4fSAdrian McCarthy }
79*6b6b8c4fSAdrian McCarthy 
80*6b6b8c4fSAdrian McCarthy Error HashTable::commit(msf::StreamWriter &Writer) const {
81*6b6b8c4fSAdrian McCarthy   Header H;
82*6b6b8c4fSAdrian McCarthy   H.Size = size();
83*6b6b8c4fSAdrian McCarthy   H.Capacity = capacity();
84*6b6b8c4fSAdrian McCarthy   if (auto EC = Writer.writeObject(H))
85*6b6b8c4fSAdrian McCarthy     return EC;
86*6b6b8c4fSAdrian McCarthy 
87*6b6b8c4fSAdrian McCarthy   if (auto EC = writeSparseBitVector(Writer, Present))
88*6b6b8c4fSAdrian McCarthy     return EC;
89*6b6b8c4fSAdrian McCarthy 
90*6b6b8c4fSAdrian McCarthy   if (auto EC = writeSparseBitVector(Writer, Deleted))
91*6b6b8c4fSAdrian McCarthy     return EC;
92*6b6b8c4fSAdrian McCarthy 
93*6b6b8c4fSAdrian McCarthy   for (const auto &Entry : *this) {
94*6b6b8c4fSAdrian McCarthy     if (auto EC = Writer.writeInteger(Entry.first))
95*6b6b8c4fSAdrian McCarthy       return EC;
96*6b6b8c4fSAdrian McCarthy     if (auto EC = Writer.writeInteger(Entry.second))
97*6b6b8c4fSAdrian McCarthy       return EC;
98*6b6b8c4fSAdrian McCarthy   }
99*6b6b8c4fSAdrian McCarthy   return Error::success();
100*6b6b8c4fSAdrian McCarthy }
101*6b6b8c4fSAdrian McCarthy 
102*6b6b8c4fSAdrian McCarthy void HashTable::clear() {
103*6b6b8c4fSAdrian McCarthy   Buckets.resize(8);
104*6b6b8c4fSAdrian McCarthy   Present.clear();
105*6b6b8c4fSAdrian McCarthy   Deleted.clear();
106*6b6b8c4fSAdrian McCarthy }
107*6b6b8c4fSAdrian McCarthy 
108*6b6b8c4fSAdrian McCarthy uint32_t HashTable::capacity() const { return Buckets.size(); }
109*6b6b8c4fSAdrian McCarthy uint32_t HashTable::size() const { return Present.count(); }
110*6b6b8c4fSAdrian McCarthy 
111*6b6b8c4fSAdrian McCarthy HashTableIterator HashTable::begin() const { return HashTableIterator(*this); }
112*6b6b8c4fSAdrian McCarthy HashTableIterator HashTable::end() const {
113*6b6b8c4fSAdrian McCarthy   return HashTableIterator(*this, 0, true);
114*6b6b8c4fSAdrian McCarthy }
115*6b6b8c4fSAdrian McCarthy 
116*6b6b8c4fSAdrian McCarthy HashTableIterator HashTable::find(uint32_t K) {
117*6b6b8c4fSAdrian McCarthy   uint32_t H = K % capacity();
118*6b6b8c4fSAdrian McCarthy   uint32_t I = H;
119*6b6b8c4fSAdrian McCarthy   Optional<uint32_t> FirstUnused;
120*6b6b8c4fSAdrian McCarthy   do {
121*6b6b8c4fSAdrian McCarthy     if (isPresent(I)) {
122*6b6b8c4fSAdrian McCarthy       if (Buckets[I].first == K)
123*6b6b8c4fSAdrian McCarthy         return HashTableIterator(*this, I, false);
124*6b6b8c4fSAdrian McCarthy     } else {
125*6b6b8c4fSAdrian McCarthy       if (!FirstUnused)
126*6b6b8c4fSAdrian McCarthy         FirstUnused = I;
127*6b6b8c4fSAdrian McCarthy       // Insertion occurs via linear probing from the slot hint, and will be
128*6b6b8c4fSAdrian McCarthy       // inserted at the first empty / deleted location.  Therefore, if we are
129*6b6b8c4fSAdrian McCarthy       // probing and find a location that is neither present nor deleted, then
130*6b6b8c4fSAdrian McCarthy       // nothing must have EVER been inserted at this location, and thus it is
131*6b6b8c4fSAdrian McCarthy       // not possible for a matching value to occur later.
132*6b6b8c4fSAdrian McCarthy       if (!isDeleted(I))
133*6b6b8c4fSAdrian McCarthy         break;
134*6b6b8c4fSAdrian McCarthy     }
135*6b6b8c4fSAdrian McCarthy     I = (I + 1) % capacity();
136*6b6b8c4fSAdrian McCarthy   } while (I != H);
137*6b6b8c4fSAdrian McCarthy 
138*6b6b8c4fSAdrian McCarthy   // The only way FirstUnused would not be set is if every single entry in the
139*6b6b8c4fSAdrian McCarthy   // table were Present.  But this would violate the load factor constraints
140*6b6b8c4fSAdrian McCarthy   // that we impose, so it should never happen.
141*6b6b8c4fSAdrian McCarthy   assert(FirstUnused);
142*6b6b8c4fSAdrian McCarthy   return HashTableIterator(*this, *FirstUnused, true);
143*6b6b8c4fSAdrian McCarthy }
144*6b6b8c4fSAdrian McCarthy 
145*6b6b8c4fSAdrian McCarthy void HashTable::set(uint32_t K, uint32_t V) {
146*6b6b8c4fSAdrian McCarthy   auto Entry = find(K);
147*6b6b8c4fSAdrian McCarthy   if (Entry != end()) {
148*6b6b8c4fSAdrian McCarthy     assert(isPresent(Entry.index()));
149*6b6b8c4fSAdrian McCarthy     assert(Buckets[Entry.index()].first == K);
150*6b6b8c4fSAdrian McCarthy     // We're updating, no need to do anything special.
151*6b6b8c4fSAdrian McCarthy     Buckets[Entry.index()].second = V;
152*6b6b8c4fSAdrian McCarthy     return;
153*6b6b8c4fSAdrian McCarthy   }
154*6b6b8c4fSAdrian McCarthy 
155*6b6b8c4fSAdrian McCarthy   auto &B = Buckets[Entry.index()];
156*6b6b8c4fSAdrian McCarthy   assert(!isPresent(Entry.index()));
157*6b6b8c4fSAdrian McCarthy   assert(Entry.isEnd());
158*6b6b8c4fSAdrian McCarthy   B.first = K;
159*6b6b8c4fSAdrian McCarthy   B.second = V;
160*6b6b8c4fSAdrian McCarthy   Present.set(Entry.index());
161*6b6b8c4fSAdrian McCarthy   Deleted.reset(Entry.index());
162*6b6b8c4fSAdrian McCarthy 
163*6b6b8c4fSAdrian McCarthy   grow();
164*6b6b8c4fSAdrian McCarthy 
165*6b6b8c4fSAdrian McCarthy   assert(find(K) != end());
166*6b6b8c4fSAdrian McCarthy }
167*6b6b8c4fSAdrian McCarthy 
168*6b6b8c4fSAdrian McCarthy void HashTable::remove(uint32_t K) {
169*6b6b8c4fSAdrian McCarthy   auto Iter = find(K);
170*6b6b8c4fSAdrian McCarthy   // It wasn't here to begin with, just exit.
171*6b6b8c4fSAdrian McCarthy   if (Iter == end())
172*6b6b8c4fSAdrian McCarthy     return;
173*6b6b8c4fSAdrian McCarthy 
174*6b6b8c4fSAdrian McCarthy   assert(Present.test(Iter.index()));
175*6b6b8c4fSAdrian McCarthy   assert(!Deleted.test(Iter.index()));
176*6b6b8c4fSAdrian McCarthy   Deleted.set(Iter.index());
177*6b6b8c4fSAdrian McCarthy   Present.reset(Iter.index());
178*6b6b8c4fSAdrian McCarthy }
179*6b6b8c4fSAdrian McCarthy 
180*6b6b8c4fSAdrian McCarthy uint32_t HashTable::get(uint32_t K) {
181*6b6b8c4fSAdrian McCarthy   auto I = find(K);
182*6b6b8c4fSAdrian McCarthy   assert(I != end());
183*6b6b8c4fSAdrian McCarthy   return (*I).second;
184*6b6b8c4fSAdrian McCarthy }
185*6b6b8c4fSAdrian McCarthy 
186*6b6b8c4fSAdrian McCarthy uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
187*6b6b8c4fSAdrian McCarthy 
188*6b6b8c4fSAdrian McCarthy void HashTable::grow() {
189*6b6b8c4fSAdrian McCarthy   uint32_t S = size();
190*6b6b8c4fSAdrian McCarthy   if (S < maxLoad(capacity()))
191*6b6b8c4fSAdrian McCarthy     return;
192*6b6b8c4fSAdrian McCarthy   assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
193*6b6b8c4fSAdrian McCarthy 
194*6b6b8c4fSAdrian McCarthy   uint32_t NewCapacity =
195*6b6b8c4fSAdrian McCarthy       (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX;
196*6b6b8c4fSAdrian McCarthy 
197*6b6b8c4fSAdrian McCarthy   // Growing requires rebuilding the table and re-hashing every item.  Make a
198*6b6b8c4fSAdrian McCarthy   // copy with a larger capacity, insert everything into the copy, then swap
199*6b6b8c4fSAdrian McCarthy   // it in.
200*6b6b8c4fSAdrian McCarthy   HashTable NewMap(NewCapacity);
201*6b6b8c4fSAdrian McCarthy   for (auto I : Present) {
202*6b6b8c4fSAdrian McCarthy     NewMap.set(Buckets[I].first, Buckets[I].second);
203*6b6b8c4fSAdrian McCarthy   }
204*6b6b8c4fSAdrian McCarthy 
205*6b6b8c4fSAdrian McCarthy   Buckets.swap(NewMap.Buckets);
206*6b6b8c4fSAdrian McCarthy   std::swap(Present, NewMap.Present);
207*6b6b8c4fSAdrian McCarthy   std::swap(Deleted, NewMap.Deleted);
208*6b6b8c4fSAdrian McCarthy   assert(capacity() == NewCapacity);
209*6b6b8c4fSAdrian McCarthy   assert(size() == S);
210*6b6b8c4fSAdrian McCarthy }
211*6b6b8c4fSAdrian McCarthy 
212*6b6b8c4fSAdrian McCarthy Error HashTable::readSparseBitVector(msf::StreamReader &Stream,
213*6b6b8c4fSAdrian McCarthy                                      SparseBitVector<> &V) {
214*6b6b8c4fSAdrian McCarthy   uint32_t NumWords;
215*6b6b8c4fSAdrian McCarthy   if (auto EC = Stream.readInteger(NumWords))
216*6b6b8c4fSAdrian McCarthy     return joinErrors(
217*6b6b8c4fSAdrian McCarthy         std::move(EC),
218*6b6b8c4fSAdrian McCarthy         make_error<RawError>(raw_error_code::corrupt_file,
219*6b6b8c4fSAdrian McCarthy                              "Expected hash table number of words"));
220*6b6b8c4fSAdrian McCarthy 
221*6b6b8c4fSAdrian McCarthy   for (uint32_t I = 0; I != NumWords; ++I) {
222*6b6b8c4fSAdrian McCarthy     uint32_t Word;
223*6b6b8c4fSAdrian McCarthy     if (auto EC = Stream.readInteger(Word))
224*6b6b8c4fSAdrian McCarthy       return joinErrors(std::move(EC),
225*6b6b8c4fSAdrian McCarthy                         make_error<RawError>(raw_error_code::corrupt_file,
226*6b6b8c4fSAdrian McCarthy                                              "Expected hash table word"));
227*6b6b8c4fSAdrian McCarthy     for (unsigned Idx = 0; Idx < 32; ++Idx)
228*6b6b8c4fSAdrian McCarthy       if (Word & (1U << Idx))
229*6b6b8c4fSAdrian McCarthy         V.set((I * 32) + Idx);
230*6b6b8c4fSAdrian McCarthy   }
231*6b6b8c4fSAdrian McCarthy   return Error::success();
232*6b6b8c4fSAdrian McCarthy }
233*6b6b8c4fSAdrian McCarthy 
234*6b6b8c4fSAdrian McCarthy Error HashTable::writeSparseBitVector(msf::StreamWriter &Writer,
235*6b6b8c4fSAdrian McCarthy                                       SparseBitVector<> &Vec) {
236*6b6b8c4fSAdrian McCarthy   int ReqBits = Vec.find_last() + 1;
237*6b6b8c4fSAdrian McCarthy   uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t);
238*6b6b8c4fSAdrian McCarthy   if (auto EC = Writer.writeInteger(NumWords))
239*6b6b8c4fSAdrian McCarthy     return joinErrors(
240*6b6b8c4fSAdrian McCarthy         std::move(EC),
241*6b6b8c4fSAdrian McCarthy         make_error<RawError>(raw_error_code::corrupt_file,
242*6b6b8c4fSAdrian McCarthy                              "Could not write linear map number of words"));
243*6b6b8c4fSAdrian McCarthy 
244*6b6b8c4fSAdrian McCarthy   uint32_t Idx = 0;
245*6b6b8c4fSAdrian McCarthy   for (uint32_t I = 0; I != NumWords; ++I) {
246*6b6b8c4fSAdrian McCarthy     uint32_t Word = 0;
247*6b6b8c4fSAdrian McCarthy     for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) {
248*6b6b8c4fSAdrian McCarthy       if (Vec.test(Idx))
249*6b6b8c4fSAdrian McCarthy         Word |= (1 << WordIdx);
250*6b6b8c4fSAdrian McCarthy     }
251*6b6b8c4fSAdrian McCarthy     if (auto EC = Writer.writeInteger(Word))
252*6b6b8c4fSAdrian McCarthy       return joinErrors(std::move(EC), make_error<RawError>(
253*6b6b8c4fSAdrian McCarthy                                            raw_error_code::corrupt_file,
254*6b6b8c4fSAdrian McCarthy                                            "Could not write linear map word"));
255*6b6b8c4fSAdrian McCarthy   }
256*6b6b8c4fSAdrian McCarthy   return Error::success();
257*6b6b8c4fSAdrian McCarthy }
258*6b6b8c4fSAdrian McCarthy 
259*6b6b8c4fSAdrian McCarthy HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index,
260*6b6b8c4fSAdrian McCarthy                                      bool IsEnd)
261*6b6b8c4fSAdrian McCarthy     : Map(&Map), Index(Index), IsEnd(IsEnd) {}
262*6b6b8c4fSAdrian McCarthy 
263*6b6b8c4fSAdrian McCarthy HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) {
264*6b6b8c4fSAdrian McCarthy   int I = Map.Present.find_first();
265*6b6b8c4fSAdrian McCarthy   if (I == -1) {
266*6b6b8c4fSAdrian McCarthy     Index = 0;
267*6b6b8c4fSAdrian McCarthy     IsEnd = true;
268*6b6b8c4fSAdrian McCarthy   } else {
269*6b6b8c4fSAdrian McCarthy     Index = static_cast<uint32_t>(I);
270*6b6b8c4fSAdrian McCarthy     IsEnd = false;
271*6b6b8c4fSAdrian McCarthy   }
272*6b6b8c4fSAdrian McCarthy }
273*6b6b8c4fSAdrian McCarthy 
274*6b6b8c4fSAdrian McCarthy HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) {
275*6b6b8c4fSAdrian McCarthy   Map = R.Map;
276*6b6b8c4fSAdrian McCarthy   return *this;
277*6b6b8c4fSAdrian McCarthy }
278*6b6b8c4fSAdrian McCarthy 
279*6b6b8c4fSAdrian McCarthy bool HashTableIterator::operator==(const HashTableIterator &R) const {
280*6b6b8c4fSAdrian McCarthy   if (IsEnd && R.IsEnd)
281*6b6b8c4fSAdrian McCarthy     return true;
282*6b6b8c4fSAdrian McCarthy   if (IsEnd != R.IsEnd)
283*6b6b8c4fSAdrian McCarthy     return false;
284*6b6b8c4fSAdrian McCarthy 
285*6b6b8c4fSAdrian McCarthy   return (Map == R.Map) && (Index == R.Index);
286*6b6b8c4fSAdrian McCarthy }
287*6b6b8c4fSAdrian McCarthy 
288*6b6b8c4fSAdrian McCarthy const std::pair<uint32_t, uint32_t> &HashTableIterator::operator*() const {
289*6b6b8c4fSAdrian McCarthy   assert(Map->Present.test(Index));
290*6b6b8c4fSAdrian McCarthy   return Map->Buckets[Index];
291*6b6b8c4fSAdrian McCarthy }
292*6b6b8c4fSAdrian McCarthy 
293*6b6b8c4fSAdrian McCarthy HashTableIterator &HashTableIterator::operator++() {
294*6b6b8c4fSAdrian McCarthy   while (Index < Map->Buckets.size()) {
295*6b6b8c4fSAdrian McCarthy     ++Index;
296*6b6b8c4fSAdrian McCarthy     if (Map->Present.test(Index))
297*6b6b8c4fSAdrian McCarthy       return *this;
298*6b6b8c4fSAdrian McCarthy   }
299*6b6b8c4fSAdrian McCarthy 
300*6b6b8c4fSAdrian McCarthy   IsEnd = true;
301*6b6b8c4fSAdrian McCarthy   return *this;
302*6b6b8c4fSAdrian McCarthy }
303