1*6b6b8c4fSAdrian McCarthy //===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===// 2*6b6b8c4fSAdrian McCarthy // 3*6b6b8c4fSAdrian McCarthy // The LLVM Compiler Infrastructure 4*6b6b8c4fSAdrian McCarthy // 5*6b6b8c4fSAdrian McCarthy // This file is distributed under the University of Illinois Open Source 6*6b6b8c4fSAdrian McCarthy // License. See LICENSE.TXT for details. 7*6b6b8c4fSAdrian McCarthy // 8*6b6b8c4fSAdrian McCarthy //===----------------------------------------------------------------------===// 9*6b6b8c4fSAdrian McCarthy 10*6b6b8c4fSAdrian McCarthy #include "llvm/DebugInfo/PDB/Native/HashTable.h" 11*6b6b8c4fSAdrian McCarthy 12*6b6b8c4fSAdrian McCarthy #include "llvm/ADT/Optional.h" 13*6b6b8c4fSAdrian McCarthy #include "llvm/ADT/SparseBitVector.h" 14*6b6b8c4fSAdrian McCarthy #include "llvm/DebugInfo/PDB/Native/RawError.h" 15*6b6b8c4fSAdrian McCarthy 16*6b6b8c4fSAdrian McCarthy #include <assert.h> 17*6b6b8c4fSAdrian McCarthy 18*6b6b8c4fSAdrian McCarthy using namespace llvm; 19*6b6b8c4fSAdrian McCarthy using namespace llvm::pdb; 20*6b6b8c4fSAdrian McCarthy 21*6b6b8c4fSAdrian McCarthy HashTable::HashTable() : HashTable(8) {} 22*6b6b8c4fSAdrian McCarthy 23*6b6b8c4fSAdrian McCarthy HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); } 24*6b6b8c4fSAdrian McCarthy 25*6b6b8c4fSAdrian McCarthy Error HashTable::load(msf::StreamReader &Stream) { 26*6b6b8c4fSAdrian McCarthy const Header *H; 27*6b6b8c4fSAdrian McCarthy if (auto EC = Stream.readObject(H)) 28*6b6b8c4fSAdrian McCarthy return EC; 29*6b6b8c4fSAdrian McCarthy if (H->Capacity == 0) 30*6b6b8c4fSAdrian McCarthy return make_error<RawError>(raw_error_code::corrupt_file, 31*6b6b8c4fSAdrian McCarthy "Invalid Hash Table Capacity"); 32*6b6b8c4fSAdrian McCarthy if (H->Size > maxLoad(H->Capacity)) 33*6b6b8c4fSAdrian McCarthy return make_error<RawError>(raw_error_code::corrupt_file, 34*6b6b8c4fSAdrian McCarthy "Invalid Hash Table Size"); 35*6b6b8c4fSAdrian McCarthy 36*6b6b8c4fSAdrian McCarthy Buckets.resize(H->Capacity); 37*6b6b8c4fSAdrian McCarthy 38*6b6b8c4fSAdrian McCarthy if (auto EC = readSparseBitVector(Stream, Present)) 39*6b6b8c4fSAdrian McCarthy return EC; 40*6b6b8c4fSAdrian McCarthy if (Present.count() != H->Size) 41*6b6b8c4fSAdrian McCarthy return make_error<RawError>(raw_error_code::corrupt_file, 42*6b6b8c4fSAdrian McCarthy "Present bit vector does not match size!"); 43*6b6b8c4fSAdrian McCarthy 44*6b6b8c4fSAdrian McCarthy if (auto EC = readSparseBitVector(Stream, Deleted)) 45*6b6b8c4fSAdrian McCarthy return EC; 46*6b6b8c4fSAdrian McCarthy if (Present.intersects(Deleted)) 47*6b6b8c4fSAdrian McCarthy return make_error<RawError>(raw_error_code::corrupt_file, 48*6b6b8c4fSAdrian McCarthy "Present bit vector interesects deleted!"); 49*6b6b8c4fSAdrian McCarthy 50*6b6b8c4fSAdrian McCarthy for (uint32_t P : Present) { 51*6b6b8c4fSAdrian McCarthy if (auto EC = Stream.readInteger(Buckets[P].first)) 52*6b6b8c4fSAdrian McCarthy return EC; 53*6b6b8c4fSAdrian McCarthy if (auto EC = Stream.readInteger(Buckets[P].second)) 54*6b6b8c4fSAdrian McCarthy return EC; 55*6b6b8c4fSAdrian McCarthy } 56*6b6b8c4fSAdrian McCarthy 57*6b6b8c4fSAdrian McCarthy return Error::success(); 58*6b6b8c4fSAdrian McCarthy } 59*6b6b8c4fSAdrian McCarthy 60*6b6b8c4fSAdrian McCarthy uint32_t HashTable::calculateSerializedLength() const { 61*6b6b8c4fSAdrian McCarthy uint32_t Size = sizeof(Header); 62*6b6b8c4fSAdrian McCarthy 63*6b6b8c4fSAdrian McCarthy int NumBitsP = Present.find_last() + 1; 64*6b6b8c4fSAdrian McCarthy int NumBitsD = Deleted.find_last() + 1; 65*6b6b8c4fSAdrian McCarthy 66*6b6b8c4fSAdrian McCarthy // Present bit set number of words, followed by that many actual words. 67*6b6b8c4fSAdrian McCarthy Size += sizeof(uint32_t); 68*6b6b8c4fSAdrian McCarthy Size += alignTo(NumBitsP, sizeof(uint32_t)); 69*6b6b8c4fSAdrian McCarthy 70*6b6b8c4fSAdrian McCarthy // Deleted bit set number of words, followed by that many actual words. 71*6b6b8c4fSAdrian McCarthy Size += sizeof(uint32_t); 72*6b6b8c4fSAdrian McCarthy Size += alignTo(NumBitsD, sizeof(uint32_t)); 73*6b6b8c4fSAdrian McCarthy 74*6b6b8c4fSAdrian McCarthy // One (Key, Value) pair for each entry Present. 75*6b6b8c4fSAdrian McCarthy Size += 2 * sizeof(uint32_t) * size(); 76*6b6b8c4fSAdrian McCarthy 77*6b6b8c4fSAdrian McCarthy return Size; 78*6b6b8c4fSAdrian McCarthy } 79*6b6b8c4fSAdrian McCarthy 80*6b6b8c4fSAdrian McCarthy Error HashTable::commit(msf::StreamWriter &Writer) const { 81*6b6b8c4fSAdrian McCarthy Header H; 82*6b6b8c4fSAdrian McCarthy H.Size = size(); 83*6b6b8c4fSAdrian McCarthy H.Capacity = capacity(); 84*6b6b8c4fSAdrian McCarthy if (auto EC = Writer.writeObject(H)) 85*6b6b8c4fSAdrian McCarthy return EC; 86*6b6b8c4fSAdrian McCarthy 87*6b6b8c4fSAdrian McCarthy if (auto EC = writeSparseBitVector(Writer, Present)) 88*6b6b8c4fSAdrian McCarthy return EC; 89*6b6b8c4fSAdrian McCarthy 90*6b6b8c4fSAdrian McCarthy if (auto EC = writeSparseBitVector(Writer, Deleted)) 91*6b6b8c4fSAdrian McCarthy return EC; 92*6b6b8c4fSAdrian McCarthy 93*6b6b8c4fSAdrian McCarthy for (const auto &Entry : *this) { 94*6b6b8c4fSAdrian McCarthy if (auto EC = Writer.writeInteger(Entry.first)) 95*6b6b8c4fSAdrian McCarthy return EC; 96*6b6b8c4fSAdrian McCarthy if (auto EC = Writer.writeInteger(Entry.second)) 97*6b6b8c4fSAdrian McCarthy return EC; 98*6b6b8c4fSAdrian McCarthy } 99*6b6b8c4fSAdrian McCarthy return Error::success(); 100*6b6b8c4fSAdrian McCarthy } 101*6b6b8c4fSAdrian McCarthy 102*6b6b8c4fSAdrian McCarthy void HashTable::clear() { 103*6b6b8c4fSAdrian McCarthy Buckets.resize(8); 104*6b6b8c4fSAdrian McCarthy Present.clear(); 105*6b6b8c4fSAdrian McCarthy Deleted.clear(); 106*6b6b8c4fSAdrian McCarthy } 107*6b6b8c4fSAdrian McCarthy 108*6b6b8c4fSAdrian McCarthy uint32_t HashTable::capacity() const { return Buckets.size(); } 109*6b6b8c4fSAdrian McCarthy uint32_t HashTable::size() const { return Present.count(); } 110*6b6b8c4fSAdrian McCarthy 111*6b6b8c4fSAdrian McCarthy HashTableIterator HashTable::begin() const { return HashTableIterator(*this); } 112*6b6b8c4fSAdrian McCarthy HashTableIterator HashTable::end() const { 113*6b6b8c4fSAdrian McCarthy return HashTableIterator(*this, 0, true); 114*6b6b8c4fSAdrian McCarthy } 115*6b6b8c4fSAdrian McCarthy 116*6b6b8c4fSAdrian McCarthy HashTableIterator HashTable::find(uint32_t K) { 117*6b6b8c4fSAdrian McCarthy uint32_t H = K % capacity(); 118*6b6b8c4fSAdrian McCarthy uint32_t I = H; 119*6b6b8c4fSAdrian McCarthy Optional<uint32_t> FirstUnused; 120*6b6b8c4fSAdrian McCarthy do { 121*6b6b8c4fSAdrian McCarthy if (isPresent(I)) { 122*6b6b8c4fSAdrian McCarthy if (Buckets[I].first == K) 123*6b6b8c4fSAdrian McCarthy return HashTableIterator(*this, I, false); 124*6b6b8c4fSAdrian McCarthy } else { 125*6b6b8c4fSAdrian McCarthy if (!FirstUnused) 126*6b6b8c4fSAdrian McCarthy FirstUnused = I; 127*6b6b8c4fSAdrian McCarthy // Insertion occurs via linear probing from the slot hint, and will be 128*6b6b8c4fSAdrian McCarthy // inserted at the first empty / deleted location. Therefore, if we are 129*6b6b8c4fSAdrian McCarthy // probing and find a location that is neither present nor deleted, then 130*6b6b8c4fSAdrian McCarthy // nothing must have EVER been inserted at this location, and thus it is 131*6b6b8c4fSAdrian McCarthy // not possible for a matching value to occur later. 132*6b6b8c4fSAdrian McCarthy if (!isDeleted(I)) 133*6b6b8c4fSAdrian McCarthy break; 134*6b6b8c4fSAdrian McCarthy } 135*6b6b8c4fSAdrian McCarthy I = (I + 1) % capacity(); 136*6b6b8c4fSAdrian McCarthy } while (I != H); 137*6b6b8c4fSAdrian McCarthy 138*6b6b8c4fSAdrian McCarthy // The only way FirstUnused would not be set is if every single entry in the 139*6b6b8c4fSAdrian McCarthy // table were Present. But this would violate the load factor constraints 140*6b6b8c4fSAdrian McCarthy // that we impose, so it should never happen. 141*6b6b8c4fSAdrian McCarthy assert(FirstUnused); 142*6b6b8c4fSAdrian McCarthy return HashTableIterator(*this, *FirstUnused, true); 143*6b6b8c4fSAdrian McCarthy } 144*6b6b8c4fSAdrian McCarthy 145*6b6b8c4fSAdrian McCarthy void HashTable::set(uint32_t K, uint32_t V) { 146*6b6b8c4fSAdrian McCarthy auto Entry = find(K); 147*6b6b8c4fSAdrian McCarthy if (Entry != end()) { 148*6b6b8c4fSAdrian McCarthy assert(isPresent(Entry.index())); 149*6b6b8c4fSAdrian McCarthy assert(Buckets[Entry.index()].first == K); 150*6b6b8c4fSAdrian McCarthy // We're updating, no need to do anything special. 151*6b6b8c4fSAdrian McCarthy Buckets[Entry.index()].second = V; 152*6b6b8c4fSAdrian McCarthy return; 153*6b6b8c4fSAdrian McCarthy } 154*6b6b8c4fSAdrian McCarthy 155*6b6b8c4fSAdrian McCarthy auto &B = Buckets[Entry.index()]; 156*6b6b8c4fSAdrian McCarthy assert(!isPresent(Entry.index())); 157*6b6b8c4fSAdrian McCarthy assert(Entry.isEnd()); 158*6b6b8c4fSAdrian McCarthy B.first = K; 159*6b6b8c4fSAdrian McCarthy B.second = V; 160*6b6b8c4fSAdrian McCarthy Present.set(Entry.index()); 161*6b6b8c4fSAdrian McCarthy Deleted.reset(Entry.index()); 162*6b6b8c4fSAdrian McCarthy 163*6b6b8c4fSAdrian McCarthy grow(); 164*6b6b8c4fSAdrian McCarthy 165*6b6b8c4fSAdrian McCarthy assert(find(K) != end()); 166*6b6b8c4fSAdrian McCarthy } 167*6b6b8c4fSAdrian McCarthy 168*6b6b8c4fSAdrian McCarthy void HashTable::remove(uint32_t K) { 169*6b6b8c4fSAdrian McCarthy auto Iter = find(K); 170*6b6b8c4fSAdrian McCarthy // It wasn't here to begin with, just exit. 171*6b6b8c4fSAdrian McCarthy if (Iter == end()) 172*6b6b8c4fSAdrian McCarthy return; 173*6b6b8c4fSAdrian McCarthy 174*6b6b8c4fSAdrian McCarthy assert(Present.test(Iter.index())); 175*6b6b8c4fSAdrian McCarthy assert(!Deleted.test(Iter.index())); 176*6b6b8c4fSAdrian McCarthy Deleted.set(Iter.index()); 177*6b6b8c4fSAdrian McCarthy Present.reset(Iter.index()); 178*6b6b8c4fSAdrian McCarthy } 179*6b6b8c4fSAdrian McCarthy 180*6b6b8c4fSAdrian McCarthy uint32_t HashTable::get(uint32_t K) { 181*6b6b8c4fSAdrian McCarthy auto I = find(K); 182*6b6b8c4fSAdrian McCarthy assert(I != end()); 183*6b6b8c4fSAdrian McCarthy return (*I).second; 184*6b6b8c4fSAdrian McCarthy } 185*6b6b8c4fSAdrian McCarthy 186*6b6b8c4fSAdrian McCarthy uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } 187*6b6b8c4fSAdrian McCarthy 188*6b6b8c4fSAdrian McCarthy void HashTable::grow() { 189*6b6b8c4fSAdrian McCarthy uint32_t S = size(); 190*6b6b8c4fSAdrian McCarthy if (S < maxLoad(capacity())) 191*6b6b8c4fSAdrian McCarthy return; 192*6b6b8c4fSAdrian McCarthy assert(capacity() != UINT32_MAX && "Can't grow Hash table!"); 193*6b6b8c4fSAdrian McCarthy 194*6b6b8c4fSAdrian McCarthy uint32_t NewCapacity = 195*6b6b8c4fSAdrian McCarthy (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX; 196*6b6b8c4fSAdrian McCarthy 197*6b6b8c4fSAdrian McCarthy // Growing requires rebuilding the table and re-hashing every item. Make a 198*6b6b8c4fSAdrian McCarthy // copy with a larger capacity, insert everything into the copy, then swap 199*6b6b8c4fSAdrian McCarthy // it in. 200*6b6b8c4fSAdrian McCarthy HashTable NewMap(NewCapacity); 201*6b6b8c4fSAdrian McCarthy for (auto I : Present) { 202*6b6b8c4fSAdrian McCarthy NewMap.set(Buckets[I].first, Buckets[I].second); 203*6b6b8c4fSAdrian McCarthy } 204*6b6b8c4fSAdrian McCarthy 205*6b6b8c4fSAdrian McCarthy Buckets.swap(NewMap.Buckets); 206*6b6b8c4fSAdrian McCarthy std::swap(Present, NewMap.Present); 207*6b6b8c4fSAdrian McCarthy std::swap(Deleted, NewMap.Deleted); 208*6b6b8c4fSAdrian McCarthy assert(capacity() == NewCapacity); 209*6b6b8c4fSAdrian McCarthy assert(size() == S); 210*6b6b8c4fSAdrian McCarthy } 211*6b6b8c4fSAdrian McCarthy 212*6b6b8c4fSAdrian McCarthy Error HashTable::readSparseBitVector(msf::StreamReader &Stream, 213*6b6b8c4fSAdrian McCarthy SparseBitVector<> &V) { 214*6b6b8c4fSAdrian McCarthy uint32_t NumWords; 215*6b6b8c4fSAdrian McCarthy if (auto EC = Stream.readInteger(NumWords)) 216*6b6b8c4fSAdrian McCarthy return joinErrors( 217*6b6b8c4fSAdrian McCarthy std::move(EC), 218*6b6b8c4fSAdrian McCarthy make_error<RawError>(raw_error_code::corrupt_file, 219*6b6b8c4fSAdrian McCarthy "Expected hash table number of words")); 220*6b6b8c4fSAdrian McCarthy 221*6b6b8c4fSAdrian McCarthy for (uint32_t I = 0; I != NumWords; ++I) { 222*6b6b8c4fSAdrian McCarthy uint32_t Word; 223*6b6b8c4fSAdrian McCarthy if (auto EC = Stream.readInteger(Word)) 224*6b6b8c4fSAdrian McCarthy return joinErrors(std::move(EC), 225*6b6b8c4fSAdrian McCarthy make_error<RawError>(raw_error_code::corrupt_file, 226*6b6b8c4fSAdrian McCarthy "Expected hash table word")); 227*6b6b8c4fSAdrian McCarthy for (unsigned Idx = 0; Idx < 32; ++Idx) 228*6b6b8c4fSAdrian McCarthy if (Word & (1U << Idx)) 229*6b6b8c4fSAdrian McCarthy V.set((I * 32) + Idx); 230*6b6b8c4fSAdrian McCarthy } 231*6b6b8c4fSAdrian McCarthy return Error::success(); 232*6b6b8c4fSAdrian McCarthy } 233*6b6b8c4fSAdrian McCarthy 234*6b6b8c4fSAdrian McCarthy Error HashTable::writeSparseBitVector(msf::StreamWriter &Writer, 235*6b6b8c4fSAdrian McCarthy SparseBitVector<> &Vec) { 236*6b6b8c4fSAdrian McCarthy int ReqBits = Vec.find_last() + 1; 237*6b6b8c4fSAdrian McCarthy uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t); 238*6b6b8c4fSAdrian McCarthy if (auto EC = Writer.writeInteger(NumWords)) 239*6b6b8c4fSAdrian McCarthy return joinErrors( 240*6b6b8c4fSAdrian McCarthy std::move(EC), 241*6b6b8c4fSAdrian McCarthy make_error<RawError>(raw_error_code::corrupt_file, 242*6b6b8c4fSAdrian McCarthy "Could not write linear map number of words")); 243*6b6b8c4fSAdrian McCarthy 244*6b6b8c4fSAdrian McCarthy uint32_t Idx = 0; 245*6b6b8c4fSAdrian McCarthy for (uint32_t I = 0; I != NumWords; ++I) { 246*6b6b8c4fSAdrian McCarthy uint32_t Word = 0; 247*6b6b8c4fSAdrian McCarthy for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) { 248*6b6b8c4fSAdrian McCarthy if (Vec.test(Idx)) 249*6b6b8c4fSAdrian McCarthy Word |= (1 << WordIdx); 250*6b6b8c4fSAdrian McCarthy } 251*6b6b8c4fSAdrian McCarthy if (auto EC = Writer.writeInteger(Word)) 252*6b6b8c4fSAdrian McCarthy return joinErrors(std::move(EC), make_error<RawError>( 253*6b6b8c4fSAdrian McCarthy raw_error_code::corrupt_file, 254*6b6b8c4fSAdrian McCarthy "Could not write linear map word")); 255*6b6b8c4fSAdrian McCarthy } 256*6b6b8c4fSAdrian McCarthy return Error::success(); 257*6b6b8c4fSAdrian McCarthy } 258*6b6b8c4fSAdrian McCarthy 259*6b6b8c4fSAdrian McCarthy HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index, 260*6b6b8c4fSAdrian McCarthy bool IsEnd) 261*6b6b8c4fSAdrian McCarthy : Map(&Map), Index(Index), IsEnd(IsEnd) {} 262*6b6b8c4fSAdrian McCarthy 263*6b6b8c4fSAdrian McCarthy HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) { 264*6b6b8c4fSAdrian McCarthy int I = Map.Present.find_first(); 265*6b6b8c4fSAdrian McCarthy if (I == -1) { 266*6b6b8c4fSAdrian McCarthy Index = 0; 267*6b6b8c4fSAdrian McCarthy IsEnd = true; 268*6b6b8c4fSAdrian McCarthy } else { 269*6b6b8c4fSAdrian McCarthy Index = static_cast<uint32_t>(I); 270*6b6b8c4fSAdrian McCarthy IsEnd = false; 271*6b6b8c4fSAdrian McCarthy } 272*6b6b8c4fSAdrian McCarthy } 273*6b6b8c4fSAdrian McCarthy 274*6b6b8c4fSAdrian McCarthy HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) { 275*6b6b8c4fSAdrian McCarthy Map = R.Map; 276*6b6b8c4fSAdrian McCarthy return *this; 277*6b6b8c4fSAdrian McCarthy } 278*6b6b8c4fSAdrian McCarthy 279*6b6b8c4fSAdrian McCarthy bool HashTableIterator::operator==(const HashTableIterator &R) const { 280*6b6b8c4fSAdrian McCarthy if (IsEnd && R.IsEnd) 281*6b6b8c4fSAdrian McCarthy return true; 282*6b6b8c4fSAdrian McCarthy if (IsEnd != R.IsEnd) 283*6b6b8c4fSAdrian McCarthy return false; 284*6b6b8c4fSAdrian McCarthy 285*6b6b8c4fSAdrian McCarthy return (Map == R.Map) && (Index == R.Index); 286*6b6b8c4fSAdrian McCarthy } 287*6b6b8c4fSAdrian McCarthy 288*6b6b8c4fSAdrian McCarthy const std::pair<uint32_t, uint32_t> &HashTableIterator::operator*() const { 289*6b6b8c4fSAdrian McCarthy assert(Map->Present.test(Index)); 290*6b6b8c4fSAdrian McCarthy return Map->Buckets[Index]; 291*6b6b8c4fSAdrian McCarthy } 292*6b6b8c4fSAdrian McCarthy 293*6b6b8c4fSAdrian McCarthy HashTableIterator &HashTableIterator::operator++() { 294*6b6b8c4fSAdrian McCarthy while (Index < Map->Buckets.size()) { 295*6b6b8c4fSAdrian McCarthy ++Index; 296*6b6b8c4fSAdrian McCarthy if (Map->Present.test(Index)) 297*6b6b8c4fSAdrian McCarthy return *this; 298*6b6b8c4fSAdrian McCarthy } 299*6b6b8c4fSAdrian McCarthy 300*6b6b8c4fSAdrian McCarthy IsEnd = true; 301*6b6b8c4fSAdrian McCarthy return *this; 302*6b6b8c4fSAdrian McCarthy } 303