1 //===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/DebugInfo/PDB/Native/HashTable.h" 11 12 #include "llvm/ADT/Optional.h" 13 #include "llvm/ADT/SparseBitVector.h" 14 #include "llvm/DebugInfo/PDB/Native/RawError.h" 15 16 #include <assert.h> 17 18 using namespace llvm; 19 using namespace llvm::pdb; 20 21 HashTable::HashTable() : HashTable(8) {} 22 23 HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); } 24 25 Error HashTable::load(msf::StreamReader &Stream) { 26 const Header *H; 27 if (auto EC = Stream.readObject(H)) 28 return EC; 29 if (H->Capacity == 0) 30 return make_error<RawError>(raw_error_code::corrupt_file, 31 "Invalid Hash Table Capacity"); 32 if (H->Size > maxLoad(H->Capacity)) 33 return make_error<RawError>(raw_error_code::corrupt_file, 34 "Invalid Hash Table Size"); 35 36 Buckets.resize(H->Capacity); 37 38 if (auto EC = readSparseBitVector(Stream, Present)) 39 return EC; 40 if (Present.count() != H->Size) 41 return make_error<RawError>(raw_error_code::corrupt_file, 42 "Present bit vector does not match size!"); 43 44 if (auto EC = readSparseBitVector(Stream, Deleted)) 45 return EC; 46 if (Present.intersects(Deleted)) 47 return make_error<RawError>(raw_error_code::corrupt_file, 48 "Present bit vector interesects deleted!"); 49 50 for (uint32_t P : Present) { 51 if (auto EC = Stream.readInteger(Buckets[P].first, llvm::support::little)) 52 return EC; 53 if (auto EC = Stream.readInteger(Buckets[P].second, llvm::support::little)) 54 return EC; 55 } 56 57 return Error::success(); 58 } 59 60 uint32_t HashTable::calculateSerializedLength() const { 61 uint32_t Size = sizeof(Header); 62 63 int NumBitsP = Present.find_last() + 1; 64 int NumBitsD = Deleted.find_last() + 1; 65 66 // Present bit set number of words, followed by that many actual words. 67 Size += sizeof(uint32_t); 68 Size += alignTo(NumBitsP, sizeof(uint32_t)); 69 70 // Deleted bit set number of words, followed by that many actual words. 71 Size += sizeof(uint32_t); 72 Size += alignTo(NumBitsD, sizeof(uint32_t)); 73 74 // One (Key, Value) pair for each entry Present. 75 Size += 2 * sizeof(uint32_t) * size(); 76 77 return Size; 78 } 79 80 Error HashTable::commit(msf::StreamWriter &Writer) const { 81 Header H; 82 H.Size = size(); 83 H.Capacity = capacity(); 84 if (auto EC = Writer.writeObject(H)) 85 return EC; 86 87 if (auto EC = writeSparseBitVector(Writer, Present)) 88 return EC; 89 90 if (auto EC = writeSparseBitVector(Writer, Deleted)) 91 return EC; 92 93 for (const auto &Entry : *this) { 94 if (auto EC = Writer.writeInteger(Entry.first, llvm::support::little)) 95 return EC; 96 if (auto EC = Writer.writeInteger(Entry.second, llvm::support::little)) 97 return EC; 98 } 99 return Error::success(); 100 } 101 102 void HashTable::clear() { 103 Buckets.resize(8); 104 Present.clear(); 105 Deleted.clear(); 106 } 107 108 uint32_t HashTable::capacity() const { return Buckets.size(); } 109 uint32_t HashTable::size() const { return Present.count(); } 110 111 HashTableIterator HashTable::begin() const { return HashTableIterator(*this); } 112 HashTableIterator HashTable::end() const { 113 return HashTableIterator(*this, 0, true); 114 } 115 116 HashTableIterator HashTable::find(uint32_t K) { 117 uint32_t H = K % capacity(); 118 uint32_t I = H; 119 Optional<uint32_t> FirstUnused; 120 do { 121 if (isPresent(I)) { 122 if (Buckets[I].first == K) 123 return HashTableIterator(*this, I, false); 124 } else { 125 if (!FirstUnused) 126 FirstUnused = I; 127 // Insertion occurs via linear probing from the slot hint, and will be 128 // inserted at the first empty / deleted location. Therefore, if we are 129 // probing and find a location that is neither present nor deleted, then 130 // nothing must have EVER been inserted at this location, and thus it is 131 // not possible for a matching value to occur later. 132 if (!isDeleted(I)) 133 break; 134 } 135 I = (I + 1) % capacity(); 136 } while (I != H); 137 138 // The only way FirstUnused would not be set is if every single entry in the 139 // table were Present. But this would violate the load factor constraints 140 // that we impose, so it should never happen. 141 assert(FirstUnused); 142 return HashTableIterator(*this, *FirstUnused, true); 143 } 144 145 void HashTable::set(uint32_t K, uint32_t V) { 146 auto Entry = find(K); 147 if (Entry != end()) { 148 assert(isPresent(Entry.index())); 149 assert(Buckets[Entry.index()].first == K); 150 // We're updating, no need to do anything special. 151 Buckets[Entry.index()].second = V; 152 return; 153 } 154 155 auto &B = Buckets[Entry.index()]; 156 assert(!isPresent(Entry.index())); 157 assert(Entry.isEnd()); 158 B.first = K; 159 B.second = V; 160 Present.set(Entry.index()); 161 Deleted.reset(Entry.index()); 162 163 grow(); 164 165 assert(find(K) != end()); 166 } 167 168 void HashTable::remove(uint32_t K) { 169 auto Iter = find(K); 170 // It wasn't here to begin with, just exit. 171 if (Iter == end()) 172 return; 173 174 assert(Present.test(Iter.index())); 175 assert(!Deleted.test(Iter.index())); 176 Deleted.set(Iter.index()); 177 Present.reset(Iter.index()); 178 } 179 180 uint32_t HashTable::get(uint32_t K) { 181 auto I = find(K); 182 assert(I != end()); 183 return (*I).second; 184 } 185 186 uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } 187 188 void HashTable::grow() { 189 uint32_t S = size(); 190 if (S < maxLoad(capacity())) 191 return; 192 assert(capacity() != UINT32_MAX && "Can't grow Hash table!"); 193 194 uint32_t NewCapacity = 195 (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX; 196 197 // Growing requires rebuilding the table and re-hashing every item. Make a 198 // copy with a larger capacity, insert everything into the copy, then swap 199 // it in. 200 HashTable NewMap(NewCapacity); 201 for (auto I : Present) { 202 NewMap.set(Buckets[I].first, Buckets[I].second); 203 } 204 205 Buckets.swap(NewMap.Buckets); 206 std::swap(Present, NewMap.Present); 207 std::swap(Deleted, NewMap.Deleted); 208 assert(capacity() == NewCapacity); 209 assert(size() == S); 210 } 211 212 Error HashTable::readSparseBitVector(msf::StreamReader &Stream, 213 SparseBitVector<> &V) { 214 uint32_t NumWords; 215 if (auto EC = Stream.readInteger(NumWords, llvm::support::little)) 216 return joinErrors( 217 std::move(EC), 218 make_error<RawError>(raw_error_code::corrupt_file, 219 "Expected hash table number of words")); 220 221 for (uint32_t I = 0; I != NumWords; ++I) { 222 uint32_t Word; 223 if (auto EC = Stream.readInteger(Word, llvm::support::little)) 224 return joinErrors(std::move(EC), 225 make_error<RawError>(raw_error_code::corrupt_file, 226 "Expected hash table word")); 227 for (unsigned Idx = 0; Idx < 32; ++Idx) 228 if (Word & (1U << Idx)) 229 V.set((I * 32) + Idx); 230 } 231 return Error::success(); 232 } 233 234 Error HashTable::writeSparseBitVector(msf::StreamWriter &Writer, 235 SparseBitVector<> &Vec) { 236 int ReqBits = Vec.find_last() + 1; 237 uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t); 238 if (auto EC = Writer.writeInteger(NumWords, llvm::support::little)) 239 return joinErrors( 240 std::move(EC), 241 make_error<RawError>(raw_error_code::corrupt_file, 242 "Could not write linear map number of words")); 243 244 uint32_t Idx = 0; 245 for (uint32_t I = 0; I != NumWords; ++I) { 246 uint32_t Word = 0; 247 for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) { 248 if (Vec.test(Idx)) 249 Word |= (1 << WordIdx); 250 } 251 if (auto EC = Writer.writeInteger(Word, llvm::support::little)) 252 return joinErrors(std::move(EC), make_error<RawError>( 253 raw_error_code::corrupt_file, 254 "Could not write linear map word")); 255 } 256 return Error::success(); 257 } 258 259 HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index, 260 bool IsEnd) 261 : Map(&Map), Index(Index), IsEnd(IsEnd) {} 262 263 HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) { 264 int I = Map.Present.find_first(); 265 if (I == -1) { 266 Index = 0; 267 IsEnd = true; 268 } else { 269 Index = static_cast<uint32_t>(I); 270 IsEnd = false; 271 } 272 } 273 274 HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) { 275 Map = R.Map; 276 return *this; 277 } 278 279 bool HashTableIterator::operator==(const HashTableIterator &R) const { 280 if (IsEnd && R.IsEnd) 281 return true; 282 if (IsEnd != R.IsEnd) 283 return false; 284 285 return (Map == R.Map) && (Index == R.Index); 286 } 287 288 const std::pair<uint32_t, uint32_t> &HashTableIterator::operator*() const { 289 assert(Map->Present.test(Index)); 290 return Map->Buckets[Index]; 291 } 292 293 HashTableIterator &HashTableIterator::operator++() { 294 while (Index < Map->Buckets.size()) { 295 ++Index; 296 if (Map->Present.test(Index)) 297 return *this; 298 } 299 300 IsEnd = true; 301 return *this; 302 } 303