10b57cec5SDimitry Andric //===--- StringMap.cpp - String Hash table map implementation -------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements the StringMap class. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/ADT/StringMap.h" 140b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 1506c3fb27SDimitry Andric #include "llvm/Support/ReverseIteration.h" 1606c3fb27SDimitry Andric #include "llvm/Support/xxhash.h" 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric using namespace llvm; 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric /// Returns the number of buckets to allocate to ensure that the DenseMap can 210b57cec5SDimitry Andric /// accommodate \p NumEntries without need to grow(). 2281ad6265SDimitry Andric static inline unsigned getMinBucketToReserveForEntries(unsigned NumEntries) { 230b57cec5SDimitry Andric // Ensure that "NumEntries * 4 < NumBuckets * 3" 240b57cec5SDimitry Andric if (NumEntries == 0) 250b57cec5SDimitry Andric return 0; 260b57cec5SDimitry Andric // +1 is required because of the strict equality. 270b57cec5SDimitry Andric // For example if NumEntries is 48, we need to return 401. 280b57cec5SDimitry Andric return NextPowerOf2(NumEntries * 4 / 3 + 1); 290b57cec5SDimitry Andric } 300b57cec5SDimitry Andric 3181ad6265SDimitry Andric static inline StringMapEntryBase **createTable(unsigned NewNumBuckets) { 3281ad6265SDimitry Andric auto **Table = static_cast<StringMapEntryBase **>(safe_calloc( 3381ad6265SDimitry Andric NewNumBuckets + 1, sizeof(StringMapEntryBase **) + sizeof(unsigned))); 3481ad6265SDimitry Andric 3581ad6265SDimitry Andric // Allocate one extra bucket, set it to look filled so the iterators stop at 3681ad6265SDimitry Andric // end. 3781ad6265SDimitry Andric Table[NewNumBuckets] = (StringMapEntryBase *)2; 3881ad6265SDimitry Andric return Table; 3981ad6265SDimitry Andric } 4081ad6265SDimitry Andric 4181ad6265SDimitry Andric static inline unsigned *getHashTable(StringMapEntryBase **TheTable, 4281ad6265SDimitry Andric unsigned NumBuckets) { 4381ad6265SDimitry Andric return reinterpret_cast<unsigned *>(TheTable + NumBuckets + 1); 4481ad6265SDimitry Andric } 4581ad6265SDimitry Andric 46*0fca6ea1SDimitry Andric uint32_t StringMapImpl::hash(StringRef Key) { return xxh3_64bits(Key); } 47*0fca6ea1SDimitry Andric 480b57cec5SDimitry Andric StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) { 490b57cec5SDimitry Andric ItemSize = itemSize; 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric // If a size is specified, initialize the table with that many buckets. 520b57cec5SDimitry Andric if (InitSize) { 530b57cec5SDimitry Andric // The table will grow when the number of entries reach 3/4 of the number of 540b57cec5SDimitry Andric // buckets. To guarantee that "InitSize" number of entries can be inserted 550b57cec5SDimitry Andric // in the table without growing, we allocate just what is needed here. 560b57cec5SDimitry Andric init(getMinBucketToReserveForEntries(InitSize)); 570b57cec5SDimitry Andric return; 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric // Otherwise, initialize it with zero buckets to avoid the allocation. 610b57cec5SDimitry Andric TheTable = nullptr; 620b57cec5SDimitry Andric NumBuckets = 0; 630b57cec5SDimitry Andric NumItems = 0; 640b57cec5SDimitry Andric NumTombstones = 0; 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric void StringMapImpl::init(unsigned InitSize) { 680b57cec5SDimitry Andric assert((InitSize & (InitSize - 1)) == 0 && 690b57cec5SDimitry Andric "Init Size must be a power of 2 or zero!"); 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric unsigned NewNumBuckets = InitSize ? InitSize : 16; 720b57cec5SDimitry Andric NumItems = 0; 730b57cec5SDimitry Andric NumTombstones = 0; 740b57cec5SDimitry Andric 7581ad6265SDimitry Andric TheTable = createTable(NewNumBuckets); 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric // Set the member only if TheTable was successfully allocated 780b57cec5SDimitry Andric NumBuckets = NewNumBuckets; 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric /// LookupBucketFor - Look up the bucket that the specified string should end 820b57cec5SDimitry Andric /// up in. If it already exists as a key in the map, the Item pointer for the 830b57cec5SDimitry Andric /// specified bucket will be non-null. Otherwise, it will be null. In either 840b57cec5SDimitry Andric /// case, the FullHashValue field of the bucket will be set to the hash value 850b57cec5SDimitry Andric /// of the string. 86*0fca6ea1SDimitry Andric unsigned StringMapImpl::LookupBucketFor(StringRef Name, 87*0fca6ea1SDimitry Andric uint32_t FullHashValue) { 88*0fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS 89*0fca6ea1SDimitry Andric assert(FullHashValue == hash(Name)); 90*0fca6ea1SDimitry Andric #endif 9181ad6265SDimitry Andric // Hash table unallocated so far? 9281ad6265SDimitry Andric if (NumBuckets == 0) 930b57cec5SDimitry Andric init(16); 9406c3fb27SDimitry Andric if (shouldReverseIterate()) 9506c3fb27SDimitry Andric FullHashValue = ~FullHashValue; 9681ad6265SDimitry Andric unsigned BucketNo = FullHashValue & (NumBuckets - 1); 9781ad6265SDimitry Andric unsigned *HashTable = getHashTable(TheTable, NumBuckets); 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric unsigned ProbeAmt = 1; 1000b57cec5SDimitry Andric int FirstTombstone = -1; 1010b57cec5SDimitry Andric while (true) { 1020b57cec5SDimitry Andric StringMapEntryBase *BucketItem = TheTable[BucketNo]; 1030b57cec5SDimitry Andric // If we found an empty bucket, this key isn't in the table yet, return it. 1040b57cec5SDimitry Andric if (LLVM_LIKELY(!BucketItem)) { 1050b57cec5SDimitry Andric // If we found a tombstone, we want to reuse the tombstone instead of an 1060b57cec5SDimitry Andric // empty bucket. This reduces probing. 1070b57cec5SDimitry Andric if (FirstTombstone != -1) { 1080b57cec5SDimitry Andric HashTable[FirstTombstone] = FullHashValue; 1090b57cec5SDimitry Andric return FirstTombstone; 1100b57cec5SDimitry Andric } 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric HashTable[BucketNo] = FullHashValue; 1130b57cec5SDimitry Andric return BucketNo; 1140b57cec5SDimitry Andric } 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric if (BucketItem == getTombstoneVal()) { 1170b57cec5SDimitry Andric // Skip over tombstones. However, remember the first one we see. 1185ffd83dbSDimitry Andric if (FirstTombstone == -1) 1195ffd83dbSDimitry Andric FirstTombstone = BucketNo; 1200b57cec5SDimitry Andric } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) { 1210b57cec5SDimitry Andric // If the full hash value matches, check deeply for a match. The common 1220b57cec5SDimitry Andric // case here is that we are only looking at the buckets (for item info 1230b57cec5SDimitry Andric // being non-null and for the full hash value) not at the items. This 1240b57cec5SDimitry Andric // is important for cache locality. 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric // Do the comparison like this because Name isn't necessarily 1270b57cec5SDimitry Andric // null-terminated! 1280b57cec5SDimitry Andric char *ItemStr = (char *)BucketItem + ItemSize; 1290b57cec5SDimitry Andric if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) { 1300b57cec5SDimitry Andric // We found a match! 1310b57cec5SDimitry Andric return BucketNo; 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric // Okay, we didn't find the item. Probe to the next bucket. 13681ad6265SDimitry Andric BucketNo = (BucketNo + ProbeAmt) & (NumBuckets - 1); 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric // Use quadratic probing, it has fewer clumping artifacts than linear 1390b57cec5SDimitry Andric // probing and has good cache behavior in the common case. 1400b57cec5SDimitry Andric ++ProbeAmt; 1410b57cec5SDimitry Andric } 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric 1440b57cec5SDimitry Andric /// FindKey - Look up the bucket that contains the specified key. If it exists 1450b57cec5SDimitry Andric /// in the map, return the bucket number of the key. Otherwise return -1. 1460b57cec5SDimitry Andric /// This does not modify the map. 147*0fca6ea1SDimitry Andric int StringMapImpl::FindKey(StringRef Key, uint32_t FullHashValue) const { 14881ad6265SDimitry Andric if (NumBuckets == 0) 1495ffd83dbSDimitry Andric return -1; // Really empty table? 150*0fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS 151*0fca6ea1SDimitry Andric assert(FullHashValue == hash(Key)); 152*0fca6ea1SDimitry Andric #endif 15306c3fb27SDimitry Andric if (shouldReverseIterate()) 15406c3fb27SDimitry Andric FullHashValue = ~FullHashValue; 15581ad6265SDimitry Andric unsigned BucketNo = FullHashValue & (NumBuckets - 1); 15681ad6265SDimitry Andric unsigned *HashTable = getHashTable(TheTable, NumBuckets); 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric unsigned ProbeAmt = 1; 1590b57cec5SDimitry Andric while (true) { 1600b57cec5SDimitry Andric StringMapEntryBase *BucketItem = TheTable[BucketNo]; 1610b57cec5SDimitry Andric // If we found an empty bucket, this key isn't in the table yet, return. 1620b57cec5SDimitry Andric if (LLVM_LIKELY(!BucketItem)) 1630b57cec5SDimitry Andric return -1; 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric if (BucketItem == getTombstoneVal()) { 1660b57cec5SDimitry Andric // Ignore tombstones. 1670b57cec5SDimitry Andric } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) { 1680b57cec5SDimitry Andric // If the full hash value matches, check deeply for a match. The common 1690b57cec5SDimitry Andric // case here is that we are only looking at the buckets (for item info 1700b57cec5SDimitry Andric // being non-null and for the full hash value) not at the items. This 1710b57cec5SDimitry Andric // is important for cache locality. 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric // Do the comparison like this because NameStart isn't necessarily 1740b57cec5SDimitry Andric // null-terminated! 1750b57cec5SDimitry Andric char *ItemStr = (char *)BucketItem + ItemSize; 1760b57cec5SDimitry Andric if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) { 1770b57cec5SDimitry Andric // We found a match! 1780b57cec5SDimitry Andric return BucketNo; 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric } 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric // Okay, we didn't find the item. Probe to the next bucket. 18381ad6265SDimitry Andric BucketNo = (BucketNo + ProbeAmt) & (NumBuckets - 1); 1840b57cec5SDimitry Andric 1850b57cec5SDimitry Andric // Use quadratic probing, it has fewer clumping artifacts than linear 1860b57cec5SDimitry Andric // probing and has good cache behavior in the common case. 1870b57cec5SDimitry Andric ++ProbeAmt; 1880b57cec5SDimitry Andric } 1890b57cec5SDimitry Andric } 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric /// RemoveKey - Remove the specified StringMapEntry from the table, but do not 1920b57cec5SDimitry Andric /// delete it. This aborts if the value isn't in the table. 1930b57cec5SDimitry Andric void StringMapImpl::RemoveKey(StringMapEntryBase *V) { 1940b57cec5SDimitry Andric const char *VStr = (char *)V + ItemSize; 1950b57cec5SDimitry Andric StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength())); 1960b57cec5SDimitry Andric (void)V2; 1970b57cec5SDimitry Andric assert(V == V2 && "Didn't find key?"); 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric 2000b57cec5SDimitry Andric /// RemoveKey - Remove the StringMapEntry for the specified key from the 2010b57cec5SDimitry Andric /// table, returning it. If the key is not in the table, this returns null. 2020b57cec5SDimitry Andric StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { 2030b57cec5SDimitry Andric int Bucket = FindKey(Key); 2045ffd83dbSDimitry Andric if (Bucket == -1) 2055ffd83dbSDimitry Andric return nullptr; 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric StringMapEntryBase *Result = TheTable[Bucket]; 2080b57cec5SDimitry Andric TheTable[Bucket] = getTombstoneVal(); 2090b57cec5SDimitry Andric --NumItems; 2100b57cec5SDimitry Andric ++NumTombstones; 2110b57cec5SDimitry Andric assert(NumItems + NumTombstones <= NumBuckets); 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric return Result; 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric /// RehashTable - Grow the table, redistributing values into the buckets with 2170b57cec5SDimitry Andric /// the appropriate mod-of-hashtable-size. 2180b57cec5SDimitry Andric unsigned StringMapImpl::RehashTable(unsigned BucketNo) { 2190b57cec5SDimitry Andric unsigned NewSize; 2200b57cec5SDimitry Andric // If the hash table is now more than 3/4 full, or if fewer than 1/8 of 2210b57cec5SDimitry Andric // the buckets are empty (meaning that many are filled with tombstones), 2220b57cec5SDimitry Andric // grow/rehash the table. 2230b57cec5SDimitry Andric if (LLVM_UNLIKELY(NumItems * 4 > NumBuckets * 3)) { 2240b57cec5SDimitry Andric NewSize = NumBuckets * 2; 2250b57cec5SDimitry Andric } else if (LLVM_UNLIKELY(NumBuckets - (NumItems + NumTombstones) <= 2260b57cec5SDimitry Andric NumBuckets / 8)) { 2270b57cec5SDimitry Andric NewSize = NumBuckets; 2280b57cec5SDimitry Andric } else { 2290b57cec5SDimitry Andric return BucketNo; 2300b57cec5SDimitry Andric } 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric unsigned NewBucketNo = BucketNo; 23381ad6265SDimitry Andric auto **NewTableArray = createTable(NewSize); 23481ad6265SDimitry Andric unsigned *NewHashArray = getHashTable(NewTableArray, NewSize); 23581ad6265SDimitry Andric unsigned *HashTable = getHashTable(TheTable, NumBuckets); 2360b57cec5SDimitry Andric 2370b57cec5SDimitry Andric // Rehash all the items into their new buckets. Luckily :) we already have 2380b57cec5SDimitry Andric // the hash values available, so we don't have to rehash any strings. 2390b57cec5SDimitry Andric for (unsigned I = 0, E = NumBuckets; I != E; ++I) { 2400b57cec5SDimitry Andric StringMapEntryBase *Bucket = TheTable[I]; 2410b57cec5SDimitry Andric if (Bucket && Bucket != getTombstoneVal()) { 24281ad6265SDimitry Andric // If the bucket is not available, probe for a spot. 2430b57cec5SDimitry Andric unsigned FullHash = HashTable[I]; 2440b57cec5SDimitry Andric unsigned NewBucket = FullHash & (NewSize - 1); 24581ad6265SDimitry Andric if (NewTableArray[NewBucket]) { 2460b57cec5SDimitry Andric unsigned ProbeSize = 1; 2470b57cec5SDimitry Andric do { 2480b57cec5SDimitry Andric NewBucket = (NewBucket + ProbeSize++) & (NewSize - 1); 2490b57cec5SDimitry Andric } while (NewTableArray[NewBucket]); 25081ad6265SDimitry Andric } 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric // Finally found a slot. Fill it in. 2530b57cec5SDimitry Andric NewTableArray[NewBucket] = Bucket; 2540b57cec5SDimitry Andric NewHashArray[NewBucket] = FullHash; 2550b57cec5SDimitry Andric if (I == BucketNo) 2560b57cec5SDimitry Andric NewBucketNo = NewBucket; 2570b57cec5SDimitry Andric } 2580b57cec5SDimitry Andric } 2590b57cec5SDimitry Andric 2600b57cec5SDimitry Andric free(TheTable); 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric TheTable = NewTableArray; 2630b57cec5SDimitry Andric NumBuckets = NewSize; 2640b57cec5SDimitry Andric NumTombstones = 0; 2650b57cec5SDimitry Andric return NewBucketNo; 2660b57cec5SDimitry Andric } 267