xref: /openbsd-src/gnu/llvm/lldb/include/lldb/Core/MappedHash.h (revision be691f3bb6417f04a68938fadbcaee2d5795e764)
1 //===-- MappedHash.h --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_CORE_MAPPEDHASH_H
10 #define LLDB_CORE_MAPPEDHASH_H
11 
12 #include <cassert>
13 #include <cstdint>
14 
15 #include <algorithm>
16 #include <functional>
17 #include <map>
18 #include <vector>
19 
20 #include "lldb/Utility/DataExtractor.h"
21 #include "lldb/Utility/Stream.h"
22 #include "llvm/Support/DJB.h"
23 
24 class MappedHash {
25 public:
26   enum HashFunctionType {
27     eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used
28                           // by the ELF GNU_HASH sections
29   };
30 
HashString(uint32_t hash_function,llvm::StringRef s)31   static uint32_t HashString(uint32_t hash_function, llvm::StringRef s) {
32     switch (hash_function) {
33     case MappedHash::eHashFunctionDJB:
34       return llvm::djbHash(s);
35 
36     default:
37       break;
38     }
39     llvm_unreachable("Invalid hash function index");
40   }
41 
42   static const uint32_t HASH_MAGIC = 0x48415348u;
43   static const uint32_t HASH_CIGAM = 0x48534148u;
44 
45   template <typename T> struct Header {
46     typedef T HeaderData;
47 
48     uint32_t
49         magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
50     uint16_t version = 1; // Version number
51     uint16_t hash_function =
52         eHashFunctionDJB;      // The hash function enumeration that was used
53     uint32_t bucket_count = 0; // The number of buckets in this hash table
54     uint32_t hashes_count = 0; // The total number of unique hash values and
55                                // hash data offsets in this table
56     uint32_t header_data_len; // The size in bytes of the "header_data" template
57                               // member below
58     HeaderData header_data;   //
59 
HeaderHeader60     Header() : magic(HASH_MAGIC), header_data_len(sizeof(T)), header_data() {}
61 
62     virtual ~Header() = default;
63 
GetByteSizeHeader64     size_t GetByteSize() const {
65       return sizeof(magic) + sizeof(version) + sizeof(hash_function) +
66              sizeof(bucket_count) + sizeof(hashes_count) +
67              sizeof(header_data_len) + header_data_len;
68     }
69 
70     virtual size_t GetByteSize(const HeaderData &header_data) = 0;
71 
SetHeaderDataByteSizeHeader72     void SetHeaderDataByteSize(uint32_t header_data_byte_size) {
73       header_data_len = header_data_byte_size;
74     }
75 
DumpHeader76     void Dump(lldb_private::Stream &s) {
77       s.Printf("header.magic              = 0x%8.8x\n", magic);
78       s.Printf("header.version            = 0x%4.4x\n", version);
79       s.Printf("header.hash_function      = 0x%4.4x\n", hash_function);
80       s.Printf("header.bucket_count       = 0x%8.8x %u\n", bucket_count,
81                bucket_count);
82       s.Printf("header.hashes_count       = 0x%8.8x %u\n", hashes_count,
83                hashes_count);
84       s.Printf("header.header_data_len    = 0x%8.8x %u\n", header_data_len,
85                header_data_len);
86     }
87 
ReadHeader88     virtual lldb::offset_t Read(lldb_private::DataExtractor &data,
89                                 lldb::offset_t offset) {
90       if (data.ValidOffsetForDataOfSize(
91               offset, sizeof(magic) + sizeof(version) + sizeof(hash_function) +
92                           sizeof(bucket_count) + sizeof(hashes_count) +
93                           sizeof(header_data_len))) {
94         magic = data.GetU32(&offset);
95         if (magic != HASH_MAGIC) {
96           if (magic == HASH_CIGAM) {
97             switch (data.GetByteOrder()) {
98             case lldb::eByteOrderBig:
99               data.SetByteOrder(lldb::eByteOrderLittle);
100               break;
101             case lldb::eByteOrderLittle:
102               data.SetByteOrder(lldb::eByteOrderBig);
103               break;
104             default:
105               return LLDB_INVALID_OFFSET;
106             }
107           } else {
108             // Magic bytes didn't match
109             version = 0;
110             return LLDB_INVALID_OFFSET;
111           }
112         }
113 
114         version = data.GetU16(&offset);
115         if (version != 1) {
116           // Unsupported version
117           return LLDB_INVALID_OFFSET;
118         }
119         hash_function = data.GetU16(&offset);
120         if (hash_function == 4)
121           hash_function = 0; // Deal with pre-release version of this table...
122         bucket_count = data.GetU32(&offset);
123         hashes_count = data.GetU32(&offset);
124         header_data_len = data.GetU32(&offset);
125         return offset;
126       }
127       return LLDB_INVALID_OFFSET;
128     }
129     //
130     //        // Returns a buffer that contains a serialized version of this
131     //        table
132     //        // that must be freed with free().
133     //        virtual void *
134     //        Write (int fd);
135   };
136 
137   // A class for reading and using a saved hash table from a block of data
138   // in memory
139   template <typename __KeyType, class __HeaderType, class __HashData>
140   class MemoryTable {
141   public:
142     typedef __HeaderType HeaderType;
143     typedef __KeyType KeyType;
144     typedef __HashData HashData;
145 
146     enum Result {
147       eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was
148                             // filled in successfully
149       eResultKeyMismatch =
150           1u, // Bucket hash data collision, but key didn't match
151       eResultEndOfHashData = 2u, // The chain of items for this hash data in
152                                  // this bucket is terminated, search no more
153       eResultError = 3u          // Status parsing the hash data, abort
154     };
155 
156     struct Pair {
157       KeyType key;
158       HashData value;
159     };
160 
MemoryTable(lldb_private::DataExtractor & data)161     MemoryTable(lldb_private::DataExtractor &data)
162         : m_header(), m_hash_indexes(nullptr), m_hash_values(nullptr),
163           m_hash_offsets(nullptr) {
164       lldb::offset_t offset = m_header.Read(data, 0);
165       if (offset != LLDB_INVALID_OFFSET && IsValid()) {
166         m_hash_indexes = (const uint32_t *)data.GetData(
167             &offset, m_header.bucket_count * sizeof(uint32_t));
168         m_hash_values = (const uint32_t *)data.GetData(
169             &offset, m_header.hashes_count * sizeof(uint32_t));
170         m_hash_offsets = (const uint32_t *)data.GetData(
171             &offset, m_header.hashes_count * sizeof(uint32_t));
172       }
173     }
174 
175     virtual ~MemoryTable() = default;
176 
IsValid()177     bool IsValid() const {
178       return m_header.version == 1 &&
179              m_header.hash_function == eHashFunctionDJB &&
180              m_header.bucket_count > 0;
181     }
182 
GetHashIndex(uint32_t bucket_idx)183     uint32_t GetHashIndex(uint32_t bucket_idx) const {
184       uint32_t result = UINT32_MAX;
185       if (m_hash_indexes && bucket_idx < m_header.bucket_count)
186         memcpy(&result, m_hash_indexes + bucket_idx, sizeof(uint32_t));
187       return result;
188     }
189 
GetHashValue(uint32_t hash_idx)190     uint32_t GetHashValue(uint32_t hash_idx) const {
191       uint32_t result = UINT32_MAX;
192       if (m_hash_values && hash_idx < m_header.hashes_count)
193         memcpy(&result, m_hash_values + hash_idx, sizeof(uint32_t));
194       return result;
195     }
196 
GetHashDataOffset(uint32_t hash_idx)197     uint32_t GetHashDataOffset(uint32_t hash_idx) const {
198       uint32_t result = UINT32_MAX;
199       if (m_hash_offsets && hash_idx < m_header.hashes_count)
200         memcpy(&result, m_hash_offsets + hash_idx, sizeof(uint32_t));
201       return result;
202     }
203 
Find(llvm::StringRef name,Pair & pair)204     bool Find(llvm::StringRef name, Pair &pair) const {
205       if (name.empty())
206         return false;
207 
208       if (IsValid()) {
209         const uint32_t bucket_count = m_header.bucket_count;
210         const uint32_t hash_count = m_header.hashes_count;
211         const uint32_t hash_value =
212             MappedHash::HashString(m_header.hash_function, name);
213         const uint32_t bucket_idx = hash_value % bucket_count;
214         uint32_t hash_idx = GetHashIndex(bucket_idx);
215         if (hash_idx < hash_count) {
216           for (; hash_idx < hash_count; ++hash_idx) {
217             const uint32_t curr_hash_value = GetHashValue(hash_idx);
218             if (curr_hash_value == hash_value) {
219               lldb::offset_t hash_data_offset = GetHashDataOffset(hash_idx);
220               while (hash_data_offset != UINT32_MAX) {
221                 const lldb::offset_t prev_hash_data_offset = hash_data_offset;
222                 Result hash_result =
223                     GetHashDataForName(name, &hash_data_offset, pair);
224                 // Check the result of getting our hash data
225                 switch (hash_result) {
226                 case eResultKeyMatch:
227                   return true;
228 
229                 case eResultKeyMismatch:
230                   if (prev_hash_data_offset == hash_data_offset)
231                     return false;
232                   break;
233 
234                 case eResultEndOfHashData:
235                   // The last HashData for this key has been reached, stop
236                   // searching
237                   return false;
238                 case eResultError:
239                   // Status parsing the hash data, abort
240                   return false;
241                 }
242               }
243             }
244             if ((curr_hash_value % bucket_count) != bucket_idx)
245               break;
246           }
247         }
248       }
249       return false;
250     }
251 
252     // This method must be implemented in any subclasses. The KeyType is user
253     // specified and must somehow result in a string value. For example, the
254     // KeyType might be a string offset in a string table and subclasses can
255     // store their string table as a member of the subclass and return a valie
256     // "const char *" given a "key". The value could also be a C string
257     // pointer, in which case just returning "key" will suffice.
258     virtual const char *GetStringForKeyType(KeyType key) const = 0;
259 
260     virtual bool ReadHashData(uint32_t hash_data_offset,
261                               HashData &hash_data) const = 0;
262 
263     // This method must be implemented in any subclasses and it must try to
264     // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
265     // parameter. This offset should be updated as bytes are consumed and a
266     // value "Result" enum should be returned. If the "name" matches the full
267     // name for the "pair.key" (which must be filled in by this call), then the
268     // HashData in the pair ("pair.value") should be extracted and filled in
269     // and "eResultKeyMatch" should be returned. If "name" doesn't match this
270     // string for the key, then "eResultKeyMismatch" should be returned and all
271     // data for the current HashData must be consumed or skipped and the
272     // "hash_data_offset_ptr" offset needs to be updated to point to the next
273     // HashData. If the end of the HashData objects for a given hash value have
274     // been reached, then "eResultEndOfHashData" should be returned. If
275     // anything else goes wrong during parsing, return "eResultError" and the
276     // corresponding "Find()" function will be canceled and return false.
277     virtual Result GetHashDataForName(llvm::StringRef name,
278                                       lldb::offset_t *hash_data_offset_ptr,
279                                       Pair &pair) const = 0;
280 
GetHeader()281     const HeaderType &GetHeader() { return m_header; }
282 
ForEach(std::function<bool (const HashData & hash_data)> const & callback)283     void ForEach(
284         std::function<bool(const HashData &hash_data)> const &callback) const {
285       const size_t num_hash_offsets = m_header.hashes_count;
286       for (size_t i = 0; i < num_hash_offsets; ++i) {
287         uint32_t hash_data_offset = GetHashDataOffset(i);
288         if (hash_data_offset != UINT32_MAX) {
289           HashData hash_data;
290           if (ReadHashData(hash_data_offset, hash_data)) {
291             // If the callback returns false, then we are done and should stop
292             if (callback(hash_data) == false)
293               return;
294           }
295         }
296       }
297     }
298 
299   protected:
300     // Implementation agnostic information
301     HeaderType m_header;
302     const uint32_t *m_hash_indexes;
303     const uint32_t *m_hash_values;
304     const uint32_t *m_hash_offsets;
305   };
306 };
307 
308 #endif // LLDB_CORE_MAPPEDHASH_H
309