xref: /llvm-project/clang/lib/Lex/HeaderMap.cpp (revision 46dc91e7d9a1b6dd0144e628519d06954b7b4e53)
1 //===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the HeaderMap interface.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Lex/HeaderMap.h"
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Lex/HeaderMapTypes.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/Support/Compiler.h"
19 #include "llvm/Support/DataTypes.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/MathExtras.h"
22 #include "llvm/Support/MemoryBuffer.h"
23 #include "llvm/Support/SwapByteOrder.h"
24 #include "llvm/Support/SystemZ/zOSSupport.h"
25 #include <cstring>
26 #include <memory>
27 #include <optional>
28 using namespace clang;
29 
30 /// HashHMapKey - This is the 'well known' hash function required by the file
31 /// format, used to look up keys in the hash table.  The hash table uses simple
32 /// linear probing based on this function.
33 static inline unsigned HashHMapKey(StringRef Str) {
34   unsigned Result = 0;
35   const char *S = Str.begin(), *End = Str.end();
36 
37   for (; S != End; S++)
38     Result += toLowercase(*S) * 13;
39   return Result;
40 }
41 
42 
43 
44 //===----------------------------------------------------------------------===//
45 // Verification and Construction
46 //===----------------------------------------------------------------------===//
47 
48 /// HeaderMap::Create - This attempts to load the specified file as a header
49 /// map.  If it doesn't look like a HeaderMap, it gives up and returns null.
50 /// If it looks like a HeaderMap but is obviously corrupted, it puts a reason
51 /// into the string error argument and returns null.
52 std::unique_ptr<HeaderMap> HeaderMap::Create(FileEntryRef FE, FileManager &FM) {
53   // If the file is too small to be a header map, ignore it.
54   unsigned FileSize = FE.getSize();
55   if (FileSize <= sizeof(HMapHeader)) return nullptr;
56 
57   auto FileBuffer =
58       FM.getBufferForFile(FE, /*IsVolatile=*/false,
59                           /*RequiresNullTerminator=*/true,
60                           /*MaybeList=*/std::nullopt, /*IsText=*/false);
61   if (!FileBuffer || !*FileBuffer)
62     return nullptr;
63   bool NeedsByteSwap;
64   if (!checkHeader(**FileBuffer, NeedsByteSwap))
65     return nullptr;
66   return std::unique_ptr<HeaderMap>(new HeaderMap(std::move(*FileBuffer), NeedsByteSwap));
67 }
68 
69 bool HeaderMapImpl::checkHeader(const llvm::MemoryBuffer &File,
70                                 bool &NeedsByteSwap) {
71   if (File.getBufferSize() <= sizeof(HMapHeader))
72     return false;
73   const char *FileStart = File.getBufferStart();
74 
75   // We know the file is at least as big as the header, check it now.
76   const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart);
77 
78   // Sniff it to see if it's a headermap by checking the magic number and
79   // version.
80   if (Header->Magic == HMAP_HeaderMagicNumber &&
81       Header->Version == HMAP_HeaderVersion)
82     NeedsByteSwap = false;
83   else if (Header->Magic == llvm::byteswap<uint32_t>(HMAP_HeaderMagicNumber) &&
84            Header->Version == llvm::byteswap<uint16_t>(HMAP_HeaderVersion))
85     NeedsByteSwap = true;  // Mixed endianness headermap.
86   else
87     return false;  // Not a header map.
88 
89   if (Header->Reserved != 0)
90     return false;
91 
92   // Check the number of buckets.  It should be a power of two, and there
93   // should be enough space in the file for all of them.
94   uint32_t NumBuckets =
95       NeedsByteSwap ? llvm::byteswap(Header->NumBuckets) : Header->NumBuckets;
96   if (!llvm::isPowerOf2_32(NumBuckets))
97     return false;
98   if (File.getBufferSize() <
99       sizeof(HMapHeader) + sizeof(HMapBucket) * NumBuckets)
100     return false;
101 
102   // Okay, everything looks good.
103   return true;
104 }
105 
106 //===----------------------------------------------------------------------===//
107 //  Utility Methods
108 //===----------------------------------------------------------------------===//
109 
110 
111 /// getFileName - Return the filename of the headermap.
112 StringRef HeaderMapImpl::getFileName() const {
113   return FileBuffer->getBufferIdentifier();
114 }
115 
116 unsigned HeaderMapImpl::getEndianAdjustedWord(unsigned X) const {
117   if (!NeedsBSwap) return X;
118   return llvm::byteswap<uint32_t>(X);
119 }
120 
121 /// getHeader - Return a reference to the file header, in unbyte-swapped form.
122 /// This method cannot fail.
123 const HMapHeader &HeaderMapImpl::getHeader() const {
124   // We know the file is at least as big as the header.  Return it.
125   return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart());
126 }
127 
128 /// getBucket - Return the specified hash table bucket from the header map,
129 /// bswap'ing its fields as appropriate.  If the bucket number is not valid,
130 /// this return a bucket with an empty key (0).
131 HMapBucket HeaderMapImpl::getBucket(unsigned BucketNo) const {
132   assert(FileBuffer->getBufferSize() >=
133              sizeof(HMapHeader) + sizeof(HMapBucket) * BucketNo &&
134          "Expected bucket to be in range");
135 
136   HMapBucket Result;
137   Result.Key = HMAP_EmptyBucketKey;
138 
139   const HMapBucket *BucketArray =
140     reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() +
141                                         sizeof(HMapHeader));
142   const HMapBucket *BucketPtr = BucketArray+BucketNo;
143 
144   // Load the values, bswapping as needed.
145   Result.Key    = getEndianAdjustedWord(BucketPtr->Key);
146   Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix);
147   Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix);
148   return Result;
149 }
150 
151 std::optional<StringRef> HeaderMapImpl::getString(unsigned StrTabIdx) const {
152   // Add the start of the string table to the idx.
153   StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);
154 
155   // Check for invalid index.
156   if (StrTabIdx >= FileBuffer->getBufferSize())
157     return std::nullopt;
158 
159   const char *Data = FileBuffer->getBufferStart() + StrTabIdx;
160   unsigned MaxLen = FileBuffer->getBufferSize() - StrTabIdx;
161   unsigned Len = strnlen(Data, MaxLen);
162 
163   // Check whether the buffer is null-terminated.
164   if (Len == MaxLen && Data[Len - 1])
165     return std::nullopt;
166 
167   return StringRef(Data, Len);
168 }
169 
170 //===----------------------------------------------------------------------===//
171 // The Main Drivers
172 //===----------------------------------------------------------------------===//
173 
174 /// dump - Print the contents of this headermap to stderr.
175 LLVM_DUMP_METHOD void HeaderMapImpl::dump() const {
176   const HMapHeader &Hdr = getHeader();
177   unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
178 
179   llvm::dbgs() << "Header Map " << getFileName() << ":\n  " << NumBuckets
180                << ", " << getEndianAdjustedWord(Hdr.NumEntries) << "\n";
181 
182   auto getStringOrInvalid = [this](unsigned Id) -> StringRef {
183     if (std::optional<StringRef> S = getString(Id))
184       return *S;
185     return "<invalid>";
186   };
187 
188   for (unsigned i = 0; i != NumBuckets; ++i) {
189     HMapBucket B = getBucket(i);
190     if (B.Key == HMAP_EmptyBucketKey) continue;
191 
192     StringRef Key = getStringOrInvalid(B.Key);
193     StringRef Prefix = getStringOrInvalid(B.Prefix);
194     StringRef Suffix = getStringOrInvalid(B.Suffix);
195     llvm::dbgs() << "  " << i << ". " << Key << " -> '" << Prefix << "' '"
196                  << Suffix << "'\n";
197   }
198 }
199 
200 StringRef HeaderMapImpl::lookupFilename(StringRef Filename,
201                                         SmallVectorImpl<char> &DestPath) const {
202   const HMapHeader &Hdr = getHeader();
203   unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
204 
205   // Don't probe infinitely.  This should be checked before constructing.
206   assert(llvm::isPowerOf2_32(NumBuckets) && "Expected power of 2");
207 
208   // Linearly probe the hash table.
209   for (unsigned Bucket = HashHMapKey(Filename);; ++Bucket) {
210     HMapBucket B = getBucket(Bucket & (NumBuckets-1));
211     if (B.Key == HMAP_EmptyBucketKey) return StringRef(); // Hash miss.
212 
213     // See if the key matches.  If not, probe on.
214     std::optional<StringRef> Key = getString(B.Key);
215     if (LLVM_UNLIKELY(!Key))
216       continue;
217     if (!Filename.equals_insensitive(*Key))
218       continue;
219 
220     // If so, we have a match in the hash table.  Construct the destination
221     // path.
222     std::optional<StringRef> Prefix = getString(B.Prefix);
223     std::optional<StringRef> Suffix = getString(B.Suffix);
224 
225     DestPath.clear();
226     if (LLVM_LIKELY(Prefix && Suffix)) {
227       DestPath.append(Prefix->begin(), Prefix->end());
228       DestPath.append(Suffix->begin(), Suffix->end());
229     }
230     return StringRef(DestPath.begin(), DestPath.size());
231   }
232 }
233 
234 StringRef HeaderMapImpl::reverseLookupFilename(StringRef DestPath) const {
235   if (!ReverseMap.empty())
236     return ReverseMap.lookup(DestPath);
237 
238   const HMapHeader &Hdr = getHeader();
239   unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
240   StringRef RetKey;
241   for (unsigned i = 0; i != NumBuckets; ++i) {
242     HMapBucket B = getBucket(i);
243     if (B.Key == HMAP_EmptyBucketKey)
244       continue;
245 
246     std::optional<StringRef> Key = getString(B.Key);
247     std::optional<StringRef> Prefix = getString(B.Prefix);
248     std::optional<StringRef> Suffix = getString(B.Suffix);
249     if (LLVM_LIKELY(Key && Prefix && Suffix)) {
250       SmallVector<char, 1024> Buf;
251       Buf.append(Prefix->begin(), Prefix->end());
252       Buf.append(Suffix->begin(), Suffix->end());
253       StringRef Value(Buf.begin(), Buf.size());
254       ReverseMap[Value] = *Key;
255 
256       if (DestPath == Value)
257         RetKey = *Key;
258     }
259   }
260   return RetKey;
261 }
262