xref: /llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp (revision 97ec8fa5bb07e3f5bf25ddcb216b545cd3d03b65)
1 //===- CoverageMappingReader.cpp - Code coverage mapping reader -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading coverage mapping data for
10 // instrumentation based coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ProfileData/Coverage/CoverageMappingReader.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/Triple.h"
22 #include "llvm/Object/Binary.h"
23 #include "llvm/Object/Error.h"
24 #include "llvm/Object/MachOUniversal.h"
25 #include "llvm/Object/ObjectFile.h"
26 #include "llvm/Object/COFF.h"
27 #include "llvm/ProfileData/InstrProf.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/Compression.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/Endian.h"
32 #include "llvm/Support/Error.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/LEB128.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include <vector>
38 
39 using namespace llvm;
40 using namespace coverage;
41 using namespace object;
42 
43 #define DEBUG_TYPE "coverage-mapping"
44 
45 STATISTIC(CovMapNumRecords, "The # of coverage function records");
46 STATISTIC(CovMapNumUsedRecords, "The # of used coverage function records");
47 
48 void CoverageMappingIterator::increment() {
49   if (ReadErr != coveragemap_error::success)
50     return;
51 
52   // Check if all the records were read or if an error occurred while reading
53   // the next record.
54   if (auto E = Reader->readNextRecord(Record))
55     handleAllErrors(std::move(E), [&](const CoverageMapError &CME) {
56       if (CME.get() == coveragemap_error::eof)
57         *this = CoverageMappingIterator();
58       else
59         ReadErr = CME.get();
60     });
61 }
62 
63 Error RawCoverageReader::readULEB128(uint64_t &Result) {
64   if (Data.empty())
65     return make_error<CoverageMapError>(coveragemap_error::truncated);
66   unsigned N = 0;
67   Result = decodeULEB128(Data.bytes_begin(), &N);
68   if (N > Data.size())
69     return make_error<CoverageMapError>(coveragemap_error::malformed);
70   Data = Data.substr(N);
71   return Error::success();
72 }
73 
74 Error RawCoverageReader::readIntMax(uint64_t &Result, uint64_t MaxPlus1) {
75   if (auto Err = readULEB128(Result))
76     return Err;
77   if (Result >= MaxPlus1)
78     return make_error<CoverageMapError>(coveragemap_error::malformed);
79   return Error::success();
80 }
81 
82 Error RawCoverageReader::readSize(uint64_t &Result) {
83   if (auto Err = readULEB128(Result))
84     return Err;
85   // Sanity check the number.
86   if (Result > Data.size())
87     return make_error<CoverageMapError>(coveragemap_error::malformed);
88   return Error::success();
89 }
90 
91 Error RawCoverageReader::readString(StringRef &Result) {
92   uint64_t Length;
93   if (auto Err = readSize(Length))
94     return Err;
95   Result = Data.substr(0, Length);
96   Data = Data.substr(Length);
97   return Error::success();
98 }
99 
100 Error RawCoverageFilenamesReader::read(CovMapVersion Version) {
101   uint64_t NumFilenames;
102   if (auto Err = readSize(NumFilenames))
103     return Err;
104   if (!NumFilenames)
105     return make_error<CoverageMapError>(coveragemap_error::malformed);
106 
107   if (Version < CovMapVersion::Version4)
108     return readUncompressed(Version, NumFilenames);
109 
110   // The uncompressed length may exceed the size of the encoded filenames.
111   // Skip size validation.
112   uint64_t UncompressedLen;
113   if (auto Err = readULEB128(UncompressedLen))
114     return Err;
115 
116   uint64_t CompressedLen;
117   if (auto Err = readSize(CompressedLen))
118     return Err;
119 
120   if (CompressedLen > 0) {
121     if (!zlib::isAvailable())
122       return make_error<CoverageMapError>(
123           coveragemap_error::decompression_failed);
124 
125     // Allocate memory for the decompressed filenames.
126     SmallVector<char, 0> StorageBuf;
127 
128     // Read compressed filenames.
129     StringRef CompressedFilenames = Data.substr(0, CompressedLen);
130     Data = Data.substr(CompressedLen);
131     auto Err =
132         zlib::uncompress(CompressedFilenames, StorageBuf, UncompressedLen);
133     if (Err) {
134       consumeError(std::move(Err));
135       return make_error<CoverageMapError>(
136           coveragemap_error::decompression_failed);
137     }
138 
139     StringRef UncompressedFilenames(StorageBuf.data(), StorageBuf.size());
140     RawCoverageFilenamesReader Delegate(UncompressedFilenames, Filenames);
141     return Delegate.readUncompressed(Version, NumFilenames);
142   }
143 
144   return readUncompressed(Version, NumFilenames);
145 }
146 
147 Error RawCoverageFilenamesReader::readUncompressed(CovMapVersion Version,
148                                                    uint64_t NumFilenames) {
149   // Read uncompressed filenames.
150   if (Version < CovMapVersion::Version6) {
151     for (size_t I = 0; I < NumFilenames; ++I) {
152       StringRef Filename;
153       if (auto Err = readString(Filename))
154         return Err;
155       Filenames.push_back(Filename.str());
156     }
157   } else {
158     StringRef CWD;
159     if (auto Err = readString(CWD))
160       return Err;
161     Filenames.push_back(CWD.str());
162 
163     for (size_t I = 1; I < NumFilenames; ++I) {
164       StringRef Filename;
165       if (auto Err = readString(Filename))
166         return Err;
167       if (sys::path::is_absolute(Filename)) {
168         Filenames.push_back(Filename.str());
169       } else {
170         SmallString<256> P(CWD);
171         llvm::sys::path::append(P, Filename);
172         Filenames.push_back(static_cast<std::string>(P));
173       }
174     }
175   }
176   return Error::success();
177 }
178 
179 Error RawCoverageMappingReader::decodeCounter(unsigned Value, Counter &C) {
180   auto Tag = Value & Counter::EncodingTagMask;
181   switch (Tag) {
182   case Counter::Zero:
183     C = Counter::getZero();
184     return Error::success();
185   case Counter::CounterValueReference:
186     C = Counter::getCounter(Value >> Counter::EncodingTagBits);
187     return Error::success();
188   default:
189     break;
190   }
191   Tag -= Counter::Expression;
192   switch (Tag) {
193   case CounterExpression::Subtract:
194   case CounterExpression::Add: {
195     auto ID = Value >> Counter::EncodingTagBits;
196     if (ID >= Expressions.size())
197       return make_error<CoverageMapError>(coveragemap_error::malformed);
198     Expressions[ID].Kind = CounterExpression::ExprKind(Tag);
199     C = Counter::getExpression(ID);
200     break;
201   }
202   default:
203     return make_error<CoverageMapError>(coveragemap_error::malformed);
204   }
205   return Error::success();
206 }
207 
208 Error RawCoverageMappingReader::readCounter(Counter &C) {
209   uint64_t EncodedCounter;
210   if (auto Err =
211           readIntMax(EncodedCounter, std::numeric_limits<unsigned>::max()))
212     return Err;
213   if (auto Err = decodeCounter(EncodedCounter, C))
214     return Err;
215   return Error::success();
216 }
217 
218 static const unsigned EncodingExpansionRegionBit = 1
219                                                    << Counter::EncodingTagBits;
220 
221 /// Read the sub-array of regions for the given inferred file id.
222 /// \param NumFileIDs the number of file ids that are defined for this
223 /// function.
224 Error RawCoverageMappingReader::readMappingRegionsSubArray(
225     std::vector<CounterMappingRegion> &MappingRegions, unsigned InferredFileID,
226     size_t NumFileIDs) {
227   uint64_t NumRegions;
228   if (auto Err = readSize(NumRegions))
229     return Err;
230   unsigned LineStart = 0;
231   for (size_t I = 0; I < NumRegions; ++I) {
232     Counter C, C2;
233     CounterMappingRegion::RegionKind Kind = CounterMappingRegion::CodeRegion;
234 
235     // Read the combined counter + region kind.
236     uint64_t EncodedCounterAndRegion;
237     if (auto Err = readIntMax(EncodedCounterAndRegion,
238                               std::numeric_limits<unsigned>::max()))
239       return Err;
240     unsigned Tag = EncodedCounterAndRegion & Counter::EncodingTagMask;
241     uint64_t ExpandedFileID = 0;
242 
243     // If Tag does not represent a ZeroCounter, then it is understood to refer
244     // to a counter or counter expression with region kind assumed to be
245     // "CodeRegion". In that case, EncodedCounterAndRegion actually encodes the
246     // referenced counter or counter expression (and nothing else).
247     //
248     // If Tag represents a ZeroCounter and EncodingExpansionRegionBit is set,
249     // then EncodedCounterAndRegion is interpreted to represent an
250     // ExpansionRegion. In all other cases, EncodedCounterAndRegion is
251     // interpreted to refer to a specific region kind, after which additional
252     // fields may be read (e.g. BranchRegions have two encoded counters that
253     // follow an encoded region kind value).
254     if (Tag != Counter::Zero) {
255       if (auto Err = decodeCounter(EncodedCounterAndRegion, C))
256         return Err;
257     } else {
258       // Is it an expansion region?
259       if (EncodedCounterAndRegion & EncodingExpansionRegionBit) {
260         Kind = CounterMappingRegion::ExpansionRegion;
261         ExpandedFileID = EncodedCounterAndRegion >>
262                          Counter::EncodingCounterTagAndExpansionRegionTagBits;
263         if (ExpandedFileID >= NumFileIDs)
264           return make_error<CoverageMapError>(coveragemap_error::malformed);
265       } else {
266         switch (EncodedCounterAndRegion >>
267                 Counter::EncodingCounterTagAndExpansionRegionTagBits) {
268         case CounterMappingRegion::CodeRegion:
269           // Don't do anything when we have a code region with a zero counter.
270           break;
271         case CounterMappingRegion::SkippedRegion:
272           Kind = CounterMappingRegion::SkippedRegion;
273           break;
274         case CounterMappingRegion::BranchRegion:
275           // For a Branch Region, read two successive counters.
276           Kind = CounterMappingRegion::BranchRegion;
277           if (auto Err = readCounter(C))
278             return Err;
279           if (auto Err = readCounter(C2))
280             return Err;
281           break;
282         default:
283           return make_error<CoverageMapError>(coveragemap_error::malformed);
284         }
285       }
286     }
287 
288     // Read the source range.
289     uint64_t LineStartDelta, ColumnStart, NumLines, ColumnEnd;
290     if (auto Err =
291             readIntMax(LineStartDelta, std::numeric_limits<unsigned>::max()))
292       return Err;
293     if (auto Err = readULEB128(ColumnStart))
294       return Err;
295     if (ColumnStart > std::numeric_limits<unsigned>::max())
296       return make_error<CoverageMapError>(coveragemap_error::malformed);
297     if (auto Err = readIntMax(NumLines, std::numeric_limits<unsigned>::max()))
298       return Err;
299     if (auto Err = readIntMax(ColumnEnd, std::numeric_limits<unsigned>::max()))
300       return Err;
301     LineStart += LineStartDelta;
302 
303     // If the high bit of ColumnEnd is set, this is a gap region.
304     if (ColumnEnd & (1U << 31)) {
305       Kind = CounterMappingRegion::GapRegion;
306       ColumnEnd &= ~(1U << 31);
307     }
308 
309     // Adjust the column locations for the empty regions that are supposed to
310     // cover whole lines. Those regions should be encoded with the
311     // column range (1 -> std::numeric_limits<unsigned>::max()), but because
312     // the encoded std::numeric_limits<unsigned>::max() is several bytes long,
313     // we set the column range to (0 -> 0) to ensure that the column start and
314     // column end take up one byte each.
315     // The std::numeric_limits<unsigned>::max() is used to represent a column
316     // position at the end of the line without knowing the length of that line.
317     if (ColumnStart == 0 && ColumnEnd == 0) {
318       ColumnStart = 1;
319       ColumnEnd = std::numeric_limits<unsigned>::max();
320     }
321 
322     LLVM_DEBUG({
323       dbgs() << "Counter in file " << InferredFileID << " " << LineStart << ":"
324              << ColumnStart << " -> " << (LineStart + NumLines) << ":"
325              << ColumnEnd << ", ";
326       if (Kind == CounterMappingRegion::ExpansionRegion)
327         dbgs() << "Expands to file " << ExpandedFileID;
328       else
329         CounterMappingContext(Expressions).dump(C, dbgs());
330       dbgs() << "\n";
331     });
332 
333     auto CMR = CounterMappingRegion(C, C2, InferredFileID, ExpandedFileID,
334                                     LineStart, ColumnStart,
335                                     LineStart + NumLines, ColumnEnd, Kind);
336     if (CMR.startLoc() > CMR.endLoc())
337       return make_error<CoverageMapError>(coveragemap_error::malformed);
338     MappingRegions.push_back(CMR);
339   }
340   return Error::success();
341 }
342 
343 Error RawCoverageMappingReader::read() {
344   // Read the virtual file mapping.
345   SmallVector<unsigned, 8> VirtualFileMapping;
346   uint64_t NumFileMappings;
347   if (auto Err = readSize(NumFileMappings))
348     return Err;
349   for (size_t I = 0; I < NumFileMappings; ++I) {
350     uint64_t FilenameIndex;
351     if (auto Err = readIntMax(FilenameIndex, TranslationUnitFilenames.size()))
352       return Err;
353     VirtualFileMapping.push_back(FilenameIndex);
354   }
355 
356   // Construct the files using unique filenames and virtual file mapping.
357   for (auto I : VirtualFileMapping) {
358     Filenames.push_back(TranslationUnitFilenames[I]);
359   }
360 
361   // Read the expressions.
362   uint64_t NumExpressions;
363   if (auto Err = readSize(NumExpressions))
364     return Err;
365   // Create an array of dummy expressions that get the proper counters
366   // when the expressions are read, and the proper kinds when the counters
367   // are decoded.
368   Expressions.resize(
369       NumExpressions,
370       CounterExpression(CounterExpression::Subtract, Counter(), Counter()));
371   for (size_t I = 0; I < NumExpressions; ++I) {
372     if (auto Err = readCounter(Expressions[I].LHS))
373       return Err;
374     if (auto Err = readCounter(Expressions[I].RHS))
375       return Err;
376   }
377 
378   // Read the mapping regions sub-arrays.
379   for (unsigned InferredFileID = 0, S = VirtualFileMapping.size();
380        InferredFileID < S; ++InferredFileID) {
381     if (auto Err = readMappingRegionsSubArray(MappingRegions, InferredFileID,
382                                               VirtualFileMapping.size()))
383       return Err;
384   }
385 
386   // Set the counters for the expansion regions.
387   // i.e. Counter of expansion region = counter of the first region
388   // from the expanded file.
389   // Perform multiple passes to correctly propagate the counters through
390   // all the nested expansion regions.
391   SmallVector<CounterMappingRegion *, 8> FileIDExpansionRegionMapping;
392   FileIDExpansionRegionMapping.resize(VirtualFileMapping.size(), nullptr);
393   for (unsigned Pass = 1, S = VirtualFileMapping.size(); Pass < S; ++Pass) {
394     for (auto &R : MappingRegions) {
395       if (R.Kind != CounterMappingRegion::ExpansionRegion)
396         continue;
397       assert(!FileIDExpansionRegionMapping[R.ExpandedFileID]);
398       FileIDExpansionRegionMapping[R.ExpandedFileID] = &R;
399     }
400     for (auto &R : MappingRegions) {
401       if (FileIDExpansionRegionMapping[R.FileID]) {
402         FileIDExpansionRegionMapping[R.FileID]->Count = R.Count;
403         FileIDExpansionRegionMapping[R.FileID] = nullptr;
404       }
405     }
406   }
407 
408   return Error::success();
409 }
410 
411 Expected<bool> RawCoverageMappingDummyChecker::isDummy() {
412   // A dummy coverage mapping data consists of just one region with zero count.
413   uint64_t NumFileMappings;
414   if (Error Err = readSize(NumFileMappings))
415     return std::move(Err);
416   if (NumFileMappings != 1)
417     return false;
418   // We don't expect any specific value for the filename index, just skip it.
419   uint64_t FilenameIndex;
420   if (Error Err =
421           readIntMax(FilenameIndex, std::numeric_limits<unsigned>::max()))
422     return std::move(Err);
423   uint64_t NumExpressions;
424   if (Error Err = readSize(NumExpressions))
425     return std::move(Err);
426   if (NumExpressions != 0)
427     return false;
428   uint64_t NumRegions;
429   if (Error Err = readSize(NumRegions))
430     return std::move(Err);
431   if (NumRegions != 1)
432     return false;
433   uint64_t EncodedCounterAndRegion;
434   if (Error Err = readIntMax(EncodedCounterAndRegion,
435                              std::numeric_limits<unsigned>::max()))
436     return std::move(Err);
437   unsigned Tag = EncodedCounterAndRegion & Counter::EncodingTagMask;
438   return Tag == Counter::Zero;
439 }
440 
441 Error InstrProfSymtab::create(SectionRef &Section) {
442   Expected<StringRef> DataOrErr = Section.getContents();
443   if (!DataOrErr)
444     return DataOrErr.takeError();
445   Data = *DataOrErr;
446   Address = Section.getAddress();
447 
448   // If this is a linked PE/COFF file, then we have to skip over the null byte
449   // that is allocated in the .lprfn$A section in the LLVM profiling runtime.
450   const ObjectFile *Obj = Section.getObject();
451   if (isa<COFFObjectFile>(Obj) && !Obj->isRelocatableObject())
452     Data = Data.drop_front(1);
453 
454   return Error::success();
455 }
456 
457 StringRef InstrProfSymtab::getFuncName(uint64_t Pointer, size_t Size) {
458   if (Pointer < Address)
459     return StringRef();
460   auto Offset = Pointer - Address;
461   if (Offset + Size > Data.size())
462     return StringRef();
463   return Data.substr(Pointer - Address, Size);
464 }
465 
466 // Check if the mapping data is a dummy, i.e. is emitted for an unused function.
467 static Expected<bool> isCoverageMappingDummy(uint64_t Hash, StringRef Mapping) {
468   // The hash value of dummy mapping records is always zero.
469   if (Hash)
470     return false;
471   return RawCoverageMappingDummyChecker(Mapping).isDummy();
472 }
473 
/// Locates a batch of filenames inside a vector-like container by the index
/// of its first entry and the number of entries. A length of zero marks the
/// range as invalid.
struct FilenameRange {
  unsigned StartingIndex;
  unsigned Length;

  FilenameRange(unsigned StartingIndex, unsigned Length)
      : StartingIndex{StartingIndex}, Length{Length} {}

  /// Flag the range as invalid by clearing its length.
  void markInvalid() { Length = 0U; }

  /// \returns true when the range has no entries.
  bool isInvalid() const { return !Length; }
};
486 
487 namespace {
488 
489 /// The interface to read coverage mapping function records for a module.
490 struct CovMapFuncRecordReader {
491   virtual ~CovMapFuncRecordReader() = default;
492 
493   // Read a coverage header.
494   //
495   // \p CovBuf points to the buffer containing the \c CovHeader of the coverage
496   // mapping data associated with the module.
497   //
498   // Returns a pointer to the next \c CovHeader if it exists, or to an address
499   // greater than \p CovEnd if not.
500   virtual Expected<const char *> readCoverageHeader(const char *CovBuf,
501                                                     const char *CovBufEnd) = 0;
502 
503   // Read function records.
504   //
505   // \p FuncRecBuf points to the buffer containing a batch of function records.
506   // \p FuncRecBufEnd points past the end of the batch of records.
507   //
508   // Prior to Version4, \p OutOfLineFileRange points to a sequence of filenames
509   // associated with the function records. It is unused in Version4.
510   //
511   // Prior to Version4, \p OutOfLineMappingBuf points to a sequence of coverage
512   // mappings associated with the function records. It is unused in Version4.
513   virtual Error readFunctionRecords(const char *FuncRecBuf,
514                                     const char *FuncRecBufEnd,
515                                     Optional<FilenameRange> OutOfLineFileRange,
516                                     const char *OutOfLineMappingBuf,
517                                     const char *OutOfLineMappingBufEnd) = 0;
518 
519   template <class IntPtrT, support::endianness Endian>
520   static Expected<std::unique_ptr<CovMapFuncRecordReader>>
521   get(CovMapVersion Version, InstrProfSymtab &P,
522       std::vector<BinaryCoverageReader::ProfileMappingRecord> &R,
523       std::vector<std::string> &F);
524 };
525 
526 // A class for reading coverage mapping function records for a module.
527 template <CovMapVersion Version, class IntPtrT, support::endianness Endian>
528 class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader {
529   using FuncRecordType =
530       typename CovMapTraits<Version, IntPtrT>::CovMapFuncRecordType;
531   using NameRefType = typename CovMapTraits<Version, IntPtrT>::NameRefType;
532 
533   // Maps function's name references to the indexes of their records
534   // in \c Records.
535   DenseMap<NameRefType, size_t> FunctionRecords;
536   InstrProfSymtab &ProfileNames;
537   std::vector<std::string> &Filenames;
538   std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records;
539 
540   // Maps a hash of the filenames in a TU to a \c FileRange. The range
541   // specifies the location of the hashed filenames in \c Filenames.
542   DenseMap<uint64_t, FilenameRange> FileRangeMap;
543 
544   // Add the record to the collection if we don't already have a record that
545   // points to the same function name. This is useful to ignore the redundant
546   // records for the functions with ODR linkage.
547   // In addition, prefer records with real coverage mapping data to dummy
548   // records, which were emitted for inline functions which were seen but
549   // not used in the corresponding translation unit.
550   Error insertFunctionRecordIfNeeded(const FuncRecordType *CFR,
551                                      StringRef Mapping,
552                                      FilenameRange FileRange) {
553     ++CovMapNumRecords;
554     uint64_t FuncHash = CFR->template getFuncHash<Endian>();
555     NameRefType NameRef = CFR->template getFuncNameRef<Endian>();
556     auto InsertResult =
557         FunctionRecords.insert(std::make_pair(NameRef, Records.size()));
558     if (InsertResult.second) {
559       StringRef FuncName;
560       if (Error Err = CFR->template getFuncName<Endian>(ProfileNames, FuncName))
561         return Err;
562       if (FuncName.empty())
563         return make_error<InstrProfError>(instrprof_error::malformed);
564       ++CovMapNumUsedRecords;
565       Records.emplace_back(Version, FuncName, FuncHash, Mapping,
566                            FileRange.StartingIndex, FileRange.Length);
567       return Error::success();
568     }
569     // Update the existing record if it's a dummy and the new record is real.
570     size_t OldRecordIndex = InsertResult.first->second;
571     BinaryCoverageReader::ProfileMappingRecord &OldRecord =
572         Records[OldRecordIndex];
573     Expected<bool> OldIsDummyExpected = isCoverageMappingDummy(
574         OldRecord.FunctionHash, OldRecord.CoverageMapping);
575     if (Error Err = OldIsDummyExpected.takeError())
576       return Err;
577     if (!*OldIsDummyExpected)
578       return Error::success();
579     Expected<bool> NewIsDummyExpected =
580         isCoverageMappingDummy(FuncHash, Mapping);
581     if (Error Err = NewIsDummyExpected.takeError())
582       return Err;
583     if (*NewIsDummyExpected)
584       return Error::success();
585     ++CovMapNumUsedRecords;
586     OldRecord.FunctionHash = FuncHash;
587     OldRecord.CoverageMapping = Mapping;
588     OldRecord.FilenamesBegin = FileRange.StartingIndex;
589     OldRecord.FilenamesSize = FileRange.Length;
590     return Error::success();
591   }
592 
593 public:
594   VersionedCovMapFuncRecordReader(
595       InstrProfSymtab &P,
596       std::vector<BinaryCoverageReader::ProfileMappingRecord> &R,
597       std::vector<std::string> &F)
598       : ProfileNames(P), Filenames(F), Records(R) {}
599 
600   ~VersionedCovMapFuncRecordReader() override = default;
601 
602   Expected<const char *> readCoverageHeader(const char *CovBuf,
603                                             const char *CovBufEnd) override {
604     using namespace support;
605 
606     if (CovBuf + sizeof(CovMapHeader) > CovBufEnd)
607       return make_error<CoverageMapError>(coveragemap_error::malformed);
608     auto CovHeader = reinterpret_cast<const CovMapHeader *>(CovBuf);
609     uint32_t NRecords = CovHeader->getNRecords<Endian>();
610     uint32_t FilenamesSize = CovHeader->getFilenamesSize<Endian>();
611     uint32_t CoverageSize = CovHeader->getCoverageSize<Endian>();
612     assert((CovMapVersion)CovHeader->getVersion<Endian>() == Version);
613     CovBuf = reinterpret_cast<const char *>(CovHeader + 1);
614 
615     // Skip past the function records, saving the start and end for later.
616     // This is a no-op in Version4 (function records are read after all headers
617     // are read).
618     const char *FuncRecBuf = nullptr;
619     const char *FuncRecBufEnd = nullptr;
620     if (Version < CovMapVersion::Version4)
621       FuncRecBuf = CovBuf;
622     CovBuf += NRecords * sizeof(FuncRecordType);
623     if (Version < CovMapVersion::Version4)
624       FuncRecBufEnd = CovBuf;
625 
626     // Get the filenames.
627     if (CovBuf + FilenamesSize > CovBufEnd)
628       return make_error<CoverageMapError>(coveragemap_error::malformed);
629     size_t FilenamesBegin = Filenames.size();
630     StringRef FilenameRegion(CovBuf, FilenamesSize);
631     RawCoverageFilenamesReader Reader(FilenameRegion, Filenames);
632     if (auto Err = Reader.read(Version))
633       return std::move(Err);
634     CovBuf += FilenamesSize;
635     FilenameRange FileRange(FilenamesBegin, Filenames.size() - FilenamesBegin);
636 
637     if (Version >= CovMapVersion::Version4) {
638       // Map a hash of the filenames region to the filename range associated
639       // with this coverage header.
640       int64_t FilenamesRef =
641           llvm::IndexedInstrProf::ComputeHash(FilenameRegion);
642       auto Insert =
643           FileRangeMap.insert(std::make_pair(FilenamesRef, FileRange));
644       if (!Insert.second) {
645         // The same filenames ref was encountered twice. It's possible that
646         // the associated filenames are the same.
647         auto It = Filenames.begin();
648         FilenameRange &OrigRange = Insert.first->getSecond();
649         if (std::equal(It + OrigRange.StartingIndex,
650                        It + OrigRange.StartingIndex + OrigRange.Length,
651                        It + FileRange.StartingIndex,
652                        It + FileRange.StartingIndex + FileRange.Length))
653           // Map the new range to the original one.
654           FileRange = OrigRange;
655         else
656           // This is a hash collision. Mark the filenames ref invalid.
657           OrigRange.markInvalid();
658       }
659     }
660 
661     // We'll read the coverage mapping records in the loop below.
662     // This is a no-op in Version4 (coverage mappings are not affixed to the
663     // coverage header).
664     const char *MappingBuf = CovBuf;
665     if (Version >= CovMapVersion::Version4 && CoverageSize != 0)
666       return make_error<CoverageMapError>(coveragemap_error::malformed);
667     CovBuf += CoverageSize;
668     const char *MappingEnd = CovBuf;
669 
670     if (CovBuf > CovBufEnd)
671       return make_error<CoverageMapError>(coveragemap_error::malformed);
672 
673     if (Version < CovMapVersion::Version4) {
674       // Read each function record.
675       if (Error E = readFunctionRecords(FuncRecBuf, FuncRecBufEnd, FileRange,
676                                         MappingBuf, MappingEnd))
677         return std::move(E);
678     }
679 
680     // Each coverage map has an alignment of 8, so we need to adjust alignment
681     // before reading the next map.
682     CovBuf += offsetToAlignedAddr(CovBuf, Align(8));
683 
684     return CovBuf;
685   }
686 
687   Error readFunctionRecords(const char *FuncRecBuf, const char *FuncRecBufEnd,
688                             Optional<FilenameRange> OutOfLineFileRange,
689                             const char *OutOfLineMappingBuf,
690                             const char *OutOfLineMappingBufEnd) override {
691     auto CFR = reinterpret_cast<const FuncRecordType *>(FuncRecBuf);
692     while ((const char *)CFR < FuncRecBufEnd) {
693       // Validate the length of the coverage mapping for this function.
694       const char *NextMappingBuf;
695       const FuncRecordType *NextCFR;
696       std::tie(NextMappingBuf, NextCFR) =
697           CFR->template advanceByOne<Endian>(OutOfLineMappingBuf);
698       if (Version < CovMapVersion::Version4)
699         if (NextMappingBuf > OutOfLineMappingBufEnd)
700           return make_error<CoverageMapError>(coveragemap_error::malformed);
701 
702       // Look up the set of filenames associated with this function record.
703       Optional<FilenameRange> FileRange;
704       if (Version < CovMapVersion::Version4) {
705         FileRange = OutOfLineFileRange;
706       } else {
707         uint64_t FilenamesRef = CFR->template getFilenamesRef<Endian>();
708         auto It = FileRangeMap.find(FilenamesRef);
709         if (It == FileRangeMap.end())
710           return make_error<CoverageMapError>(coveragemap_error::malformed);
711         else
712           FileRange = It->getSecond();
713       }
714 
715       // Now, read the coverage data.
716       if (FileRange && !FileRange->isInvalid()) {
717         StringRef Mapping =
718             CFR->template getCoverageMapping<Endian>(OutOfLineMappingBuf);
719         if (Version >= CovMapVersion::Version4 &&
720             Mapping.data() + Mapping.size() > FuncRecBufEnd)
721           return make_error<CoverageMapError>(coveragemap_error::malformed);
722         if (Error Err = insertFunctionRecordIfNeeded(CFR, Mapping, *FileRange))
723           return Err;
724       }
725 
726       std::tie(OutOfLineMappingBuf, CFR) = std::tie(NextMappingBuf, NextCFR);
727     }
728     return Error::success();
729   }
730 };
731 
732 } // end anonymous namespace
733 
734 template <class IntPtrT, support::endianness Endian>
735 Expected<std::unique_ptr<CovMapFuncRecordReader>> CovMapFuncRecordReader::get(
736     CovMapVersion Version, InstrProfSymtab &P,
737     std::vector<BinaryCoverageReader::ProfileMappingRecord> &R,
738     std::vector<std::string> &F) {
739   using namespace coverage;
740 
741   switch (Version) {
742   case CovMapVersion::Version1:
743     return std::make_unique<VersionedCovMapFuncRecordReader<
744         CovMapVersion::Version1, IntPtrT, Endian>>(P, R, F);
745   case CovMapVersion::Version2:
746   case CovMapVersion::Version3:
747   case CovMapVersion::Version4:
748   case CovMapVersion::Version5:
749   case CovMapVersion::Version6:
750     // Decompress the name data.
751     if (Error E = P.create(P.getNameData()))
752       return std::move(E);
753     if (Version == CovMapVersion::Version2)
754       return std::make_unique<VersionedCovMapFuncRecordReader<
755           CovMapVersion::Version2, IntPtrT, Endian>>(P, R, F);
756     else if (Version == CovMapVersion::Version3)
757       return std::make_unique<VersionedCovMapFuncRecordReader<
758           CovMapVersion::Version3, IntPtrT, Endian>>(P, R, F);
759     else if (Version == CovMapVersion::Version4)
760       return std::make_unique<VersionedCovMapFuncRecordReader<
761           CovMapVersion::Version4, IntPtrT, Endian>>(P, R, F);
762     else if (Version == CovMapVersion::Version5)
763       return std::make_unique<VersionedCovMapFuncRecordReader<
764           CovMapVersion::Version5, IntPtrT, Endian>>(P, R, F);
765     else if (Version == CovMapVersion::Version6)
766       return std::make_unique<VersionedCovMapFuncRecordReader<
767           CovMapVersion::Version6, IntPtrT, Endian>>(P, R, F);
768   }
769   llvm_unreachable("Unsupported version");
770 }
771 
772 template <typename T, support::endianness Endian>
773 static Error readCoverageMappingData(
774     InstrProfSymtab &ProfileNames, StringRef CovMap, StringRef FuncRecords,
775     std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records,
776     std::vector<std::string> &Filenames) {
777   using namespace coverage;
778 
779   // Read the records in the coverage data section.
780   auto CovHeader =
781       reinterpret_cast<const CovMapHeader *>(CovMap.data());
782   CovMapVersion Version = (CovMapVersion)CovHeader->getVersion<Endian>();
783   if (Version > CovMapVersion::CurrentVersion)
784     return make_error<CoverageMapError>(coveragemap_error::unsupported_version);
785   Expected<std::unique_ptr<CovMapFuncRecordReader>> ReaderExpected =
786       CovMapFuncRecordReader::get<T, Endian>(Version, ProfileNames, Records,
787                                              Filenames);
788   if (Error E = ReaderExpected.takeError())
789     return E;
790   auto Reader = std::move(ReaderExpected.get());
791   const char *CovBuf = CovMap.data();
792   const char *CovBufEnd = CovBuf + CovMap.size();
793   const char *FuncRecBuf = FuncRecords.data();
794   const char *FuncRecBufEnd = FuncRecords.data() + FuncRecords.size();
795   while (CovBuf < CovBufEnd) {
796     // Read the current coverage header & filename data.
797     //
798     // Prior to Version4, this also reads all function records affixed to the
799     // header.
800     //
801     // Return a pointer to the next coverage header.
802     auto NextOrErr = Reader->readCoverageHeader(CovBuf, CovBufEnd);
803     if (auto E = NextOrErr.takeError())
804       return E;
805     CovBuf = NextOrErr.get();
806   }
807   // In Version4, function records are not affixed to coverage headers. Read
808   // the records from their dedicated section.
809   if (Version >= CovMapVersion::Version4)
810     return Reader->readFunctionRecords(FuncRecBuf, FuncRecBufEnd, None, nullptr,
811                                        nullptr);
812   return Error::success();
813 }
814 
/// Magic string that prefixes buffers written in the special testing format.
/// Both the characters and the pointer are immutable.
static const char *const TestingFormatMagic = "llvmcovmtestdata";
816 
817 Expected<std::unique_ptr<BinaryCoverageReader>>
818 BinaryCoverageReader::createCoverageReaderFromBuffer(
819     StringRef Coverage, std::string &&FuncRecords, InstrProfSymtab &&ProfileNames,
820     uint8_t BytesInAddress, support::endianness Endian) {
821   std::unique_ptr<BinaryCoverageReader> Reader(
822       new BinaryCoverageReader(std::move(FuncRecords)));
823   Reader->ProfileNames = std::move(ProfileNames);
824   StringRef FuncRecordsRef = Reader->FuncRecords;
825   if (BytesInAddress == 4 && Endian == support::endianness::little) {
826     if (Error E =
827             readCoverageMappingData<uint32_t, support::endianness::little>(
828                 Reader->ProfileNames, Coverage, FuncRecordsRef,
829                 Reader->MappingRecords, Reader->Filenames))
830       return std::move(E);
831   } else if (BytesInAddress == 4 && Endian == support::endianness::big) {
832     if (Error E = readCoverageMappingData<uint32_t, support::endianness::big>(
833             Reader->ProfileNames, Coverage, FuncRecordsRef,
834             Reader->MappingRecords, Reader->Filenames))
835       return std::move(E);
836   } else if (BytesInAddress == 8 && Endian == support::endianness::little) {
837     if (Error E =
838             readCoverageMappingData<uint64_t, support::endianness::little>(
839                 Reader->ProfileNames, Coverage, FuncRecordsRef,
840                 Reader->MappingRecords, Reader->Filenames))
841       return std::move(E);
842   } else if (BytesInAddress == 8 && Endian == support::endianness::big) {
843     if (Error E = readCoverageMappingData<uint64_t, support::endianness::big>(
844             Reader->ProfileNames, Coverage, FuncRecordsRef,
845             Reader->MappingRecords, Reader->Filenames))
846       return std::move(E);
847   } else
848     return make_error<CoverageMapError>(coveragemap_error::malformed);
849   return std::move(Reader);
850 }
851 
852 static Expected<std::unique_ptr<BinaryCoverageReader>>
853 loadTestingFormat(StringRef Data) {
854   uint8_t BytesInAddress = 8;
855   support::endianness Endian = support::endianness::little;
856 
857   Data = Data.substr(StringRef(TestingFormatMagic).size());
858   if (Data.empty())
859     return make_error<CoverageMapError>(coveragemap_error::truncated);
860   unsigned N = 0;
861   uint64_t ProfileNamesSize = decodeULEB128(Data.bytes_begin(), &N);
862   if (N > Data.size())
863     return make_error<CoverageMapError>(coveragemap_error::malformed);
864   Data = Data.substr(N);
865   if (Data.empty())
866     return make_error<CoverageMapError>(coveragemap_error::truncated);
867   N = 0;
868   uint64_t Address = decodeULEB128(Data.bytes_begin(), &N);
869   if (N > Data.size())
870     return make_error<CoverageMapError>(coveragemap_error::malformed);
871   Data = Data.substr(N);
872   if (Data.size() < ProfileNamesSize)
873     return make_error<CoverageMapError>(coveragemap_error::malformed);
874   InstrProfSymtab ProfileNames;
875   if (Error E = ProfileNames.create(Data.substr(0, ProfileNamesSize), Address))
876     return std::move(E);
877   StringRef CoverageMapping = Data.substr(ProfileNamesSize);
878   // Skip the padding bytes because coverage map data has an alignment of 8.
879   if (CoverageMapping.empty())
880     return make_error<CoverageMapError>(coveragemap_error::truncated);
881   size_t Pad = offsetToAlignedAddr(CoverageMapping.data(), Align(8));
882   if (CoverageMapping.size() < Pad)
883     return make_error<CoverageMapError>(coveragemap_error::malformed);
884   CoverageMapping = CoverageMapping.substr(Pad);
885   return BinaryCoverageReader::createCoverageReaderFromBuffer(
886       CoverageMapping, "", std::move(ProfileNames), BytesInAddress, Endian);
887 }
888 
889 /// Find all sections that match \p Name. There may be more than one if comdats
890 /// are in use, e.g. for the __llvm_covfun section on ELF.
891 static Expected<std::vector<SectionRef>> lookupSections(ObjectFile &OF,
892                                                         StringRef Name) {
893   // On COFF, the object file section name may end in "$M". This tells the
894   // linker to sort these sections between "$A" and "$Z". The linker removes the
895   // dollar and everything after it in the final binary. Do the same to match.
896   bool IsCOFF = isa<COFFObjectFile>(OF);
897   auto stripSuffix = [IsCOFF](StringRef N) {
898     return IsCOFF ? N.split('$').first : N;
899   };
900   Name = stripSuffix(Name);
901 
902   std::vector<SectionRef> Sections;
903   for (const auto &Section : OF.sections()) {
904     Expected<StringRef> NameOrErr = Section.getName();
905     if (!NameOrErr)
906       return NameOrErr.takeError();
907     if (stripSuffix(*NameOrErr) == Name)
908       Sections.push_back(Section);
909   }
910   if (Sections.empty())
911     return make_error<CoverageMapError>(coveragemap_error::no_data_found);
912   return Sections;
913 }
914 
915 static Expected<std::unique_ptr<BinaryCoverageReader>>
916 loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch) {
917   std::unique_ptr<ObjectFile> OF;
918   if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
919     // If we have a universal binary, try to look up the object for the
920     // appropriate architecture.
921     auto ObjectFileOrErr = Universal->getMachOObjectForArch(Arch);
922     if (!ObjectFileOrErr)
923       return ObjectFileOrErr.takeError();
924     OF = std::move(ObjectFileOrErr.get());
925   } else if (isa<ObjectFile>(Bin.get())) {
926     // For any other object file, upcast and take ownership.
927     OF.reset(cast<ObjectFile>(Bin.release()));
928     // If we've asked for a particular arch, make sure they match.
929     if (!Arch.empty() && OF->getArch() != Triple(Arch).getArch())
930       return errorCodeToError(object_error::arch_not_found);
931   } else
932     // We can only handle object files.
933     return make_error<CoverageMapError>(coveragemap_error::malformed);
934 
935   // The coverage uses native pointer sizes for the object it's written in.
936   uint8_t BytesInAddress = OF->getBytesInAddress();
937   support::endianness Endian = OF->isLittleEndian()
938                                    ? support::endianness::little
939                                    : support::endianness::big;
940 
941   // Look for the sections that we are interested in.
942   auto ObjFormat = OF->getTripleObjectFormat();
943   auto NamesSection =
944       lookupSections(*OF, getInstrProfSectionName(IPSK_name, ObjFormat,
945                                                  /*AddSegmentInfo=*/false));
946   if (auto E = NamesSection.takeError())
947     return std::move(E);
948   auto CoverageSection =
949       lookupSections(*OF, getInstrProfSectionName(IPSK_covmap, ObjFormat,
950                                                   /*AddSegmentInfo=*/false));
951   if (auto E = CoverageSection.takeError())
952     return std::move(E);
953   std::vector<SectionRef> CoverageSectionRefs = *CoverageSection;
954   if (CoverageSectionRefs.size() != 1)
955     return make_error<CoverageMapError>(coveragemap_error::malformed);
956   auto CoverageMappingOrErr = CoverageSectionRefs.back().getContents();
957   if (!CoverageMappingOrErr)
958     return CoverageMappingOrErr.takeError();
959   StringRef CoverageMapping = CoverageMappingOrErr.get();
960 
961   InstrProfSymtab ProfileNames;
962   std::vector<SectionRef> NamesSectionRefs = *NamesSection;
963   if (NamesSectionRefs.size() != 1)
964     return make_error<CoverageMapError>(coveragemap_error::malformed);
965   if (Error E = ProfileNames.create(NamesSectionRefs.back()))
966     return std::move(E);
967 
968   // Look for the coverage records section (Version4 only).
969   std::string FuncRecords;
970   auto CoverageRecordsSections =
971       lookupSections(*OF, getInstrProfSectionName(IPSK_covfun, ObjFormat,
972                                                   /*AddSegmentInfo=*/false));
973   if (auto E = CoverageRecordsSections.takeError())
974     consumeError(std::move(E));
975   else {
976     for (SectionRef Section : *CoverageRecordsSections) {
977       auto CoverageRecordsOrErr = Section.getContents();
978       if (!CoverageRecordsOrErr)
979         return CoverageRecordsOrErr.takeError();
980       FuncRecords += CoverageRecordsOrErr.get();
981       while (FuncRecords.size() % 8 != 0)
982         FuncRecords += '\0';
983     }
984   }
985 
986   return BinaryCoverageReader::createCoverageReaderFromBuffer(
987       CoverageMapping, std::move(FuncRecords), std::move(ProfileNames),
988       BytesInAddress, Endian);
989 }
990 
991 /// Determine whether \p Arch is invalid or empty, given \p Bin.
992 static bool isArchSpecifierInvalidOrMissing(Binary *Bin, StringRef Arch) {
993   // If we have a universal binary and Arch doesn't identify any of its slices,
994   // it's user error.
995   if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin)) {
996     for (auto &ObjForArch : Universal->objects())
997       if (Arch == ObjForArch.getArchFlagName())
998         return false;
999     return true;
1000   }
1001   return false;
1002 }
1003 
1004 Expected<std::vector<std::unique_ptr<BinaryCoverageReader>>>
1005 BinaryCoverageReader::create(
1006     MemoryBufferRef ObjectBuffer, StringRef Arch,
1007     SmallVectorImpl<std::unique_ptr<MemoryBuffer>> &ObjectFileBuffers) {
1008   std::vector<std::unique_ptr<BinaryCoverageReader>> Readers;
1009 
1010   if (ObjectBuffer.getBuffer().startswith(TestingFormatMagic)) {
1011     // This is a special format used for testing.
1012     auto ReaderOrErr = loadTestingFormat(ObjectBuffer.getBuffer());
1013     if (!ReaderOrErr)
1014       return ReaderOrErr.takeError();
1015     Readers.push_back(std::move(ReaderOrErr.get()));
1016     return std::move(Readers);
1017   }
1018 
1019   auto BinOrErr = createBinary(ObjectBuffer);
1020   if (!BinOrErr)
1021     return BinOrErr.takeError();
1022   std::unique_ptr<Binary> Bin = std::move(BinOrErr.get());
1023 
1024   if (isArchSpecifierInvalidOrMissing(Bin.get(), Arch))
1025     return make_error<CoverageMapError>(
1026         coveragemap_error::invalid_or_missing_arch_specifier);
1027 
1028   // MachO universal binaries which contain archives need to be treated as
1029   // archives, not as regular binaries.
1030   if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
1031     for (auto &ObjForArch : Universal->objects()) {
1032       // Skip slices within the universal binary which target the wrong arch.
1033       std::string ObjArch = ObjForArch.getArchFlagName();
1034       if (Arch != ObjArch)
1035         continue;
1036 
1037       auto ArchiveOrErr = ObjForArch.getAsArchive();
1038       if (!ArchiveOrErr) {
1039         // If this is not an archive, try treating it as a regular object.
1040         consumeError(ArchiveOrErr.takeError());
1041         break;
1042       }
1043 
1044       return BinaryCoverageReader::create(
1045           ArchiveOrErr.get()->getMemoryBufferRef(), Arch, ObjectFileBuffers);
1046     }
1047   }
1048 
1049   // Load coverage out of archive members.
1050   if (auto *Ar = dyn_cast<Archive>(Bin.get())) {
1051     Error Err = Error::success();
1052     for (auto &Child : Ar->children(Err)) {
1053       Expected<MemoryBufferRef> ChildBufOrErr = Child.getMemoryBufferRef();
1054       if (!ChildBufOrErr)
1055         return ChildBufOrErr.takeError();
1056 
1057       auto ChildReadersOrErr = BinaryCoverageReader::create(
1058           ChildBufOrErr.get(), Arch, ObjectFileBuffers);
1059       if (!ChildReadersOrErr)
1060         return ChildReadersOrErr.takeError();
1061       for (auto &Reader : ChildReadersOrErr.get())
1062         Readers.push_back(std::move(Reader));
1063     }
1064     if (Err)
1065       return std::move(Err);
1066 
1067     // Thin archives reference object files outside of the archive file, i.e.
1068     // files which reside in memory not owned by the caller. Transfer ownership
1069     // to the caller.
1070     if (Ar->isThin())
1071       for (auto &Buffer : Ar->takeThinBuffers())
1072         ObjectFileBuffers.push_back(std::move(Buffer));
1073 
1074     return std::move(Readers);
1075   }
1076 
1077   auto ReaderOrErr = loadBinaryFormat(std::move(Bin), Arch);
1078   if (!ReaderOrErr)
1079     return ReaderOrErr.takeError();
1080   Readers.push_back(std::move(ReaderOrErr.get()));
1081   return std::move(Readers);
1082 }
1083 
1084 Error BinaryCoverageReader::readNextRecord(CoverageMappingRecord &Record) {
1085   if (CurrentRecord >= MappingRecords.size())
1086     return make_error<CoverageMapError>(coveragemap_error::eof);
1087 
1088   FunctionsFilenames.clear();
1089   Expressions.clear();
1090   MappingRegions.clear();
1091   auto &R = MappingRecords[CurrentRecord];
1092   auto F = makeArrayRef(Filenames).slice(R.FilenamesBegin, R.FilenamesSize);
1093   RawCoverageMappingReader Reader(R.CoverageMapping, F, FunctionsFilenames,
1094                                   Expressions, MappingRegions);
1095   if (auto Err = Reader.read())
1096     return Err;
1097 
1098   Record.FunctionName = R.FunctionName;
1099   Record.FunctionHash = R.FunctionHash;
1100   Record.Filenames = FunctionsFilenames;
1101   Record.Expressions = Expressions;
1102   Record.MappingRegions = MappingRegions;
1103 
1104   ++CurrentRecord;
1105   return Error::success();
1106 }
1107