1 //===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for writing profiling data for clang's
10 // instrumentation based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ProfileData/InstrProfWriter.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/IR/ProfileSummary.h"
19 #include "llvm/ProfileData/InstrProf.h"
20 #include "llvm/ProfileData/MemProf.h"
21 #include "llvm/ProfileData/ProfileCommon.h"
22 #include "llvm/Support/Compression.h"
23 #include "llvm/Support/Endian.h"
24 #include "llvm/Support/EndianStream.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/FormatVariadic.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/OnDiskHashTable.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cstdint>
31 #include <ctime>
32 #include <memory>
33 #include <string>
34 #include <tuple>
35 #include <utility>
36 #include <vector>
37 
38 using namespace llvm;
39 
40 // A struct that defines how the data stream should be patched. For indexed
41 // profiling, only the uint64_t data type is needed.
42 struct PatchItem {
43   uint64_t Pos;         // Where to patch.
44   ArrayRef<uint64_t> D; // An array of source data.
45 };
46 
47 namespace llvm {
48 
49 // A wrapper class that abstracts the writer stream and supports byte-level
50 // back patching.
51 class ProfOStream {
52 public:
53   ProfOStream(raw_fd_ostream &FD)
54       : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
55   ProfOStream(raw_string_ostream &STR)
56       : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}
57 
58   [[nodiscard]] uint64_t tell() const { return OS.tell(); }
59   void write(uint64_t V) { LE.write<uint64_t>(V); }
60   void write32(uint32_t V) { LE.write<uint32_t>(V); }
61   void writeByte(uint8_t V) { LE.write<uint8_t>(V); }
62 
63   // \c patch can only be called when all data is written and flushed.
64   // For raw_string_ostream, the patch is done on the target string
65   // directly and it won't be reflected in the stream's internal buffer.
66   void patch(ArrayRef<PatchItem> P) {
67     using namespace support;
68 
69     if (IsFDOStream) {
70       raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
71       const uint64_t LastPos = FDOStream.tell();
72       for (const auto &K : P) {
73         FDOStream.seek(K.Pos);
74         for (uint64_t Elem : K.D)
75           write(Elem);
76       }
77       // Reset the stream to the last position after patching so that users
78       // don't accidentally overwrite data. This makes it consistent with
79       // the string stream below which replaces the data directly.
80       FDOStream.seek(LastPos);
81     } else {
82       raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
83       std::string &Data = SOStream.str(); // with flush
84       for (const auto &K : P) {
85         for (int I = 0, E = K.D.size(); I != E; I++) {
86           uint64_t Bytes =
87               endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
88           Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
89                        (const char *)&Bytes, sizeof(uint64_t));
90         }
91       }
92     }
93   }
94 
95   // If \c OS is an instance of \c raw_fd_ostream, this field will be
96   // true. Otherwise, \c OS will be a raw_string_ostream.
97   bool IsFDOStream;
98   raw_ostream &OS;
99   support::endian::Writer LE;
100 };
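// A minimal sketch of the reserve-then-patch pattern this class enables
// (illustrative only; `Out` and the payload are placeholders, not code from
// this file):
//
// \code
//   ProfOStream OS(Out);                  // Out is a raw_fd_ostream.
//   uint64_t SlotPos = OS.tell();
//   OS.write(0);                          // Reserve an 8-byte slot.
//   uint64_t PayloadStart[] = {OS.tell()};
//   // ... write the payload whose start offset the slot should hold ...
//   OS.patch({{SlotPos, PayloadStart}});  // Back-patch the reserved slot.
// \endcode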
101 
102 class InstrProfRecordWriterTrait {
103 public:
104   using key_type = StringRef;
105   using key_type_ref = StringRef;
106 
107   using data_type = const InstrProfWriter::ProfilingData *const;
108   using data_type_ref = const InstrProfWriter::ProfilingData *const;
109 
110   using hash_value_type = uint64_t;
111   using offset_type = uint64_t;
112 
113   llvm::endianness ValueProfDataEndianness = llvm::endianness::little;
114   InstrProfSummaryBuilder *SummaryBuilder;
115   InstrProfSummaryBuilder *CSSummaryBuilder;
116 
117   InstrProfRecordWriterTrait() = default;
118 
119   static hash_value_type ComputeHash(key_type_ref K) {
120     return IndexedInstrProf::ComputeHash(K);
121   }
122 
123   static std::pair<offset_type, offset_type>
124   EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
125     using namespace support;
126 
127     endian::Writer LE(Out, llvm::endianness::little);
128 
129     offset_type N = K.size();
130     LE.write<offset_type>(N);
131 
132     offset_type M = 0;
133     for (const auto &ProfileData : *V) {
134       const InstrProfRecord &ProfRecord = ProfileData.second;
135       M += sizeof(uint64_t); // The function hash
136       M += sizeof(uint64_t); // The size of the Counts vector
137       M += ProfRecord.Counts.size() * sizeof(uint64_t);
138       M += sizeof(uint64_t); // The size of the Bitmap vector
139       M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t);
140 
141       // Value data
142       M += ValueProfData::getSize(ProfileData.second);
143     }
144     LE.write<offset_type>(M);
145 
146     return std::make_pair(N, M);
147   }
148 
149   void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N) {
150     Out.write(K.data(), N);
151   }
152 
153   void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) {
154     using namespace support;
155 
156     endian::Writer LE(Out, llvm::endianness::little);
157     for (const auto &ProfileData : *V) {
158       const InstrProfRecord &ProfRecord = ProfileData.second;
159       if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))
160         CSSummaryBuilder->addRecord(ProfRecord);
161       else
162         SummaryBuilder->addRecord(ProfRecord);
163 
164       LE.write<uint64_t>(ProfileData.first); // Function hash
165       LE.write<uint64_t>(ProfRecord.Counts.size());
166       for (uint64_t I : ProfRecord.Counts)
167         LE.write<uint64_t>(I);
168 
169       LE.write<uint64_t>(ProfRecord.BitmapBytes.size());
170       for (uint64_t I : ProfRecord.BitmapBytes)
171         LE.write<uint64_t>(I);
172 
173       // Write value data
174       std::unique_ptr<ValueProfData> VDataPtr =
175           ValueProfData::serializeFrom(ProfileData.second);
176       uint32_t S = VDataPtr->getSize();
177       VDataPtr->swapBytesFromHost(ValueProfDataEndianness);
178       Out.write((const char *)VDataPtr.get(), S);
179     }
180   }
181 };
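// For reference, EmitData above lays out each (function hash, record) pair as
// a flat little-endian sequence; this mirrors the loop above rather than
// defining a new format:
//
//   uint64_t FunctionHash
//   uint64_t NumCounters
//   uint64_t Counters[NumCounters]
//   uint64_t NumBitmapBytes
//   uint64_t BitmapBytes[NumBitmapBytes]   // each byte widened to a uint64_t
//   ValueProfData blob                     // ValueProfData::getSize() bytes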
182 
183 } // end namespace llvm
184 
185 InstrProfWriter::InstrProfWriter(
186     bool Sparse, uint64_t TemporalProfTraceReservoirSize,
187     uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
188     memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
189     bool MemprofGenerateRandomHotness,
190     unsigned MemprofGenerateRandomHotnessSeed)
191     : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
192       TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
193       InfoObj(new InstrProfRecordWriterTrait()),
194       WritePrevVersion(WritePrevVersion),
195       MemProfVersionRequested(MemProfVersionRequested),
196       MemProfFullSchema(MemProfFullSchema),
197       MemprofGenerateRandomHotness(MemprofGenerateRandomHotness) {
198   // Set up the random number seed if requested.
199   if (MemprofGenerateRandomHotness) {
200     unsigned seed = MemprofGenerateRandomHotnessSeed
201                         ? MemprofGenerateRandomHotnessSeed
202                         : std::time(nullptr);
203     errs() << "random hotness seed = " << seed << "\n";
204     std::srand(seed);
205   }
206 }
207 
208 InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
209 
210 // Internal interface for testing purposes only.
211 void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
212   InfoObj->ValueProfDataEndianness = Endianness;
213 }
214 
215 void InstrProfWriter::setOutputSparse(bool Sparse) {
216   this->Sparse = Sparse;
217 }
218 
219 void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
220                                 function_ref<void(Error)> Warn) {
221   auto Name = I.Name;
222   auto Hash = I.Hash;
223   addRecord(Name, Hash, std::move(I), Weight, Warn);
224 }
225 
226 void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
227                                     OverlapStats &Overlap,
228                                     OverlapStats &FuncLevelOverlap,
229                                     const OverlapFuncFilters &FuncFilter) {
230   auto Name = Other.Name;
231   auto Hash = Other.Hash;
232   Other.accumulateCounts(FuncLevelOverlap.Test);
233   if (!FunctionData.contains(Name)) {
234     Overlap.addOneUnique(FuncLevelOverlap.Test);
235     return;
236   }
237   if (FuncLevelOverlap.Test.CountSum < 1.0f) {
238     Overlap.Overlap.NumEntries += 1;
239     return;
240   }
241   auto &ProfileDataMap = FunctionData[Name];
242   bool NewFunc;
243   ProfilingData::iterator Where;
244   std::tie(Where, NewFunc) =
245       ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
246   if (NewFunc) {
247     Overlap.addOneMismatch(FuncLevelOverlap.Test);
248     return;
249   }
250   InstrProfRecord &Dest = Where->second;
251 
252   uint64_t ValueCutoff = FuncFilter.ValueCutoff;
253   if (!FuncFilter.NameFilter.empty() && Name.contains(FuncFilter.NameFilter))
254     ValueCutoff = 0;
255 
256   Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);
257 }
258 
259 void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
260                                 InstrProfRecord &&I, uint64_t Weight,
261                                 function_ref<void(Error)> Warn) {
262   auto &ProfileDataMap = FunctionData[Name];
263 
264   bool NewFunc;
265   ProfilingData::iterator Where;
266   std::tie(Where, NewFunc) =
267       ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
268   InstrProfRecord &Dest = Where->second;
269 
270   auto MapWarn = [&](instrprof_error E) {
271     Warn(make_error<InstrProfError>(E));
272   };
273 
274   if (NewFunc) {
275     // We've never seen a function with this name and hash, add it.
276     Dest = std::move(I);
277     if (Weight > 1)
278       Dest.scale(Weight, 1, MapWarn);
279   } else {
280     // We're updating a function we've seen before.
281     Dest.merge(I, Weight, MapWarn);
282   }
283 
284   Dest.sortValueData();
285 }
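// A small usage sketch of the weighted-merge behavior above (function name,
// hash, and counts are hypothetical; Warn simply swallows errors):
//
// \code
//   InstrProfWriter Writer;
//   auto Warn = [](Error E) { consumeError(std::move(E)); };
//   Writer.addRecord(NamedInstrProfRecord("foo", /*Hash=*/0x1234, {2, 3}),
//                    /*Weight=*/2, Warn); // New record, scaled to {4, 6}.
//   Writer.addRecord(NamedInstrProfRecord("foo", /*Hash=*/0x1234, {1, 1}),
//                    /*Weight=*/1, Warn); // Existing record, merged to {5, 7}.
// \endcode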
286 
287 void InstrProfWriter::addMemProfRecord(
288     const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) {
289   auto NewRecord = Record;
290   // Produce random hotness values if requested. We set the lifetime access
291   // density and lifetime length to values that yield a cold or not-cold hotness.
292   // See the logic in getAllocType() in Analysis/MemoryProfileInfo.cpp.
293   if (MemprofGenerateRandomHotness) {
294     for (auto &Alloc : NewRecord.AllocSites) {
295       // To get a not cold context, set the lifetime access density to the
296       // maximum value and the lifetime to 0.
297       uint64_t NewTLAD = std::numeric_limits<uint64_t>::max();
298       uint64_t NewTL = 0;
299       bool IsCold = std::rand() % 2;
300       if (IsCold) {
301         // To get a cold context, set the lifetime access density to 0 and the
302         // lifetime to the maximum value.
303         NewTLAD = 0;
304         NewTL = std::numeric_limits<uint64_t>::max();
305       }
306       Alloc.Info.setTotalLifetimeAccessDensity(NewTLAD);
307       Alloc.Info.setTotalLifetime(NewTL);
308     }
309   }
310   auto [Iter, Inserted] = MemProfData.Records.insert({Id, NewRecord});
311   // If we inserted a new record then we are done.
312   if (Inserted) {
313     return;
314   }
315   memprof::IndexedMemProfRecord &Existing = Iter->second;
316   Existing.merge(NewRecord);
317 }
318 
319 bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
320                                       const memprof::Frame &Frame,
321                                       function_ref<void(Error)> Warn) {
322   auto [Iter, Inserted] = MemProfData.Frames.insert({Id, Frame});
323   // If a mapping already exists for the current frame id and it does not
324   // match the new mapping provided, then emit a warning and bail out. We
325   // don't support merging memprof data whose Frame -> Id mapping is
326   // inconsistent across profiles.
327   if (!Inserted && Iter->second != Frame) {
328     Warn(make_error<InstrProfError>(instrprof_error::malformed,
329                                     "frame to id mapping mismatch"));
330     return false;
331   }
332   return true;
333 }
334 
335 bool InstrProfWriter::addMemProfCallStack(
336     const memprof::CallStackId CSId,
337     const llvm::SmallVector<memprof::FrameId> &CallStack,
338     function_ref<void(Error)> Warn) {
339   auto [Iter, Inserted] = MemProfData.CallStacks.insert({CSId, CallStack});
340   // If a mapping already exists for the current call stack id and it does not
341   // match the new mapping provided, then emit a warning and bail out. We
342   // don't support merging memprof data whose CallStack -> Id mapping is
343   // inconsistent across profiles.
344   if (!Inserted && Iter->second != CallStack) {
345     Warn(make_error<InstrProfError>(instrprof_error::malformed,
346                                     "call stack to id mapping mismatch"));
347     return false;
348   }
349   return true;
350 }
351 
352 bool InstrProfWriter::addMemProfData(memprof::IndexedMemProfData Incoming,
353                                      function_ref<void(Error)> Warn) {
354   // Return immediately if everything is empty.
355   if (Incoming.Frames.empty() && Incoming.CallStacks.empty() &&
356       Incoming.Records.empty())
357     return true;
358 
359   // Otherwise, every component must be non-empty.
360   assert(!Incoming.Frames.empty() && !Incoming.CallStacks.empty() &&
361          !Incoming.Records.empty());
362 
363   if (MemProfData.Frames.empty())
364     MemProfData.Frames = std::move(Incoming.Frames);
365   else
366     for (const auto &[Id, F] : Incoming.Frames)
367       if (!addMemProfFrame(Id, F, Warn))
368         return false;
369 
370   if (MemProfData.CallStacks.empty())
371     MemProfData.CallStacks = std::move(Incoming.CallStacks);
372   else
373     for (const auto &[CSId, CS] : Incoming.CallStacks)
374       if (!addMemProfCallStack(CSId, CS, Warn))
375         return false;
376 
377   // Add one record at a time if randomization is requested.
378   if (MemProfData.Records.empty() && !MemprofGenerateRandomHotness)
379     MemProfData.Records = std::move(Incoming.Records);
380   else
381     for (const auto &[GUID, Record] : Incoming.Records)
382       addMemProfRecord(GUID, Record);
383 
384   return true;
385 }
386 
387 void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
388   llvm::append_range(BinaryIds, BIs);
389 }
390 
391 void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
392   assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
393   assert(!Trace.FunctionNameRefs.empty());
394   if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
395     // Simply append the trace if we have not yet hit our reservoir size limit.
396     TemporalProfTraces.push_back(std::move(Trace));
397   } else {
398     // Otherwise, replace a random trace in the stream.
399     std::uniform_int_distribution<uint64_t> Distribution(
400         0, TemporalProfTraceStreamSize);
401     uint64_t RandomIndex = Distribution(RNG);
402     if (RandomIndex < TemporalProfTraces.size())
403       TemporalProfTraces[RandomIndex] = std::move(Trace);
404   }
405   ++TemporalProfTraceStreamSize;
406 }
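// In other words, this implements reservoir sampling: once the reservoir is
// full, an incoming trace displaces a random resident trace with probability
// ReservoirSize / (StreamSize + 1). For example, with a reservoir of 100
// traces and a current stream size of 250, the distribution above draws from
// [0, 250] (251 values), so the new trace is kept with probability 100/251.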
407 
408 void InstrProfWriter::addTemporalProfileTraces(
409     SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
410   for (auto &Trace : SrcTraces)
411     if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
412       Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
413   llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
414   // Assume that the source has the same reservoir size as the destination to
415   // avoid needing to record it in the indexed profile format.
416   bool IsDestSampled =
417       (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
418   bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
419   if (!IsDestSampled && IsSrcSampled) {
420     // If only one of the two streams is sampled, ensure that it is Dest.
421     std::swap(TemporalProfTraces, SrcTraces);
422     std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
423     std::swap(IsDestSampled, IsSrcSampled);
424   }
425   if (!IsSrcSampled) {
426     // If the source stream is not sampled, we add each source trace normally.
427     for (auto &Trace : SrcTraces)
428       addTemporalProfileTrace(std::move(Trace));
429     return;
430   }
431   // Otherwise, we find the traces that would have been removed if we added
432   // the whole source stream.
433   SmallSetVector<uint64_t, 8> IndicesToReplace;
434   for (uint64_t I = 0; I < SrcStreamSize; I++) {
435     std::uniform_int_distribution<uint64_t> Distribution(
436         0, TemporalProfTraceStreamSize);
437     uint64_t RandomIndex = Distribution(RNG);
438     if (RandomIndex < TemporalProfTraces.size())
439       IndicesToReplace.insert(RandomIndex);
440     ++TemporalProfTraceStreamSize;
441   }
442   // Then we insert a random sample of the source traces.
443   llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
444   for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
445     TemporalProfTraces[Index] = std::move(Trace);
446 }
447 
448 void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
449                                              function_ref<void(Error)> Warn) {
450   for (auto &I : IPW.FunctionData)
451     for (auto &Func : I.getValue())
452       addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);
453 
454   BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size());
455   for (auto &I : IPW.BinaryIds)
456     addBinaryIds(I);
457 
458   addTemporalProfileTraces(IPW.TemporalProfTraces,
459                            IPW.TemporalProfTraceStreamSize);
460 
461   MemProfData.Frames.reserve(IPW.MemProfData.Frames.size());
462   for (auto &[FrameId, Frame] : IPW.MemProfData.Frames) {
463     // If we weren't able to add the frame mappings then it doesn't make sense
464     // to try to merge the records from this profile.
465     if (!addMemProfFrame(FrameId, Frame, Warn))
466       return;
467   }
468 
469   MemProfData.CallStacks.reserve(IPW.MemProfData.CallStacks.size());
470   for (auto &[CSId, CallStack] : IPW.MemProfData.CallStacks) {
471     if (!addMemProfCallStack(CSId, CallStack, Warn))
472       return;
473   }
474 
475   MemProfData.Records.reserve(IPW.MemProfData.Records.size());
476   for (auto &[GUID, Record] : IPW.MemProfData.Records) {
477     addMemProfRecord(GUID, Record);
478   }
479 }
480 
481 bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
482   if (!Sparse)
483     return true;
484   for (const auto &Func : PD) {
485     const InstrProfRecord &IPR = Func.second;
486     if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))
487       return true;
488     if (llvm::any_of(IPR.BitmapBytes, [](uint8_t Byte) { return Byte > 0; }))
489       return true;
490   }
491   return false;
492 }
493 
494 static void setSummary(IndexedInstrProf::Summary *TheSummary,
495                        ProfileSummary &PS) {
496   using namespace IndexedInstrProf;
497 
498   const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
499   TheSummary->NumSummaryFields = Summary::NumKinds;
500   TheSummary->NumCutoffEntries = Res.size();
501   TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount());
502   TheSummary->set(Summary::MaxBlockCount, PS.getMaxCount());
503   TheSummary->set(Summary::MaxInternalBlockCount, PS.getMaxInternalCount());
504   TheSummary->set(Summary::TotalBlockCount, PS.getTotalCount());
505   TheSummary->set(Summary::TotalNumBlocks, PS.getNumCounts());
506   TheSummary->set(Summary::TotalNumFunctions, PS.getNumFunctions());
507   for (unsigned I = 0; I < Res.size(); I++)
508     TheSummary->setEntry(I, Res[I]);
509 }
510 
511 // Serialize Schema.
512 static void writeMemProfSchema(ProfOStream &OS,
513                                const memprof::MemProfSchema &Schema) {
514   OS.write(static_cast<uint64_t>(Schema.size()));
515   for (const auto Id : Schema)
516     OS.write(static_cast<uint64_t>(Id));
517 }
518 
519 // Serialize MemProfRecordData.  Return RecordTableOffset.
520 static uint64_t writeMemProfRecords(
521     ProfOStream &OS,
522     llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
523         &MemProfRecordData,
524     memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
525     llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
526         *MemProfCallStackIndexes = nullptr) {
527   memprof::RecordWriterTrait RecordWriter(Schema, Version,
528                                           MemProfCallStackIndexes);
529   OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
530       RecordTableGenerator;
531   for (auto &[GUID, Record] : MemProfRecordData) {
532     // Insert the key (func hash) and value (memprof record).
533     RecordTableGenerator.insert(GUID, Record, RecordWriter);
534   }
535   // Release the memory of this MapVector as it is no longer needed.
536   MemProfRecordData.clear();
537 
538   // The call to Emit invokes RecordWriterTrait::EmitData which destructs
539   // the memprof record copies owned by the RecordTableGenerator. This works
540   // because the RecordTableGenerator is not used after this point.
541   return RecordTableGenerator.Emit(OS.OS, RecordWriter);
542 }
543 
544 // Serialize MemProfFrameData.  Return FrameTableOffset.
545 static uint64_t writeMemProfFrames(
546     ProfOStream &OS,
547     llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
548   OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
549       FrameTableGenerator;
550   for (auto &[FrameId, Frame] : MemProfFrameData) {
551     // Insert the key (frame id) and value (frame contents).
552     FrameTableGenerator.insert(FrameId, Frame);
553   }
554   // Release the memory of this MapVector as it is no longer needed.
555   MemProfFrameData.clear();
556 
557   return FrameTableGenerator.Emit(OS.OS);
558 }
559 
560 // Serialize MemProfFrameData.  Return the mapping from FrameIds to their
561 // indexes within the frame array.
562 static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
563 writeMemProfFrameArray(
564     ProfOStream &OS,
565     llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
566     llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
567   // Mappings from FrameIds to array indexes.
568   llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;
569 
570   // Compute the order in which we serialize Frames.  The order does not matter
571   // in terms of correctness, but we still compute it for deserialization
572   // performance.  Specifically, if we serialize frequently used Frames one
573   // after another, we have better cache utilization.  For two Frames that
574   // appear equally frequently, we break a tie by serializing the one that tends
575   // to appear earlier in call stacks.  We implement the tie-breaking mechanism
576   // by computing the sum of indexes within call stacks for each Frame.  If we
577   // still have a tie, we fall back to comparing the two FrameIds, purely for
578   // output stability.
579   std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
580   FrameIdOrder.reserve(MemProfFrameData.size());
581   for (const auto &[Id, Frame] : MemProfFrameData)
582     FrameIdOrder.emplace_back(Id, &Frame);
583   assert(MemProfFrameData.size() == FrameIdOrder.size());
584   llvm::sort(FrameIdOrder,
585              [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
586                  const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
587                const auto &SL = FrameHistogram[L.first];
588                const auto &SR = FrameHistogram[R.first];
589                // Popular FrameIds should come first.
590                if (SL.Count != SR.Count)
591                  return SL.Count > SR.Count;
592                // If they are equally popular, then the one that tends to appear
593                // earlier in call stacks should come first.
594                if (SL.PositionSum != SR.PositionSum)
595                  return SL.PositionSum < SR.PositionSum;
596                // Compare their FrameIds for sort stability.
597                return L.first < R.first;
598              });
599 
600   // Serialize all frames while building the mapping from FrameIds to linear IDs.
601   uint64_t Index = 0;
602   MemProfFrameIndexes.reserve(FrameIdOrder.size());
603   for (const auto &[Id, F] : FrameIdOrder) {
604     F->serialize(OS.OS);
605     MemProfFrameIndexes.insert({Id, Index});
606     ++Index;
607   }
608   assert(MemProfFrameData.size() == Index);
609   assert(MemProfFrameData.size() == MemProfFrameIndexes.size());
610 
611   // Release the memory of this MapVector as it is no longer needed.
612   MemProfFrameData.clear();
613 
614   return MemProfFrameIndexes;
615 }
616 
617 static uint64_t writeMemProfCallStacks(
618     ProfOStream &OS,
619     llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
620         &MemProfCallStackData) {
621   OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
622       CallStackTableGenerator;
623   for (auto &[CSId, CallStack] : MemProfCallStackData)
624     CallStackTableGenerator.insert(CSId, CallStack);
625   // Release the memory of this vector as it is no longer needed.
626   MemProfCallStackData.clear();
627 
628   return CallStackTableGenerator.Emit(OS.OS);
629 }
630 
631 static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
632 writeMemProfCallStackArray(
633     ProfOStream &OS,
634     llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
635         &MemProfCallStackData,
636     llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
637         &MemProfFrameIndexes,
638     llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
639     unsigned &NumElements) {
640   llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
641       MemProfCallStackIndexes;
642 
643   memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
644   Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
645                 FrameHistogram);
646   for (auto I : Builder.getRadixArray())
647     OS.write32(I);
648   NumElements = Builder.getRadixArray().size();
649   MemProfCallStackIndexes = Builder.takeCallStackPos();
650 
651   // Release the memory of this vector as it is no longer needed.
652   MemProfCallStackData.clear();
653 
654   return MemProfCallStackIndexes;
655 }
656 
657 // Write out MemProf Version2 as follows:
658 // uint64_t Version
659 // uint64_t RecordTableOffset = RecordTableGenerator.Emit
660 // uint64_t FramePayloadOffset = Offset for the frame payload
661 // uint64_t FrameTableOffset = FrameTableGenerator.Emit
662 // uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW in V2)
663 // uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
664 // uint64_t Num schema entries
665 // uint64_t Schema entry 0
666 // uint64_t Schema entry 1
667 // ....
668 // uint64_t Schema entry N - 1
669 // OnDiskChainedHashTable MemProfRecordData
670 // OnDiskChainedHashTable MemProfFrameData
671 // OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
672 static Error writeMemProfV2(ProfOStream &OS,
673                             memprof::IndexedMemProfData &MemProfData,
674                             bool MemProfFullSchema) {
675   OS.write(memprof::Version2);
676   uint64_t HeaderUpdatePos = OS.tell();
677   OS.write(0ULL); // Reserve space for the memprof record table offset.
678   OS.write(0ULL); // Reserve space for the memprof frame payload offset.
679   OS.write(0ULL); // Reserve space for the memprof frame table offset.
680   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
681   OS.write(0ULL); // Reserve space for the memprof call stack table offset.
682 
683   auto Schema = memprof::getHotColdSchema();
684   if (MemProfFullSchema)
685     Schema = memprof::getFullSchema();
686   writeMemProfSchema(OS, Schema);
687 
688   uint64_t RecordTableOffset =
689       writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);
690 
691   uint64_t FramePayloadOffset = OS.tell();
692   uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);
693 
694   uint64_t CallStackPayloadOffset = OS.tell();
695   uint64_t CallStackTableOffset =
696       writeMemProfCallStacks(OS, MemProfData.CallStacks);
697 
698   uint64_t Header[] = {
699       RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
700       CallStackPayloadOffset, CallStackTableOffset,
701   };
702   OS.patch({{HeaderUpdatePos, Header}});
703 
704   return Error::success();
705 }
706 
707 // Write out MemProf Version3 as follows:
708 // uint64_t Version
709 // uint64_t CallStackPayloadOffset = Offset for the call stack payload
710 // uint64_t RecordPayloadOffset = Offset for the record payload
711 // uint64_t RecordTableOffset = RecordTableGenerator.Emit
712 // uint64_t Num schema entries
713 // uint64_t Schema entry 0
714 // uint64_t Schema entry 1
715 // ....
716 // uint64_t Schema entry N - 1
717 // Frames serialized one after another
718 // Call stacks encoded as a radix tree
719 // OnDiskChainedHashTable MemProfRecordData
720 static Error writeMemProfV3(ProfOStream &OS,
721                             memprof::IndexedMemProfData &MemProfData,
722                             bool MemProfFullSchema) {
723   OS.write(memprof::Version3);
724   uint64_t HeaderUpdatePos = OS.tell();
725   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
726   OS.write(0ULL); // Reserve space for the memprof record payload offset.
727   OS.write(0ULL); // Reserve space for the memprof record table offset.
728 
729   auto Schema = memprof::getHotColdSchema();
730   if (MemProfFullSchema)
731     Schema = memprof::getFullSchema();
732   writeMemProfSchema(OS, Schema);
733 
734   llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
735       memprof::computeFrameHistogram(MemProfData.CallStacks);
736   assert(MemProfData.Frames.size() == FrameHistogram.size());
737 
738   llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
739       writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);
740 
741   uint64_t CallStackPayloadOffset = OS.tell();
742   // The number of elements in the call stack array.
743   unsigned NumElements = 0;
744   llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
745       MemProfCallStackIndexes =
746           writeMemProfCallStackArray(OS, MemProfData.CallStacks,
747                                      MemProfFrameIndexes, FrameHistogram,
748                                      NumElements);
749 
750   uint64_t RecordPayloadOffset = OS.tell();
751   uint64_t RecordTableOffset =
752       writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
753                           &MemProfCallStackIndexes);
754 
755   // IndexedMemProfReader::deserializeV3 computes the number of elements in the
756   // call stack array from the difference between CallStackPayloadOffset and
757   // RecordPayloadOffset.  Verify that the computation works.
758   assert(CallStackPayloadOffset +
759              NumElements * sizeof(memprof::LinearFrameId) ==
760          RecordPayloadOffset);
761 
762   uint64_t Header[] = {
763       CallStackPayloadOffset,
764       RecordPayloadOffset,
765       RecordTableOffset,
766   };
767   OS.patch({{HeaderUpdatePos, Header}});
768 
769   return Error::success();
770 }
771 
772 // Write out the MemProf data in a requested version.
773 static Error writeMemProf(ProfOStream &OS,
774                           memprof::IndexedMemProfData &MemProfData,
775                           memprof::IndexedVersion MemProfVersionRequested,
776                           bool MemProfFullSchema) {
777   switch (MemProfVersionRequested) {
778   case memprof::Version2:
779     return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
780   case memprof::Version3:
781     return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
782   }
783 
784   return make_error<InstrProfError>(
785       instrprof_error::unsupported_version,
786       formatv("MemProf version {} not supported; "
787               "requires version between {} and {}, inclusive",
788               MemProfVersionRequested, memprof::MinimumSupportedVersion,
789               memprof::MaximumSupportedVersion));
790 }
791 
792 uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,
793                                       const bool WritePrevVersion,
794                                       ProfOStream &OS) {
795   // Only write out the first four fields.
796   for (int I = 0; I < 4; I++)
797     OS.write(reinterpret_cast<const uint64_t *>(&Header)[I]);
798 
799   // Remember the offset of the remaining fields to allow back patching later.
800   auto BackPatchStartOffset = OS.tell();
801 
802   // Reserve the space for back patching later.
803   OS.write(0); // HashOffset
804   OS.write(0); // MemProfOffset
805   OS.write(0); // BinaryIdOffset
806   OS.write(0); // TemporalProfTracesOffset
807   if (!WritePrevVersion)
808     OS.write(0); // VTableNamesOffset
809 
810   return BackPatchStartOffset;
811 }
812 
813 Error InstrProfWriter::writeBinaryIds(ProfOStream &OS) {
814   // BinaryIdSection has two parts:
815   // 1. uint64_t BinaryIdsSectionSize
816   // 2. list of binary ids that consist of:
817   //    a. uint64_t BinaryIdLength
818   //    b. uint8_t  BinaryIdData
819   //    c. uint8_t  Padding (if necessary)
820   // Calculate size of binary section.
821   uint64_t BinaryIdsSectionSize = 0;
822 
823   // Remove duplicate binary ids.
824   llvm::sort(BinaryIds);
825   BinaryIds.erase(llvm::unique(BinaryIds), BinaryIds.end());
826 
827   for (const auto &BI : BinaryIds) {
828     // Increment by binary id length data type size.
829     BinaryIdsSectionSize += sizeof(uint64_t);
830     // Increment by binary id data length, aligned to 8 bytes.
831     BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t));
832   }
833   // Write binary ids section size.
834   OS.write(BinaryIdsSectionSize);
835 
836   for (const auto &BI : BinaryIds) {
837     uint64_t BILen = BI.size();
838     // Write binary id length.
839     OS.write(BILen);
840     // Write binary id data.
841     for (unsigned K = 0; K < BILen; K++)
842       OS.writeByte(BI[K]);
843     // Write padding if necessary.
844     uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen;
845     for (unsigned K = 0; K < PaddingSize; K++)
846       OS.writeByte(0);
847   }
848 
849   return Error::success();
850 }
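// Worked example of the size computation above, assuming a hypothetical
// 20-byte build ID: the entry occupies 8 bytes for the length field plus
// alignToPowerOf2(20, 8) == 24 bytes for the data, i.e. 32 bytes in total,
// of which the final 4 bytes are zero padding.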
851 
852 Error InstrProfWriter::writeVTableNames(ProfOStream &OS) {
853   std::vector<std::string> VTableNameStrs;
854   for (StringRef VTableName : VTableNames.keys())
855     VTableNameStrs.push_back(VTableName.str());
856 
857   std::string CompressedVTableNames;
858   if (!VTableNameStrs.empty())
859     if (Error E = collectGlobalObjectNameStrings(
860             VTableNameStrs, compression::zlib::isAvailable(),
861             CompressedVTableNames))
862       return E;
863 
864   const uint64_t CompressedStringLen = CompressedVTableNames.length();
865 
866   // Record the length of compressed string.
867   OS.write(CompressedStringLen);
868 
869   // Write the chars in compressed strings.
870   for (auto &c : CompressedVTableNames)
871     OS.writeByte(static_cast<uint8_t>(c));
872 
873   // Pad up to a multiple of 8.
874   // InstrProfReader reads exactly 'CompressedStringLen' bytes; the rest is padding.
875   const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
876 
877   for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
878     OS.writeByte(0);
879 
880   return Error::success();
881 }
882 
883 Error InstrProfWriter::writeImpl(ProfOStream &OS) {
884   using namespace IndexedInstrProf;
885   using namespace support;
886 
887   OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;
888 
889   InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs);
890   InfoObj->SummaryBuilder = &ISB;
891   InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs);
892   InfoObj->CSSummaryBuilder = &CSISB;
893 
894   // Populate the hash table generator.
895   SmallVector<std::pair<StringRef, const ProfilingData *>> OrderedData;
896   for (const auto &I : FunctionData)
897     if (shouldEncodeData(I.getValue()))
898       OrderedData.emplace_back((I.getKey()), &I.getValue());
899   llvm::sort(OrderedData, less_first());
900   for (const auto &I : OrderedData)
901     Generator.insert(I.first, I.second);
902 
903   // Write the header.
904   IndexedInstrProf::Header Header;
905   Header.Version = WritePrevVersion
906                        ? IndexedInstrProf::ProfVersion::Version11
907                        : IndexedInstrProf::ProfVersion::CurrentVersion;
908   // The WritePrevVersion handling will either need to be removed or updated
909   // if the version is advanced beyond 12.
910   static_assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
911                 IndexedInstrProf::ProfVersion::Version12);
912   if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
913     Header.Version |= VARIANT_MASK_IR_PROF;
914   if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
915     Header.Version |= VARIANT_MASK_CSIR_PROF;
916   if (static_cast<bool>(ProfileKind &
917                         InstrProfKind::FunctionEntryInstrumentation))
918     Header.Version |= VARIANT_MASK_INSTR_ENTRY;
919   if (static_cast<bool>(ProfileKind &
920                         InstrProfKind::LoopEntriesInstrumentation))
921     Header.Version |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
922   if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
923     Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
924   if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
925     Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
926   if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf))
927     Header.Version |= VARIANT_MASK_MEMPROF;
928   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
929     Header.Version |= VARIANT_MASK_TEMPORAL_PROF;
930 
931   const uint64_t BackPatchStartOffset =
932       writeHeader(Header, WritePrevVersion, OS);
933 
934   // Reserve space to write profile summary data.
935   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
936   uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
937   // Remember the summary offset.
938   uint64_t SummaryOffset = OS.tell();
939   for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
940     OS.write(0);
941   uint64_t CSSummaryOffset = 0;
942   uint64_t CSSummarySize = 0;
943   if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
944     CSSummaryOffset = OS.tell();
945     CSSummarySize = SummarySize / sizeof(uint64_t);
946     for (unsigned I = 0; I < CSSummarySize; I++)
947       OS.write(0);
948   }
949 
950   // Write the hash table.
951   uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);
952 
953   // Write the MemProf profile data if we have it.
954   uint64_t MemProfSectionStart = 0;
955   if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
956     MemProfSectionStart = OS.tell();
957     if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested,
958                               MemProfFullSchema))
959       return E;
960   }
961 
962   uint64_t BinaryIdSectionStart = OS.tell();
963   if (auto E = writeBinaryIds(OS))
964     return E;
965 
966   uint64_t VTableNamesSectionStart = OS.tell();
967 
968   if (!WritePrevVersion)
969     if (Error E = writeVTableNames(OS))
970       return E;
971 
972   uint64_t TemporalProfTracesSectionStart = 0;
973   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
974     TemporalProfTracesSectionStart = OS.tell();
975     OS.write(TemporalProfTraces.size());
976     OS.write(TemporalProfTraceStreamSize);
977     for (auto &Trace : TemporalProfTraces) {
978       OS.write(Trace.Weight);
979       OS.write(Trace.FunctionNameRefs.size());
980       for (auto &NameRef : Trace.FunctionNameRefs)
981         OS.write(NameRef);
982     }
983   }
984 
985   // Allocate space for data to be serialized out.
986   std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
987       IndexedInstrProf::allocSummary(SummarySize);
988   // Compute the Summary and copy the data to the data
989   // structure to be serialized out (to disk or buffer).
990   std::unique_ptr<ProfileSummary> PS = ISB.getSummary();
991   setSummary(TheSummary.get(), *PS);
992   InfoObj->SummaryBuilder = nullptr;
993 
994   // For Context Sensitive summary.
995   std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
996   if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
997     TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
998     std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
999     setSummary(TheCSSummary.get(), *CSPS);
1000   }
1001   InfoObj->CSSummaryBuilder = nullptr;
1002 
1003   SmallVector<uint64_t, 8> HeaderOffsets = {HashTableStart, MemProfSectionStart,
1004                                             BinaryIdSectionStart,
1005                                             TemporalProfTracesSectionStart};
1006   if (!WritePrevVersion)
1007     HeaderOffsets.push_back(VTableNamesSectionStart);
1008 
1009   PatchItem PatchItems[] = {
1010       // Patch the Header fields
1011       {BackPatchStartOffset, HeaderOffsets},
1012       // Patch the summary data.
1013       {SummaryOffset,
1014        ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheSummary.get()),
1015                           SummarySize / sizeof(uint64_t))},
1016       {CSSummaryOffset,
1017        ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheCSSummary.get()),
1018                           CSSummarySize)}};
1019 
1020   OS.patch(PatchItems);
1021 
1022   for (const auto &I : FunctionData)
1023     for (const auto &F : I.getValue())
1024       if (Error E = validateRecord(F.second))
1025         return E;
1026 
1027   return Error::success();
1028 }
1029 
1030 Error InstrProfWriter::write(raw_fd_ostream &OS) {
1031   // Write the profile to the output stream.
1032   ProfOStream POS(OS);
1033   return writeImpl(POS);
1034 }
1035 
1036 Error InstrProfWriter::write(raw_string_ostream &OS) {
1037   ProfOStream POS(OS);
1038   return writeImpl(POS);
1039 }
1040 
1041 std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
1042   std::string Data;
1043   raw_string_ostream OS(Data);
1044   // Write the profile into the string-backed stream.
1045   if (Error E = write(OS))
1046     return nullptr;
1047   // Return this in an aligned memory buffer.
1048   return MemoryBuffer::getMemBufferCopy(Data);
1049 }
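// A minimal end-to-end sketch of producing an in-memory indexed profile
// (record contents are hypothetical; error handling is elided):
//
// \code
//   InstrProfWriter Writer;
//   auto Warn = [](Error E) { consumeError(std::move(E)); };
//   Writer.addRecord(NamedInstrProfRecord("main", /*Hash=*/0x5678, {1, 2, 3}),
//                    /*Weight=*/1, Warn);
//   std::unique_ptr<MemoryBuffer> Buf = Writer.writeBuffer();
//   // On success, Buf holds bytes that an IndexedInstrProfReader can parse.
// \endcode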
1050 
1051 static const char *ValueProfKindStr[] = {
1052 #define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,
1053 #include "llvm/ProfileData/InstrProfData.inc"
1054 };
1055 
1056 Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) {
1057   for (uint32_t VK = 0; VK <= IPVK_Last; VK++) {
1058     if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
1059       continue;
1060     uint32_t NS = Func.getNumValueSites(VK);
1061     for (uint32_t S = 0; S < NS; S++) {
1062       DenseSet<uint64_t> SeenValues;
1063       for (const auto &V : Func.getValueArrayForSite(VK, S))
1064         if (!SeenValues.insert(V.Value).second)
1065           return make_error<InstrProfError>(instrprof_error::invalid_prof);
1066     }
1067   }
1068 
1069   return Error::success();
1070 }
1071 
1072 void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
1073                                         const InstrProfRecord &Func,
1074                                         InstrProfSymtab &Symtab,
1075                                         raw_fd_ostream &OS) {
1076   OS << Name << "\n";
1077   OS << "# Func Hash:\n" << Hash << "\n";
1078   OS << "# Num Counters:\n" << Func.Counts.size() << "\n";
1079   OS << "# Counter Values:\n";
1080   for (uint64_t Count : Func.Counts)
1081     OS << Count << "\n";
1082 
1083   if (Func.BitmapBytes.size() > 0) {
1084     OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n";
1085     OS << "# Bitmap Byte Values:\n";
1086     for (uint8_t Byte : Func.BitmapBytes) {
1087       OS << "0x";
1088       OS.write_hex(Byte);
1089       OS << "\n";
1090     }
1091     OS << "\n";
1092   }
1093 
1094   uint32_t NumValueKinds = Func.getNumValueKinds();
1095   if (!NumValueKinds) {
1096     OS << "\n";
1097     return;
1098   }
1099 
1100   OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n";
1101   for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) {
1102     uint32_t NS = Func.getNumValueSites(VK);
1103     if (!NS)
1104       continue;
1105     OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n";
1106     OS << "# NumValueSites:\n" << NS << "\n";
1107     for (uint32_t S = 0; S < NS; S++) {
1108       auto VD = Func.getValueArrayForSite(VK, S);
1109       OS << VD.size() << "\n";
1110       for (const auto &V : VD) {
1111         if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
1112           OS << Symtab.getFuncOrVarNameIfDefined(V.Value) << ":" << V.Count
1113              << "\n";
1114         else
1115           OS << V.Value << ":" << V.Count << "\n";
1116       }
1117     }
1118   }
1119 
1120   OS << "\n";
1121 }
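// For a record with two counters and no bitmap or value data, the routine
// above emits text of the following shape (name and counter values are
// hypothetical):
//
//   foo
//   # Func Hash:
//   1234
//   # Num Counters:
//   2
//   # Counter Values:
//   5
//   10
//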
1122 
1123 Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
1124   // Check CS first since it implies an IR level profile.
1125   if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
1126     OS << "# CSIR level Instrumentation Flag\n:csir\n";
1127   else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
1128     OS << "# IR level Instrumentation Flag\n:ir\n";
1129 
1130   if (static_cast<bool>(ProfileKind &
1131                         InstrProfKind::FunctionEntryInstrumentation))
1132     OS << "# Always instrument the function entry block\n:entry_first\n";
1133   if (static_cast<bool>(ProfileKind &
1134                         InstrProfKind::LoopEntriesInstrumentation))
1135     OS << "# Always instrument the loop entry "
1136           "blocks\n:instrument_loop_entries\n";
1137   if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
1138     OS << "# Instrument block coverage\n:single_byte_coverage\n";
1139   InstrProfSymtab Symtab;
1140 
1141   using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>;
1142   using RecordType = std::pair<StringRef, FuncPair>;
1143   SmallVector<RecordType, 4> OrderedFuncData;
1144 
1145   for (const auto &I : FunctionData) {
1146     if (shouldEncodeData(I.getValue())) {
1147       if (Error E = Symtab.addFuncName(I.getKey()))
1148         return E;
1149       for (const auto &Func : I.getValue())
1150         OrderedFuncData.push_back(std::make_pair(I.getKey(), Func));
1151     }
1152   }
1153 
1154   for (const auto &VTableName : VTableNames)
1155     if (Error E = Symtab.addVTableName(VTableName.getKey()))
1156       return E;
1157 
1158   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
1159     writeTextTemporalProfTraceData(OS, Symtab);
1160 
1161   llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) {
1162     return std::tie(A.first, A.second.first) <
1163            std::tie(B.first, B.second.first);
1164   });
1165 
1166   for (const auto &record : OrderedFuncData) {
1167     const StringRef &Name = record.first;
1168     const FuncPair &Func = record.second;
1169     writeRecordInText(Name, Func.first, Func.second, Symtab, OS);
1170   }
1171 
1172   for (const auto &record : OrderedFuncData) {
1173     const FuncPair &Func = record.second;
1174     if (Error E = validateRecord(Func.second))
1175       return E;
1176   }
1177 
1178   return Error::success();
1179 }
1180 
1181 void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS,
1182                                                      InstrProfSymtab &Symtab) {
1183   OS << ":temporal_prof_traces\n";
1184   OS << "# Num Temporal Profile Traces:\n" << TemporalProfTraces.size() << "\n";
1185   OS << "# Temporal Profile Trace Stream Size:\n"
1186      << TemporalProfTraceStreamSize << "\n";
1187   for (auto &Trace : TemporalProfTraces) {
1188     OS << "# Weight:\n" << Trace.Weight << "\n";
1189     for (auto &NameRef : Trace.FunctionNameRefs)
1190       OS << Symtab.getFuncOrVarName(NameRef) << ",";
1191     OS << "\n";
1192   }
1193   OS << "\n";
1194 }
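// For a single trace of weight 1 covering main, foo, and bar (hypothetical
// names), the section emitted above looks like:
//
//   :temporal_prof_traces
//   # Num Temporal Profile Traces:
//   1
//   # Temporal Profile Trace Stream Size:
//   1
//   # Weight:
//   1
//   main,foo,bar,
//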
1195