//===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing profiling data for clang's
// instrumentation based PGO and coverage.
//
//===----------------------------------------------------------------------===//

#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <ctime>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

// A struct to define how the data stream should be patched. For Indexed
// profiling, only uint64_t data type is needed.
struct PatchItem {
  uint64_t Pos;         // Where to patch.
  ArrayRef<uint64_t> D; // An array of source data.
};

namespace llvm {

// A wrapper class to abstract writer stream with support of bytes
// back patching.
class ProfOStream {
public:
  ProfOStream(raw_fd_ostream &FD)
      : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
  ProfOStream(raw_string_ostream &STR)
      : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}

  [[nodiscard]] uint64_t tell() const { return OS.tell(); }
  void write(uint64_t V) { LE.write<uint64_t>(V); }
  void write32(uint32_t V) { LE.write<uint32_t>(V); }
  void writeByte(uint8_t V) { LE.write<uint8_t>(V); }

  // \c patch can only be called when all data is written and flushed.
  // For raw_string_ostream, the patch is done on the target string
  // directly and it won't be reflected in the stream's internal buffer.
  void patch(ArrayRef<PatchItem> P) {
    using namespace support;

    if (IsFDOStream) {
      raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
      const uint64_t LastPos = FDOStream.tell();
      for (const auto &K : P) {
        FDOStream.seek(K.Pos);
        for (uint64_t Elem : K.D)
          write(Elem);
      }
      // Reset the stream to the last position after patching so that users
      // don't accidentally overwrite data. This makes it consistent with
      // the string stream below which replaces the data directly.
      FDOStream.seek(LastPos);
    } else {
      raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
      std::string &Data = SOStream.str(); // with flush
      for (const auto &K : P) {
        for (int I = 0, E = K.D.size(); I != E; I++) {
          uint64_t Bytes =
              endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
          Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
                       (const char *)&Bytes, sizeof(uint64_t));
        }
      }
    }
  }

  // If \c OS is an instance of \c raw_fd_ostream, this field will be
  // true. Otherwise, \c OS will be a raw_string_ostream.
  bool IsFDOStream;
  raw_ostream &OS;
  support::endian::Writer LE;
};

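// Illustrative sketch (not part of the writer API): the back-patching support
// above is typically used by reserving fixed-size slots, remembering their
// position, and patching them once the real values are known, e.g.
//
//   uint64_t PatchPos = OS.tell();
//   OS.write(0); // Placeholder for a section offset.
//   // ... write the section, recording its start offset ...
//   uint64_t Offsets[] = {SectionStart};
//   OS.patch({{PatchPos, Offsets}});
//
// writeMemProfV2/V3 and writeImpl below follow this pattern.
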
class InstrProfRecordWriterTrait {
public:
  using key_type = StringRef;
  using key_type_ref = StringRef;

  using data_type = const InstrProfWriter::ProfilingData *const;
  using data_type_ref = const InstrProfWriter::ProfilingData *const;

  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

  llvm::endianness ValueProfDataEndianness = llvm::endianness::little;
  InstrProfSummaryBuilder *SummaryBuilder;
  InstrProfSummaryBuilder *CSSummaryBuilder;

  InstrProfRecordWriterTrait() = default;

  static hash_value_type ComputeHash(key_type_ref K) {
    return IndexedInstrProf::ComputeHash(K);
  }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);

    offset_type N = K.size();
    LE.write<offset_type>(N);

    offset_type M = 0;
    for (const auto &ProfileData : *V) {
      const InstrProfRecord &ProfRecord = ProfileData.second;
      M += sizeof(uint64_t); // The function hash
      M += sizeof(uint64_t); // The size of the Counts vector
      M += ProfRecord.Counts.size() * sizeof(uint64_t);
      M += sizeof(uint64_t); // The size of the Bitmap vector
      M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t);

      // Value data
      M += ValueProfData::getSize(ProfileData.second);
    }
    LE.write<offset_type>(M);

    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N) {
    Out.write(K.data(), N);
  }

  void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);
    for (const auto &ProfileData : *V) {
      const InstrProfRecord &ProfRecord = ProfileData.second;
      if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))
        CSSummaryBuilder->addRecord(ProfRecord);
      else
        SummaryBuilder->addRecord(ProfRecord);

      LE.write<uint64_t>(ProfileData.first); // Function hash
      LE.write<uint64_t>(ProfRecord.Counts.size());
      for (uint64_t I : ProfRecord.Counts)
        LE.write<uint64_t>(I);

      LE.write<uint64_t>(ProfRecord.BitmapBytes.size());
      for (uint64_t I : ProfRecord.BitmapBytes)
        LE.write<uint64_t>(I);

      // Write value data
      std::unique_ptr<ValueProfData> VDataPtr =
          ValueProfData::serializeFrom(ProfileData.second);
      uint32_t S = VDataPtr->getSize();
      VDataPtr->swapBytesFromHost(ValueProfDataEndianness);
      Out.write((const char *)VDataPtr.get(), S);
    }
  }
};

} // end namespace llvm

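// For reference (a sketch derived from EmitData above): the payload emitted
// for each function name key is a sequence of (hash, record) entries, each
// laid out little endian as
//
//   uint64_t FuncHash
//   uint64_t NumCounters
//   uint64_t Counters[NumCounters]
//   uint64_t NumBitmapBytes
//   uint64_t BitmapBytes[NumBitmapBytes]   // one uint64_t per byte
//   ValueProfData                          // variable-length blob
//
// matching the sizes accumulated in EmitKeyDataLength.
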
InstrProfWriter::InstrProfWriter(
    bool Sparse, uint64_t TemporalProfTraceReservoirSize,
    uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
    bool MemprofGenerateRandomHotness,
    unsigned MemprofGenerateRandomHotnessSeed)
    : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
      TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
      InfoObj(new InstrProfRecordWriterTrait()),
      WritePrevVersion(WritePrevVersion),
      MemProfVersionRequested(MemProfVersionRequested),
      MemProfFullSchema(MemProfFullSchema),
      MemprofGenerateRandomHotness(MemprofGenerateRandomHotness) {
  // Set up the random number seed if requested.
  if (MemprofGenerateRandomHotness) {
    unsigned seed = MemprofGenerateRandomHotnessSeed
                        ? MemprofGenerateRandomHotnessSeed
                        : std::time(nullptr);
    errs() << "random hotness seed = " << seed << "\n";
    std::srand(seed);
  }
}

InstrProfWriter::~InstrProfWriter() { delete InfoObj; }

// Internal interface for testing purposes only.
void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
  InfoObj->ValueProfDataEndianness = Endianness;
}

void InstrProfWriter::setOutputSparse(bool Sparse) {
  this->Sparse = Sparse;
}

void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
                                function_ref<void(Error)> Warn) {
  auto Name = I.Name;
  auto Hash = I.Hash;
  addRecord(Name, Hash, std::move(I), Weight, Warn);
}

void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
                                    OverlapStats &Overlap,
                                    OverlapStats &FuncLevelOverlap,
                                    const OverlapFuncFilters &FuncFilter) {
  auto Name = Other.Name;
  auto Hash = Other.Hash;
  Other.accumulateCounts(FuncLevelOverlap.Test);
  if (!FunctionData.contains(Name)) {
    Overlap.addOneUnique(FuncLevelOverlap.Test);
    return;
  }
  if (FuncLevelOverlap.Test.CountSum < 1.0f) {
    Overlap.Overlap.NumEntries += 1;
    return;
  }
  auto &ProfileDataMap = FunctionData[Name];
  bool NewFunc;
  ProfilingData::iterator Where;
  std::tie(Where, NewFunc) =
      ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
  if (NewFunc) {
    Overlap.addOneMismatch(FuncLevelOverlap.Test);
    return;
  }
  InstrProfRecord &Dest = Where->second;

  uint64_t ValueCutoff = FuncFilter.ValueCutoff;
  if (!FuncFilter.NameFilter.empty() && Name.contains(FuncFilter.NameFilter))
    ValueCutoff = 0;

  Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);
}

void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
                                InstrProfRecord &&I, uint64_t Weight,
                                function_ref<void(Error)> Warn) {
  auto &ProfileDataMap = FunctionData[Name];

  bool NewFunc;
  ProfilingData::iterator Where;
  std::tie(Where, NewFunc) =
      ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
  InstrProfRecord &Dest = Where->second;

  auto MapWarn = [&](instrprof_error E) {
    Warn(make_error<InstrProfError>(E));
  };

  if (NewFunc) {
    // We've never seen a function with this name and hash, add it.
    Dest = std::move(I);
    if (Weight > 1)
      Dest.scale(Weight, 1, MapWarn);
  } else {
    // We're updating a function we've seen before.
    Dest.merge(I, Weight, MapWarn);
  }

  Dest.sortValueData();
}

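// Usage sketch (hypothetical caller, assuming the default constructor
// arguments declared in InstrProfWriter.h): merging the same (Name, Hash)
// pair accumulates counters, and a Weight > 1 scales the incoming counters
// before merging.
//
//   InstrProfWriter Writer;
//   auto Warn = [](Error E) { consumeError(std::move(E)); };
//   Writer.addRecord({"foo", 0x1234, {1, 2}}, /*Weight=*/1, Warn);
//   Writer.addRecord({"foo", 0x1234, {1, 2}}, /*Weight=*/2, Warn);
//   // "foo" now holds counters {1 + 2*1, 2 + 2*2} == {3, 6}.
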
void InstrProfWriter::addMemProfRecord(
    const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) {
  auto NewRecord = Record;
  // Produce random hotness values if requested. We specify the lifetime
  // access density and lifetime length that will result in a cold or not cold
  // hotness. See the logic in getAllocType() in
  // Analysis/MemoryProfileInfo.cpp.
  if (MemprofGenerateRandomHotness) {
    for (auto &Alloc : NewRecord.AllocSites) {
      // To get a not cold context, set the lifetime access density to the
      // maximum value and the lifetime to 0.
      uint64_t NewTLAD = std::numeric_limits<uint64_t>::max();
      uint64_t NewTL = 0;
      bool IsCold = std::rand() % 2;
      if (IsCold) {
        // To get a cold context, set the lifetime access density to 0 and the
        // lifetime to the maximum value.
        NewTLAD = 0;
        NewTL = std::numeric_limits<uint64_t>::max();
      }
      Alloc.Info.setTotalLifetimeAccessDensity(NewTLAD);
      Alloc.Info.setTotalLifetime(NewTL);
    }
  }
  auto [Iter, Inserted] = MemProfData.Records.insert({Id, NewRecord});
  // If we inserted a new record then we are done.
  if (Inserted) {
    return;
  }
  memprof::IndexedMemProfRecord &Existing = Iter->second;
  Existing.merge(NewRecord);
}

bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
                                      const memprof::Frame &Frame,
                                      function_ref<void(Error)> Warn) {
  auto [Iter, Inserted] = MemProfData.Frames.insert({Id, Frame});
  // If a mapping already exists for the current frame id and it does not
  // match the new mapping provided then reset the existing contents and bail
  // out. We don't support the merging of memprof data whose Frame -> Id
  // mapping across profiles is inconsistent.
  if (!Inserted && Iter->second != Frame) {
    Warn(make_error<InstrProfError>(instrprof_error::malformed,
                                    "frame to id mapping mismatch"));
    return false;
  }
  return true;
}

bool InstrProfWriter::addMemProfCallStack(
    const memprof::CallStackId CSId,
    const llvm::SmallVector<memprof::FrameId> &CallStack,
    function_ref<void(Error)> Warn) {
  auto [Iter, Inserted] = MemProfData.CallStacks.insert({CSId, CallStack});
  // If a mapping already exists for the current call stack id and it does not
  // match the new mapping provided then reset the existing contents and bail
  // out. We don't support the merging of memprof data whose CallStack -> Id
  // mapping across profiles is inconsistent.
  if (!Inserted && Iter->second != CallStack) {
    Warn(make_error<InstrProfError>(instrprof_error::malformed,
                                    "call stack to id mapping mismatch"));
    return false;
  }
  return true;
}

bool InstrProfWriter::addMemProfData(memprof::IndexedMemProfData Incoming,
                                     function_ref<void(Error)> Warn) {
  // Return immediately if everything is empty.
  if (Incoming.Frames.empty() && Incoming.CallStacks.empty() &&
      Incoming.Records.empty())
    return true;

  // Otherwise, every component must be non-empty.
  assert(!Incoming.Frames.empty() && !Incoming.CallStacks.empty() &&
         !Incoming.Records.empty());

  if (MemProfData.Frames.empty())
    MemProfData.Frames = std::move(Incoming.Frames);
  else
    for (const auto &[Id, F] : Incoming.Frames)
      if (!addMemProfFrame(Id, F, Warn))
        return false;

  if (MemProfData.CallStacks.empty())
    MemProfData.CallStacks = std::move(Incoming.CallStacks);
  else
    for (const auto &[CSId, CS] : Incoming.CallStacks)
      if (!addMemProfCallStack(CSId, CS, Warn))
        return false;

  // Add one record at a time if randomization is requested.
  if (MemProfData.Records.empty() && !MemprofGenerateRandomHotness)
    MemProfData.Records = std::move(Incoming.Records);
  else
    for (const auto &[GUID, Record] : Incoming.Records)
      addMemProfRecord(GUID, Record);

  return true;
}

void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
  llvm::append_range(BinaryIds, BIs);
}

void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
  assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
  assert(!Trace.FunctionNameRefs.empty());
  if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
    // Simply append the trace if we have not yet hit our reservoir size limit.
    TemporalProfTraces.push_back(std::move(Trace));
  } else {
    // Otherwise, replace a random trace in the stream.
    std::uniform_int_distribution<uint64_t> Distribution(
        0, TemporalProfTraceStreamSize);
    uint64_t RandomIndex = Distribution(RNG);
    if (RandomIndex < TemporalProfTraces.size())
      TemporalProfTraces[RandomIndex] = std::move(Trace);
  }
  ++TemporalProfTraceStreamSize;
}

void InstrProfWriter::addTemporalProfileTraces(
    SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
  for (auto &Trace : SrcTraces)
    if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
      Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
  llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
  // Assume that the source has the same reservoir size as the destination to
  // avoid needing to record it in the indexed profile format.
  bool IsDestSampled =
      (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
  bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
  if (!IsDestSampled && IsSrcSampled) {
    // If one of the trace streams is sampled, ensure that it belongs to Dest.
    std::swap(TemporalProfTraces, SrcTraces);
    std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
    std::swap(IsDestSampled, IsSrcSampled);
  }
  if (!IsSrcSampled) {
    // If the source stream is not sampled, we add each source trace normally.
    for (auto &Trace : SrcTraces)
      addTemporalProfileTrace(std::move(Trace));
    return;
  }
  // Otherwise, we find the traces that would have been removed if we added
  // the whole source stream.
  SmallSetVector<uint64_t, 8> IndicesToReplace;
  for (uint64_t I = 0; I < SrcStreamSize; I++) {
    std::uniform_int_distribution<uint64_t> Distribution(
        0, TemporalProfTraceStreamSize);
    uint64_t RandomIndex = Distribution(RNG);
    if (RandomIndex < TemporalProfTraces.size())
      IndicesToReplace.insert(RandomIndex);
    ++TemporalProfTraceStreamSize;
  }
  // Then we insert a random sample of the source traces.
  llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
  for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
    TemporalProfTraces[Index] = std::move(Trace);
}

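// Note on the sampling above (a sketch of the math, not normative): this is
// standard reservoir sampling ("Algorithm R"). With a reservoir of size R,
// the trace arriving as stream element I (0-based, I >= R) is kept with
// probability R / (I + 1): at that point TemporalProfTraceStreamSize == I, so
// the uniform draw over [0, I] has I + 1 outcomes and only outcomes below the
// reservoir size R trigger a replacement. The net effect is that every trace
// in the stream survives with equal probability R / StreamSize.
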
void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
                                             function_ref<void(Error)> Warn) {
  for (auto &I : IPW.FunctionData)
    for (auto &Func : I.getValue())
      addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);

  BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size());
  for (auto &I : IPW.BinaryIds)
    addBinaryIds(I);

  addTemporalProfileTraces(IPW.TemporalProfTraces,
                           IPW.TemporalProfTraceStreamSize);

  MemProfData.Frames.reserve(IPW.MemProfData.Frames.size());
  for (auto &[FrameId, Frame] : IPW.MemProfData.Frames) {
    // If we weren't able to add the frame mappings then it doesn't make sense
    // to try to merge the records from this profile.
    if (!addMemProfFrame(FrameId, Frame, Warn))
      return;
  }

  MemProfData.CallStacks.reserve(IPW.MemProfData.CallStacks.size());
  for (auto &[CSId, CallStack] : IPW.MemProfData.CallStacks) {
    if (!addMemProfCallStack(CSId, CallStack, Warn))
      return;
  }

  MemProfData.Records.reserve(IPW.MemProfData.Records.size());
  for (auto &[GUID, Record] : IPW.MemProfData.Records) {
    addMemProfRecord(GUID, Record);
  }
}

bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
  if (!Sparse)
    return true;
  for (const auto &Func : PD) {
    const InstrProfRecord &IPR = Func.second;
    if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))
      return true;
    if (llvm::any_of(IPR.BitmapBytes, [](uint8_t Byte) { return Byte > 0; }))
      return true;
  }
  return false;
}

static void setSummary(IndexedInstrProf::Summary *TheSummary,
                       ProfileSummary &PS) {
  using namespace IndexedInstrProf;

  const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
  TheSummary->NumSummaryFields = Summary::NumKinds;
  TheSummary->NumCutoffEntries = Res.size();
  TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount());
  TheSummary->set(Summary::MaxBlockCount, PS.getMaxCount());
  TheSummary->set(Summary::MaxInternalBlockCount, PS.getMaxInternalCount());
  TheSummary->set(Summary::TotalBlockCount, PS.getTotalCount());
  TheSummary->set(Summary::TotalNumBlocks, PS.getNumCounts());
  TheSummary->set(Summary::TotalNumFunctions, PS.getNumFunctions());
  for (unsigned I = 0; I < Res.size(); I++)
    TheSummary->setEntry(I, Res[I]);
}

// Serialize Schema.
static void writeMemProfSchema(ProfOStream &OS,
                               const memprof::MemProfSchema &Schema) {
  OS.write(static_cast<uint64_t>(Schema.size()));
  for (const auto Id : Schema)
    OS.write(static_cast<uint64_t>(Id));
}

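// On disk (sketch of writeMemProfSchema's output above), the schema is simply
// a count followed by one uint64_t per schema entry id, e.g. for a four-entry
// schema:
//
//   0x0000000000000004      // number of schema entries
//   <Id 0> <Id 1> <Id 2> <Id 3>
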
// Serialize MemProfRecordData. Return RecordTableOffset.
static uint64_t writeMemProfRecords(
    ProfOStream &OS,
    llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
        &MemProfRecordData,
    memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
    llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
        *MemProfCallStackIndexes = nullptr) {
  memprof::RecordWriterTrait RecordWriter(Schema, Version,
                                          MemProfCallStackIndexes);
  OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
      RecordTableGenerator;
  for (auto &[GUID, Record] : MemProfRecordData) {
    // Insert the key (func hash) and value (memprof record).
    RecordTableGenerator.insert(GUID, Record, RecordWriter);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfRecordData.clear();

  // The call to Emit invokes RecordWriterTrait::EmitData which destructs
  // the memprof record copies owned by the RecordTableGenerator. This works
  // because the RecordTableGenerator is not used after this point.
  return RecordTableGenerator.Emit(OS.OS, RecordWriter);
}

// Serialize MemProfFrameData. Return FrameTableOffset.
static uint64_t writeMemProfFrames(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
  OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
      FrameTableGenerator;
  for (auto &[FrameId, Frame] : MemProfFrameData) {
    // Insert the key (frame id) and value (frame contents).
    FrameTableGenerator.insert(FrameId, Frame);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return FrameTableGenerator.Emit(OS.OS);
}

// Serialize MemProfFrameData. Return the mapping from FrameIds to their
// indexes within the frame array.
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
writeMemProfFrameArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
  // Mappings from FrameIds to array indexes.
  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;

  // Compute the order in which we serialize Frames. The order does not matter
  // in terms of correctness, but we still compute it for deserialization
  // performance. Specifically, if we serialize frequently used Frames one
  // after another, we have better cache utilization. For two Frames that
  // appear equally frequently, we break a tie by serializing the one that
  // tends to appear earlier in call stacks. We implement the tie-breaking
  // mechanism by computing the sum of indexes within call stacks for each
  // Frame. If there is still a tie, we fall back to comparing the FrameIds
  // themselves, purely for output stability.
  std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
  FrameIdOrder.reserve(MemProfFrameData.size());
  for (const auto &[Id, Frame] : MemProfFrameData)
    FrameIdOrder.emplace_back(Id, &Frame);
  assert(MemProfFrameData.size() == FrameIdOrder.size());
  llvm::sort(FrameIdOrder,
             [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
                 const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
               const auto &SL = FrameHistogram[L.first];
               const auto &SR = FrameHistogram[R.first];
               // Popular FrameIds should come first.
               if (SL.Count != SR.Count)
                 return SL.Count > SR.Count;
               // If they are equally popular, then the one that tends to
               // appear earlier in call stacks should come first.
               if (SL.PositionSum != SR.PositionSum)
                 return SL.PositionSum < SR.PositionSum;
               // Compare their FrameIds for sort stability.
               return L.first < R.first;
             });

  // Serialize all frames while creating mappings from FrameIds to their
  // linear IDs, i.e. their positions within the serialized frame array.
  uint64_t Index = 0;
  MemProfFrameIndexes.reserve(FrameIdOrder.size());
  for (const auto &[Id, F] : FrameIdOrder) {
    F->serialize(OS.OS);
    MemProfFrameIndexes.insert({Id, Index});
    ++Index;
  }
  assert(MemProfFrameData.size() == Index);
  assert(MemProfFrameData.size() == MemProfFrameIndexes.size());

  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return MemProfFrameIndexes;
}

static uint64_t writeMemProfCallStacks(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData) {
  OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
      CallStackTableGenerator;
  for (auto &[CSId, CallStack] : MemProfCallStackData)
    CallStackTableGenerator.insert(CSId, CallStack);
  // Release the memory of this MapVector as it is no longer needed.
  MemProfCallStackData.clear();

  return CallStackTableGenerator.Emit(OS.OS);
}

static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
writeMemProfCallStackArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData,
    llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
        &MemProfFrameIndexes,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
    unsigned &NumElements) {
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes;

  memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
  Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
                FrameHistogram);
  for (auto I : Builder.getRadixArray())
    OS.write32(I);
  NumElements = Builder.getRadixArray().size();
  MemProfCallStackIndexes = Builder.takeCallStackPos();

  // Release the memory of this MapVector as it is no longer needed.
  MemProfCallStackData.clear();

  return MemProfCallStackIndexes;
}

// Write out MemProf Version2 as follows:
// uint64_t Version
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
static Error writeMemProfV2(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version2);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof call stack table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t CallStackPayloadOffset = OS.tell();
  uint64_t CallStackTableOffset =
      writeMemProfCallStacks(OS, MemProfData.CallStacks);

  uint64_t Header[] = {
      RecordTableOffset, FramePayloadOffset, FrameTableOffset,
      CallStackPayloadOffset, CallStackTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version3 as follows:
// uint64_t Version
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
// uint64_t RecordPayloadOffset = Offset for the record payload
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// Frames serialized one after another
// Call stacks encoded as a radix tree
// OnDiskChainedHashTable MemProfRecordData
static Error writeMemProfV3(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version3);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof record payload offset.
  OS.write(0ULL); // Reserve space for the memprof record table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
      memprof::computeFrameHistogram(MemProfData.CallStacks);
  assert(MemProfData.Frames.size() == FrameHistogram.size());

  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
      writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);

  uint64_t CallStackPayloadOffset = OS.tell();
  // The number of elements in the call stack array.
  unsigned NumElements = 0;
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes =
          writeMemProfCallStackArray(OS, MemProfData.CallStacks,
                                     MemProfFrameIndexes, FrameHistogram,
                                     NumElements);

  uint64_t RecordPayloadOffset = OS.tell();
  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
                          &MemProfCallStackIndexes);

  // IndexedMemProfReader::deserializeV3 computes the number of elements in the
  // call stack array from the difference between CallStackPayloadOffset and
  // RecordPayloadOffset. Verify that the computation works.
  assert(CallStackPayloadOffset +
             NumElements * sizeof(memprof::LinearFrameId) ==
         RecordPayloadOffset);

  uint64_t Header[] = {
      CallStackPayloadOffset,
      RecordPayloadOffset,
      RecordTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

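// A quick size check for the V3 layout above (informal, and assuming
// memprof::LinearFrameId is the 32-bit type implied by the write32 calls):
// the radix array occupies NumElements * 4 bytes, so
//
//   RecordPayloadOffset == CallStackPayloadOffset + 4 * NumElements
//
// which is exactly the relationship the assert in writeMemProfV3 verifies and
// that IndexedMemProfReader::deserializeV3 relies on to recover NumElements.
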
// Write out the MemProf data in a requested version.
static Error writeMemProf(ProfOStream &OS,
                          memprof::IndexedMemProfData &MemProfData,
                          memprof::IndexedVersion MemProfVersionRequested,
                          bool MemProfFullSchema) {
  switch (MemProfVersionRequested) {
  case memprof::Version2:
    return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
  case memprof::Version3:
    return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
  }

  return make_error<InstrProfError>(
      instrprof_error::unsupported_version,
      formatv("MemProf version {} not supported; "
              "requires version between {} and {}, inclusive",
              MemProfVersionRequested, memprof::MinimumSupportedVersion,
              memprof::MaximumSupportedVersion));
}

uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,
                                      const bool WritePrevVersion,
                                      ProfOStream &OS) {
  // Only write out the first four fields.
  for (int I = 0; I < 4; I++)
    OS.write(reinterpret_cast<const uint64_t *>(&Header)[I]);

  // Remember the offset of the remaining fields to allow back patching later.
  auto BackPatchStartOffset = OS.tell();

  // Reserve the space for back patching later.
  OS.write(0); // HashOffset
  OS.write(0); // MemProfOffset
  OS.write(0); // BinaryIdOffset
  OS.write(0); // TemporalProfTracesOffset
  if (!WritePrevVersion)
    OS.write(0); // VTableNamesOffset

  return BackPatchStartOffset;
}

Error InstrProfWriter::writeBinaryIds(ProfOStream &OS) {
  // BinaryIdSection has two parts:
  // 1. uint64_t BinaryIdsSectionSize
  // 2. list of binary ids that consist of:
  //    a. uint64_t BinaryIdLength
  //    b. uint8_t BinaryIdData
  //    c. uint8_t Padding (if necessary)
  // Calculate size of binary section.
  uint64_t BinaryIdsSectionSize = 0;

  // Remove duplicate binary ids.
  llvm::sort(BinaryIds);
  BinaryIds.erase(llvm::unique(BinaryIds), BinaryIds.end());

  for (const auto &BI : BinaryIds) {
    // Increment by binary id length data type size.
    BinaryIdsSectionSize += sizeof(uint64_t);
    // Increment by binary id data length, aligned to 8 bytes.
    BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t));
  }
  // Write binary ids section size.
  OS.write(BinaryIdsSectionSize);

  for (const auto &BI : BinaryIds) {
    uint64_t BILen = BI.size();
    // Write binary id length.
    OS.write(BILen);
    // Write binary id data.
    for (unsigned K = 0; K < BILen; K++)
      OS.writeByte(BI[K]);
    // Write padding if necessary.
    uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen;
    for (unsigned K = 0; K < PaddingSize; K++)
      OS.writeByte(0);
  }

  return Error::success();
}

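// Worked example for the padding arithmetic above (illustrative only): a
// 20-byte build ID occupies
//   8 (length field) + 20 (data) + 4 (padding) = 32 bytes,
// since alignToPowerOf2(20, 8) == 24 and PaddingSize == 24 - 20 == 4. This
// keeps every entry, and therefore the whole section, 8-byte aligned.
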
Error InstrProfWriter::writeVTableNames(ProfOStream &OS) {
  std::vector<std::string> VTableNameStrs;
  for (StringRef VTableName : VTableNames.keys())
    VTableNameStrs.push_back(VTableName.str());

  std::string CompressedVTableNames;
  if (!VTableNameStrs.empty())
    if (Error E = collectGlobalObjectNameStrings(
            VTableNameStrs, compression::zlib::isAvailable(),
            CompressedVTableNames))
      return E;

  const uint64_t CompressedStringLen = CompressedVTableNames.length();

  // Record the length of compressed string.
  OS.write(CompressedStringLen);

  // Write the chars in compressed strings.
  for (auto &c : CompressedVTableNames)
    OS.writeByte(static_cast<uint8_t>(c));

  // Pad up to a multiple of 8.
  // InstrProfReader could read bytes according to 'CompressedStringLen'.
  const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);

  for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
    OS.writeByte(0);

  return Error::success();
}

Error InstrProfWriter::writeImpl(ProfOStream &OS) {
  using namespace IndexedInstrProf;
  using namespace support;

  OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;

  InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs);
  InfoObj->SummaryBuilder = &ISB;
  InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs);
  InfoObj->CSSummaryBuilder = &CSISB;

  // Populate the hash table generator.
  SmallVector<std::pair<StringRef, const ProfilingData *>> OrderedData;
  for (const auto &I : FunctionData)
    if (shouldEncodeData(I.getValue()))
      OrderedData.emplace_back((I.getKey()), &I.getValue());
  llvm::sort(OrderedData, less_first());
  for (const auto &I : OrderedData)
    Generator.insert(I.first, I.second);

  // Write the header.
  IndexedInstrProf::Header Header;
  Header.Version = WritePrevVersion
                       ? IndexedInstrProf::ProfVersion::Version11
                       : IndexedInstrProf::ProfVersion::CurrentVersion;
  // The WritePrevVersion handling will either need to be removed or updated
  // if the version is advanced beyond 12.
  static_assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
                IndexedInstrProf::ProfVersion::Version12);
  if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
    Header.Version |= VARIANT_MASK_IR_PROF;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
    Header.Version |= VARIANT_MASK_CSIR_PROF;
  if (static_cast<bool>(ProfileKind &
                        InstrProfKind::FunctionEntryInstrumentation))
    Header.Version |= VARIANT_MASK_INSTR_ENTRY;
  if (static_cast<bool>(ProfileKind &
                        InstrProfKind::LoopEntriesInstrumentation))
    Header.Version |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
  if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
    Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
  if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
    Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
  if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf))
    Header.Version |= VARIANT_MASK_MEMPROF;
  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
    Header.Version |= VARIANT_MASK_TEMPORAL_PROF;

  const uint64_t BackPatchStartOffset =
      writeHeader(Header, WritePrevVersion, OS);

  // Reserve space to write profile summary data.
  uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
  uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
  // Remember the summary offset.
  uint64_t SummaryOffset = OS.tell();
  for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
    OS.write(0);
  uint64_t CSSummaryOffset = 0;
  uint64_t CSSummarySize = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
    CSSummaryOffset = OS.tell();
    CSSummarySize = SummarySize / sizeof(uint64_t);
    for (unsigned I = 0; I < CSSummarySize; I++)
      OS.write(0);
  }

  // Write the hash table.
  uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);

  // Write the MemProf profile data if we have it.
  uint64_t MemProfSectionStart = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
    MemProfSectionStart = OS.tell();
    if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested,
                              MemProfFullSchema))
      return E;
  }

  uint64_t BinaryIdSectionStart = OS.tell();
  if (auto E = writeBinaryIds(OS))
    return E;

  uint64_t VTableNamesSectionStart = OS.tell();

  if (!WritePrevVersion)
    if (Error E = writeVTableNames(OS))
      return E;

  uint64_t TemporalProfTracesSectionStart = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
    TemporalProfTracesSectionStart = OS.tell();
    OS.write(TemporalProfTraces.size());
    OS.write(TemporalProfTraceStreamSize);
    for (auto &Trace : TemporalProfTraces) {
      OS.write(Trace.Weight);
      OS.write(Trace.FunctionNameRefs.size());
      for (auto &NameRef : Trace.FunctionNameRefs)
        OS.write(NameRef);
    }
  }

  // Allocate space for data to be serialized out.
  std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
      IndexedInstrProf::allocSummary(SummarySize);
  // Compute the Summary and copy the data to the data
  // structure to be serialized out (to disk or buffer).
  std::unique_ptr<ProfileSummary> PS = ISB.getSummary();
  setSummary(TheSummary.get(), *PS);
  InfoObj->SummaryBuilder = nullptr;

  // For Context Sensitive summary.
  std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
    TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
    std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
    setSummary(TheCSSummary.get(), *CSPS);
  }
  InfoObj->CSSummaryBuilder = nullptr;

  SmallVector<uint64_t, 8> HeaderOffsets = {HashTableStart, MemProfSectionStart,
                                            BinaryIdSectionStart,
                                            TemporalProfTracesSectionStart};
  if (!WritePrevVersion)
    HeaderOffsets.push_back(VTableNamesSectionStart);

  PatchItem PatchItems[] = {
      // Patch the Header fields
      {BackPatchStartOffset, HeaderOffsets},
      // Patch the summary data.
      {SummaryOffset,
       ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheSummary.get()),
                          SummarySize / sizeof(uint64_t))},
      {CSSummaryOffset,
       ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheCSSummary.get()),
                          CSSummarySize)}};

  OS.patch(PatchItems);

  for (const auto &I : FunctionData)
    for (const auto &F : I.getValue())
      if (Error E = validateRecord(F.second))
        return E;

  return Error::success();
}

Error InstrProfWriter::write(raw_fd_ostream &OS) {
  // Write the hash table.
  ProfOStream POS(OS);
  return writeImpl(POS);
}

Error InstrProfWriter::write(raw_string_ostream &OS) {
  ProfOStream POS(OS);
  return writeImpl(POS);
}

std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
  std::string Data;
  raw_string_ostream OS(Data);
  // Write the hash table.
  if (Error E = write(OS))
    return nullptr;
  // Return this in an aligned memory buffer.
  return MemoryBuffer::getMemBufferCopy(Data);
}

static const char *ValueProfKindStr[] = {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,
#include "llvm/ProfileData/InstrProfData.inc"
};

Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) {
  for (uint32_t VK = 0; VK <= IPVK_Last; VK++) {
    if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
      continue;
    uint32_t NS = Func.getNumValueSites(VK);
    for (uint32_t S = 0; S < NS; S++) {
      DenseSet<uint64_t> SeenValues;
      for (const auto &V : Func.getValueArrayForSite(VK, S))
        if (!SeenValues.insert(V.Value).second)
          return make_error<InstrProfError>(instrprof_error::invalid_prof);
    }
  }

  return Error::success();
}

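// For orientation (informal sketch, not normative): writeRecordInText below
// emits one block per (name, hash) pair in roughly this shape, with the
// bitmap and value profile sections present only when the record has them:
//
//   foo
//   # Func Hash:
//   4660
//   # Num Counters:
//   2
//   # Counter Values:
//   10
//   20
//
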
void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
                                        const InstrProfRecord &Func,
                                        InstrProfSymtab &Symtab,
                                        raw_fd_ostream &OS) {
  OS << Name << "\n";
  OS << "# Func Hash:\n" << Hash << "\n";
  OS << "# Num Counters:\n" << Func.Counts.size() << "\n";
  OS << "# Counter Values:\n";
  for (uint64_t Count : Func.Counts)
    OS << Count << "\n";

  if (Func.BitmapBytes.size() > 0) {
    OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n";
    OS << "# Bitmap Byte Values:\n";
    for (uint8_t Byte : Func.BitmapBytes) {
      OS << "0x";
      OS.write_hex(Byte);
      OS << "\n";
    }
    OS << "\n";
  }

  uint32_t NumValueKinds = Func.getNumValueKinds();
  if (!NumValueKinds) {
    OS << "\n";
    return;
  }

  OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n";
  for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) {
    uint32_t NS = Func.getNumValueSites(VK);
    if (!NS)
      continue;
    OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n";
    OS << "# NumValueSites:\n" << NS << "\n";
    for (uint32_t S = 0; S < NS; S++) {
      auto VD = Func.getValueArrayForSite(VK, S);
      OS << VD.size() << "\n";
      for (const auto &V : VD) {
        if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
          OS << Symtab.getFuncOrVarNameIfDefined(V.Value) << ":" << V.Count
             << "\n";
        else
          OS << V.Value << ":" << V.Count << "\n";
      }
    }
  }

  OS << "\n";
}

Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
  // Check CS first since it implies an IR level profile.
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
    OS << "# CSIR level Instrumentation Flag\n:csir\n";
  else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
    OS << "# IR level Instrumentation Flag\n:ir\n";

  if (static_cast<bool>(ProfileKind &
                        InstrProfKind::FunctionEntryInstrumentation))
    OS << "# Always instrument the function entry block\n:entry_first\n";
  if (static_cast<bool>(ProfileKind &
                        InstrProfKind::LoopEntriesInstrumentation))
    OS << "# Always instrument the loop entry "
          "blocks\n:instrument_loop_entries\n";
  if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
    OS << "# Instrument block coverage\n:single_byte_coverage\n";
  InstrProfSymtab Symtab;

  using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>;
  using RecordType = std::pair<StringRef, FuncPair>;
  SmallVector<RecordType, 4> OrderedFuncData;

  for (const auto &I : FunctionData) {
    if (shouldEncodeData(I.getValue())) {
      if (Error E = Symtab.addFuncName(I.getKey()))
        return E;
      for (const auto &Func : I.getValue())
        OrderedFuncData.push_back(std::make_pair(I.getKey(), Func));
    }
  }

  for (const auto &VTableName : VTableNames)
    if (Error E = Symtab.addVTableName(VTableName.getKey()))
      return E;

  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
    writeTextTemporalProfTraceData(OS, Symtab);

  llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) {
    return std::tie(A.first, A.second.first) <
           std::tie(B.first, B.second.first);
  });

  for (const auto &record : OrderedFuncData) {
    const StringRef &Name = record.first;
    const FuncPair &Func = record.second;
    writeRecordInText(Name, Func.first, Func.second, Symtab, OS);
  }

  for (const auto &record : OrderedFuncData) {
    const FuncPair &Func = record.second;
    if (Error E = validateRecord(Func.second))
      return E;
  }

  return Error::success();
}

void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS,
                                                     InstrProfSymtab &Symtab) {
  OS << ":temporal_prof_traces\n";
  OS << "# Num Temporal Profile Traces:\n" << TemporalProfTraces.size() << "\n";
  OS << "# Temporal Profile Trace Stream Size:\n"
     << TemporalProfTraceStreamSize << "\n";
  for (auto &Trace : TemporalProfTraces) {
    OS << "# Weight:\n" << Trace.Weight << "\n";
    for (auto &NameRef : Trace.FunctionNameRefs)
      OS << Symtab.getFuncOrVarName(NameRef) << ",";
    OS << "\n";
  }
  OS << "\n";
}