Lines Matching +full:avg +full:- +full:samples
1 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // llvm-profdata merges .profdata files.
11 //===----------------------------------------------------------------------===//
51 // https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentation
57 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
63 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
68 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
73 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
93 cl::init("-"), cl::desc("Output file"),
100 cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
109 cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"),
113 "max-debug-info-correlation-warnings",
118 "profiled-binary", cl::init(""),
122 "debug-info", cl::init(""),
129 BinaryFilename("binary-file", cl::init(""),
142 // factor out the common cl::sub in cl::opt constructor for subcommand-specific
148 cl::list<std::string> WeightedInputFilenames("weighted-input",
160 "GCC encoding (only meaningful for -sample)")));
162 InputFilenamesFile("input-files", cl::init(""), cl::sub(MergeSubcommand),
163 cl::desc("Path to file containing newline-separated "
165 cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
168 "dump-input-file-list", cl::init(false), cl::Hidden,
171 cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
174 cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
177 UseMD5("use-md5", cl::init(false), cl::Hidden,
179 "meaningful for -extbinary)"),
182 "compress-all-sections", cl::init(false), cl::Hidden,
185 "meaningful for -extbinary)"));
187 "sample-merge-cold-context", cl::init(false), cl::Hidden,
192 "sample-trim-cold-context", cl::init(false), cl::Hidden,
197 "sample-frame-depth-for-cold-context", cl::init(1),
200 "context-less base profile"));
202 "output-size-limit", cl::init(0), cl::Hidden, cl::sub(MergeSubcommand),
207 "gen-partial-profile", cl::init(false), cl::Hidden,
209 cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
211 "supplement-instr-with-sample", cl::init(""), cl::Hidden,
216 "format (The flag only works with -instr)"));
218 "zero-counter-threshold", cl::init(0.7), cl::Hidden,
226 "suppl-min-size-threshold", cl::init(10), cl::Hidden,
232 "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
240 "temporal-profile-trace-reservoir-size", cl::init(100),
245 "temporal-profile-max-trace-length", cl::init(10000),
250 "no-function", cl::init(""),
255 FailMode("failure-mode", cl::init(failIfAnyAreInvalid),
266 cl::desc("Generate a sparse profile (only meaningful for -instr)"));
268 "num-threads", cl::init(0), cl::sub(MergeSubcommand),
270 cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
274 "prof-sym-list", cl::init(""), cl::sub(MergeSubcommand),
279 "convert-sample-profile-layout",
289 "drop-profile-symbol-list", cl::init(false), cl::Hidden,
292 "(only meaningful for -sample)"));
295 "keep-vtable-symbols", cl::init(false), cl::Hidden,
302 // deployment of newer versions of llvm-profdata.
304 "write-prev-version", cl::init(false), cl::Hidden,
309 "memprof-version", cl::Hidden, cl::sub(MergeSubcommand),
318 "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
330 "similarity-cutoff", cl::init(0),
342 "value-cutoff", cl::init(-1),
354 SFormat("show-format", cl::init(ShowFormat::Text),
361 // TODO: Consider replacing this with `--show-format=text-encoding`.
369 "(deprecated, please use --show-format=json)"),
372 "ic-targets", cl::init(false),
375 cl::opt<bool> ShowVTables("show-vtables", cl::init(false),
379 "memop-sizes", cl::init(false),
383 cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
387 cl::CommaSeparated, "detailed-summary-cutoffs",
392 ShowHotFuncList("hot-func-list", cl::init(false),
395 cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
412 "value-cutoff", cl::init(0),
417 "list-below-cutoff", cl::init(false),
422 "show-prof-sym-list", cl::init(false),
426 "show-sec-info-only", cl::init(false),
431 cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(false),
435 "temporal-profile-traces",
444 cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false),
450 NumTestTraces("num-test-traces", cl::init(0),
451 cl::desc("Keep aside the last <num-test-traces> traces in "
458 const std::string DuplicateNameStr = "----";
495 Hint = "Perhaps you forgot to use the --sample or --memory option?";
527 [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
528 IPE = E->get();
568 Remapper->File = std::move(BufOrError.get());
570 for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
572 std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
579 Remapper->RemappingTable.insert(Parts);
637 WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
647 WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
656 std::unique_lock<std::mutex> CtxGuard{WC->Lock};
672 if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
674 WC->Errors.emplace_back(
684 WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
689 const auto &IdToFrame = Reader->getFrameMapping();
691 bool Succeeded = WC->Writer.addMemProfFrame(
700 const auto &CSIdToCallStacks = Reader->getCallStacks();
702 bool Succeeded = WC->Writer.addMemProfCallStack(
710 const auto &FunctionProfileData = Reader->getProfileData();
713 WC->Writer.addMemProfRecord(GUID, Record);
719 // TODO: This only saves the first non-fatal error from InstrProfReader, and
721 // we have more non-fatal errors from InstrProfReader in the future. How
722 // should this interact with different -failure-mode?
739 WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg),
745 if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
747 WC->Errors.emplace_back(
760 WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
768 std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
769 bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
776 const InstrProfSymtab &symtab = Reader->getSymtab();
780 WC->Writer.addVTableName(kv.getKey());
783 if (Reader->hasTemporalProfile()) {
784 auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
786 WC->Writer.addTemporalProfileTraces(
787 Traces, Reader->getTemporalProfTraceStreamSize());
789 if (Reader->hasError()) {
790 if (Error E = Reader->getError()) {
791 WC->Errors.emplace_back(std::move(E), Filename);
797 if (Error E = Reader->readBinaryIds(BinaryIds)) {
798 WC->Errors.emplace_back(std::move(E), Filename);
801 WC->Writer.addBinaryIds(BinaryIds);
804 WC->Errors.emplace_back(std::move(ReaderWarning->first),
805 ReaderWarning->second);
811 for (auto &ErrorPair : Src->Errors)
812 Dst->Errors.push_back(std::move(ErrorPair));
813 Src->Errors.clear();
815 if (Error E = Dst->Writer.mergeProfileKind(Src->Writer.getProfileKind()))
818 Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
820 std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
821 bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second;
845 llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
879 llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
898 exitWithError("cannot write a non-text format profile to the terminal");
917 // correlation modes (w/wo debug-info/object correlation).
919 exitWithError("Expected only one of -debug-info, -binary-file");
935 if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
942 // If NumThreads is not specified, auto-detect a good default.
981 Contexts[End - 1].get());
993 for (auto &ErrorPair : WC->Errors) {
1002 filterFunctions(Contexts[0]->Writer.getProfileData());
1004 writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
1019 uint64_t CntNum = Record->Counts.size();
1022 MaxCount = std::max(MaxCount, Record->Counts[I]);
1023 ZeroCntNum += !Record->Counts[I];
1030 /// -1 / -2 in order to drop the profile or scale up the
1043 // counters to be -1 / -2 and PGO profile-use will drop the profile.
1044 // All counters being -1 also implies that the function is hot so
1045 // PGO profile-use will also set the entry count metadata to be
1047 // All counters being -2 implies that the function is warm so
1048 // PGO profile-use will also set the entry count metadata to be
1052 ProfRecord->setPseudoCount(Kind);
1069 ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
1082 "fs-discriminator-pass", cl::init(PassLast), cl::Hidden,
1088 clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
1089 clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
1109 for (const auto &PD : Reader->getProfiles()) {
1139 StringRef FName = Name.substr(0, PrefixPos - 1);
1151 // If instrumented profile uses -funique-internal-linkage-symbols,
1156 // If sample profile uses -funique-internal-linkage-symbols,
1175 // One caveat is the pre-inlined function -- their samples
1181 // name = FS->getName();
1182 // get samples for FS;
1232 auto &BuildImpl) -> void {
1245 It = InstrProfileMap.find(NewName->second.str());
1246 if (NewName->second != DuplicateNameStr) {
1247 NewRootName = &NewName->second;
1268 for (auto &PD : WC->Writer.getProfileData()) {
1280 InstrProfRecord *R = &PD.getValue().begin()->second;
1286 for (auto &PD : Reader->getProfiles()) {
1293 ProfileSummary SamplePS = Reader->getSummary();
1330 It = InstrProfileMap.find(NewName->second.str());
1331 if (NewName->second == DuplicateNameStr) {
1339 It->second.MaxCount > ColdInstrThreshold ||
1340 It->second.NumEdgeCounters < SupplMinSizeThreshold)
1343 updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold,
1364 if (OutputFilename == "-")
1381 if (std::error_code EC = Reader->read())
1390 if (WC->Errors.size() > 0)
1391 exitWithError(std::move(WC->Errors[0].first), InstrFilename);
1395 writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
1398 /// Make a copy of the given function samples with all symbol names remapped
1401 remapSamples(const sampleprof::FunctionSamples &Samples,
1404 Result.setFunction(Remapper(Samples.getFunction()));
1405 Result.addTotalSamples(Samples.getTotalSamples());
1406 Result.addHeadSamples(Samples.getHeadSamples());
1407 for (const auto &BodySample : Samples.getBodySamples()) {
1418 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1457 StringRef Data = Buffer->getBuffer();
1458 Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1479 warn("-compress-all-section is ignored. Specify -extbinary to enable it");
1485 warn("-use-md5 is ignored. Specify -extbinary to enable it");
1491 warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
1523 if (std::error_code EC = Reader->read()) {
1529 SampleProfileMap &Profiles = Reader->getProfiles();
1533 "cannot merge probe-based profile with non-probe-based profile");
1536 exitWithError("cannot merge CS profile with non-CS profile");
1542 Remapper ? remapSamples(I->second, *Remapper, Result)
1544 FunctionSamples &Samples = Remapper ? Remapped : I->second;
1545 SampleContext FContext = Samples.getContext();
1547 ProfileMap[FContext].merge(Samples, Input.Weight));
1557 Reader->getProfileSymbolList();
1569 (Summary->getDetailedSummary()));
1603 Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1623 if (Filename == "-") {
1643 if (llvm::sys::fs::is_regular_file(F->path())) {
1644 addWeightedInput(WNI, {F->path(), Weight});
1658 StringRef Data = Buffer->getBuffer();
1659 Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1686 exitWithError("no input files specified. See " + ProgName + " merge -help");
1701 "-supplement-instr-with-sample can only work with -instr. ");
1751 // Total samples of overlap units
1753 // Number of and total samples of units that are only present in base or test
1759 // Number of units and total samples in base or test profile
1764 // Number of and total samples of units that are present in at least one profile
1857 (SecondIter == SecondEnd || FirstIter->first < SecondIter->first))
1860 (FirstIter == FirstEnd || SecondIter->first < FirstIter->first))
1866 // Return the sum of line/block samples, the max line/block sample, and the
1867 // number of line/block samples above the given threshold in a function
1906 /// Detect 0-sample input profile and report to output stream. This interface
1910 /// Write out function-level similarity statistics for functions specified by
1911 /// options --function, --value-cutoff, and --similarity-cutoff.
1914 /// Write out program-level similarity and overlap statistics.
1917 /// Write out hot-function and hot-block statistics for base_profile,
1926 /// pair of matched functions, it aggregates the function-level
1927 /// similarity into a profile-level similarity. It also dump function-level
1928 /// similarity information of functions specified by --function,
1929 /// --value-cutoff, and --similarity-cutoff options. The program-level
1931 /// Given function-level similarity FS(A) for all function A, the
1934 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1935 /// meaning no-overlap.
1938 /// Initialize ProfOverlap with the sum of samples in base and test
1939 /// profiles. This function also computes and keeps the sum of samples and
1941 /// use to avoid re-computations.
1964 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
1973 // FuncFilter carries specifications in options --value-cutoff and
1974 // --function.
1976 // Column offsets for printing the function-level details table.
1987 /// function in base and test profiles. The line/block-similarity BS(i) is
1992 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
1993 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
2022 /// base and test profile. It also calculates a function-internal similarity
2024 /// For offsets i that have samples in at least one profile in this
2026 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2033 /// Function-level similarity (FS) is a weighted value over function internal
2039 /// The function-level similarity FS(A) for a function A is computed as
2041 /// Compute a function-internal similarity FIS(A) by
2044 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
2053 /// Profile-level similarity (PS) is a weighted aggregate over function-level
2084 return 1.0 - std::fabs(BaseFrac - TestFrac);
2149 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
2181 // Accumulate Difference for regular line/block samples in the function.
2182 // We match them through sort-merge join algorithm because
2193 : BlockIterStep.getFirstIter()->second.getSamples();
2197 : BlockIterStep.getSecondIter()->second.getSamples();
2205 // them through sort-merge algorithm because
2223 for (const auto &F : Callsite->second)
2228 // matching all of them. This match is implemented through sort-merge
2232 CallsiteIterStep.getFirstIter()->second.cbegin(),
2233 CallsiteIterStep.getFirstIter()->second.cend(),
2234 CallsiteIterStep.getSecondIter()->second.cbegin(),
2235 CallsiteIterStep.getSecondIter()->second.cend());
2243 updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
2248 Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
2249 CalleeIterStep.getFirstIter()->second,
2250 CalleeIterStep.getSecondIter()->second,
2259 // Difference reflects the total differences of line/block samples in this
2260 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2262 return (2.0 - Difference) / 2;
2273 "Total samples in base profile should be greater than 0");
2276 "Total samples in test profile should be greater than 0");
2278 double WeightDistance = std::fabs(BaseFrac - TestFrac);
2281 return FuncInternalSimilarity * (1 - WeightDistance);
2292 "Total samples in base profile should be greater than 0");
2295 "Total samples in test profile should be greater than 0");
2324 FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
2339 const auto &BaseProfiles = BaseReader->getProfiles();
2345 const auto &TestProfiles = TestReader->getProfiles();
2374 // Two functions match with each other. Compute function-level overlap and
2375 // aggregate them into profile-level overlap.
2376 FuncOverlap.BaseName = Match->second->getContext();
2383 Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample,
2391 // Accumulate the percentage of base unique and test unique samples into
2402 // Print function-level similarity information if specified by options.
2412 "Total samples in base profile should be greater than 0");
2416 "Total samples in test profile should be greater than 0");
2425 assert(BaseStats.count(F.second->getContext()) &&
2428 const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2445 ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon)
2453 const auto &BaseProf = BaseReader->getProfiles();
2462 const auto &TestProf = TestReader->getProfiles();
2482 FOS << "Function-level details:\n";
2495 FOS << "Base samples";
2497 FOS << "Test samples";
2546 "Total samples in two profile should be greater than 0");
2550 "Total samples in base profile should be greater than 0");
2554 "Total samples in test profile should be greater than 0");
2560 OS << " percentage of samples unique in base profile: "
2562 OS << " percentage of samples unique in test profile: "
2564 OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2565 << " total samples in test profile: " << ProfOverlap.TestSample << "\n";
2584 OS << " Hot-function overlap: "
2590 << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2592 << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2596 OS << " Hot-block overlap: "
2602 << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2604 << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2625 if (std::error_code EC = BaseReader->read())
2627 if (std::error_code EC = TestReader->read())
2629 if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2631 "cannot compare probe-based profile with non-probe-based profile");
2632 if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2633 exitWithError("cannot compare CS profile with non-CS profile");
2635 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2637 ProfileSummary &BasePS = BaseReader->getSummary();
2638 ProfileSummary &TestPS = TestReader->getSummary();
2654 // report 2--3 places after decimal point in percentage numbers.
2714 Stats.ValueSitesHistogram[NV - 1]++;
2727 OS << Symtab->getFuncOrVarName(V.Value);
2764 bool IsIRInstr = Reader->isIRLevelProfile();
2767 int NumVPKind = IPVK_Last - IPVK_First + 1;
2785 // Add marker so that IR-level instrumentation round-trips properly.
2790 if (Reader->isIRLevelProfile()) {
2801 InstrProfSymtab &Symtab = Reader->getSymtab();
2901 &(Reader->getSymtab()));
2908 &(Reader->getSymtab()));
2918 if (Reader->hasError())
2919 exitWithError(Reader->getError(), Filename);
2924 bool IsIR = Reader->isIRLevelProfile();
2925 OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
2927 OS << " entry_first = " << Reader->instrEntryBBEnabled();
2931 OS << "Total functions: " << PS->getNumFunctions() << "\n";
2936 << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
2938 OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
2939 OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";
2970 OS << "Total number of blocks: " << PS->getNumCounts() << "\n";
2971 OS << "Total count: " << PS->getTotalCount() << "\n";
2972 PS->printDetailedSummary(OS);
2976 if (Error E = Reader->printBinaryIds(OS))
2980 OS << "Profile version: " << Reader->getVersion() << "\n";
2983 auto &Traces = Reader->getTemporalProfTraces();
2984 OS << "Temporal Profile Traces (samples=" << Traces.size()
2985 << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
2990 OS << " " << Reader->getSymtab().getFuncOrVarName(NameRef) << "\n";
2999 if (!Reader->dumpSectionInfo(OS)) {
3000 WithColor::warning() << "-show-sec-info-only is only supported for "
3094 // The following loop also calculates the sum of total samples of all
3158 if (std::error_code EC = Reader->read())
3163 Reader->dumpJson(OS);
3165 Reader->dump(OS);
3173 FunctionSamples *FS = Reader->getSamplesFor(StringRef(FuncNameFilter));
3174 Reader->dumpFunctionProfile(FS ? *FS : FunctionSamples(), OS);
3179 Reader->getProfileSymbolList();
3180 ReaderList->dump(OS);
3184 auto &PS = Reader->getSummary();
3190 showHotFunctionList(Reader->getProfiles(), Reader->getSummary(),
3210 Reader->printYAML(OS);
3224 if (auto Err = Correlator->dumpYaml(MaxDbgCorrelationWarnings, OS))
3229 if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
3234 StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
3243 << Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n";
3244 OS << "Found " << Correlator->getDataSize() << " functions\n";
3252 "the positional argument '<profdata-file>' is required unless '--" +
3270 WithColor::warning() << "-function argument ignored: showing all functions\n";
3297 ArrayRef Traces = Reader->getTemporalProfTraces();
3300 "--" + NumTestTraces.ArgStr +
3326 TouchedPages.insert(It->getSecond());
3333 OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3335 "post-processing may be required before passing to the linker via "
3336 "-order_file.\n";
3339 getParsedIRPGOName(Reader->getSymtab().getFuncOrVarName(N.Id));
3355 << ": No subcommand specified! Run llvm-profata --help for usage.\n";
3374 << ": Unknown command. Run llvm-profdata --help for usage.\n";