xref: /llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp (revision f97c610d1f824bcd3e078560c836aaaffaaf69b0)
1 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // llvm-profdata merges .profdata files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/SmallSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Debuginfod/HTTPClient.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/ProfileData/InstrProfCorrelator.h"
20 #include "llvm/ProfileData/InstrProfReader.h"
21 #include "llvm/ProfileData/InstrProfWriter.h"
22 #include "llvm/ProfileData/MemProf.h"
23 #include "llvm/ProfileData/MemProfReader.h"
24 #include "llvm/ProfileData/ProfileCommon.h"
25 #include "llvm/ProfileData/SampleProfReader.h"
26 #include "llvm/ProfileData/SampleProfWriter.h"
27 #include "llvm/Support/BalancedPartitioning.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/Discriminator.h"
30 #include "llvm/Support/Errc.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/Format.h"
33 #include "llvm/Support/FormattedStream.h"
34 #include "llvm/Support/LLVMDriver.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/Regex.h"
39 #include "llvm/Support/ThreadPool.h"
40 #include "llvm/Support/Threading.h"
41 #include "llvm/Support/VirtualFileSystem.h"
42 #include "llvm/Support/WithColor.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include <algorithm>
45 #include <cmath>
46 #include <optional>
47 #include <queue>
48 
49 using namespace llvm;
50 using ProfCorrelatorKind = InstrProfCorrelator::ProfCorrelatorKind;
51 
52 // https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentations
53 // on each subcommand.
54 cl::SubCommand ShowSubcommand(
55     "show",
56     "Takes a profile data file and displays the profiles. See detailed "
57     "documentation in "
58     "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
59 cl::SubCommand OrderSubcommand(
60     "order",
61     "Reads temporal profiling traces from a profile and outputs a function "
62     "order that reduces the number of page faults for those traces. See "
63     "detailed documentation in "
64     "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
65 cl::SubCommand OverlapSubcommand(
66     "overlap",
67     "Computes and displays the overlap between two profiles. See detailed "
68     "documentation in "
69     "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
70 cl::SubCommand MergeSubcommand(
71     "merge",
72     "Takes several profiles and merge them together. See detailed "
73     "documentation in "
74     "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
75 
76 namespace {
77 enum ProfileKinds { instr, sample, memory };
78 enum FailureMode { warnOnly, failIfAnyAreInvalid, failIfAllAreInvalid };
79 
80 enum ProfileFormat {
81   PF_None = 0,
82   PF_Text,
83   PF_Compact_Binary, // Deprecated
84   PF_Ext_Binary,
85   PF_GCC,
86   PF_Binary
87 };
88 
89 enum class ShowFormat { Text, Json, Yaml };
90 } // namespace
91 
92 // Common options.
93 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
94                                     cl::init("-"), cl::desc("Output file"),
95                                     cl::sub(ShowSubcommand),
96                                     cl::sub(OrderSubcommand),
97                                     cl::sub(OverlapSubcommand),
98                                     cl::sub(MergeSubcommand));
99 // NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
100 // will be used. llvm::cl::alias::done() method asserts this condition.
101 cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
102                           cl::aliasopt(OutputFilename));
103 
104 // Options common to at least two commands.
105 cl::opt<ProfileKinds> ProfileKind(
106     cl::desc("Profile kind:"), cl::sub(MergeSubcommand),
107     cl::sub(OverlapSubcommand), cl::init(instr),
108     cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
109                clEnumVal(sample, "Sample profile")));
110 cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"),
111                               cl::sub(ShowSubcommand),
112                               cl::sub(OrderSubcommand));
113 cl::opt<unsigned> MaxDbgCorrelationWarnings(
114     "max-debug-info-correlation-warnings",
115     cl::desc("The maximum number of warnings to emit when correlating "
116              "profile from debug info (0 = no limit)"),
117     cl::sub(MergeSubcommand), cl::sub(ShowSubcommand), cl::init(5));
118 cl::opt<std::string> ProfiledBinary(
119     "profiled-binary", cl::init(""),
120     cl::desc("Path to binary from which the profile was collected."),
121     cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
122 cl::opt<std::string> DebugInfoFilename(
123     "debug-info", cl::init(""),
124     cl::desc(
125         "For show, read and extract profile metadata from debug info and show "
126         "the functions it found. For merge, use the provided debug info to "
127         "correlate the raw profile."),
128     cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
129 cl::opt<std::string>
130     BinaryFilename("binary-file", cl::init(""),
131                    cl::desc("For merge, use the provided unstripped bianry to "
132                             "correlate the raw profile."),
133                    cl::sub(MergeSubcommand));
134 cl::list<std::string> DebugFileDirectory(
135     "debug-file-directory",
136     cl::desc("Directories to search for object files by build ID"));
137 cl::opt<bool> DebugInfod("debuginfod", cl::init(false), cl::Hidden,
138                          cl::sub(MergeSubcommand),
139                          cl::desc("Enable debuginfod"));
140 cl::opt<ProfCorrelatorKind> BIDFetcherProfileCorrelate(
141     "correlate",
142     cl::desc("Use debug-info or binary correlation to correlate profiles with "
143              "build id fetcher"),
144     cl::init(InstrProfCorrelator::NONE),
145     cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
146                           "No profile correlation"),
147                clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
148                           "Use debug info to correlate"),
149                clEnumValN(InstrProfCorrelator::BINARY, "binary",
150                           "Use binary to correlate")));
151 cl::opt<std::string> FuncNameFilter(
152     "function",
153     cl::desc("Only functions matching the filter are shown in the output. For "
154              "overlapping CSSPGO, this takes a function name with calling "
155              "context."),
156     cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand),
157     cl::sub(MergeSubcommand));
158 
159 // TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
160 // factor out the common cl::sub in cl::opt constructor for subcommand-specific
161 // options.
162 
163 // Options specific to merge subcommand.
164 cl::list<std::string> InputFilenames(cl::Positional, cl::sub(MergeSubcommand),
165                                      cl::desc("<filename...>"));
166 cl::list<std::string> WeightedInputFilenames("weighted-input",
167                                              cl::sub(MergeSubcommand),
168                                              cl::desc("<weight>,<filename>"));
169 cl::opt<ProfileFormat> OutputFormat(
170     cl::desc("Format of output profile"), cl::sub(MergeSubcommand),
171     cl::init(PF_Ext_Binary),
172     cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding"),
173                clEnumValN(PF_Ext_Binary, "extbinary",
174                           "Extensible binary encoding "
175                           "(default)"),
176                clEnumValN(PF_Text, "text", "Text encoding"),
177                clEnumValN(PF_GCC, "gcc",
178                           "GCC encoding (only meaningful for -sample)")));
179 cl::opt<std::string>
180     InputFilenamesFile("input-files", cl::init(""), cl::sub(MergeSubcommand),
181                        cl::desc("Path to file containing newline-separated "
182                                 "[<weight>,]<filename> entries"));
183 cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
184                               cl::aliasopt(InputFilenamesFile));
185 cl::opt<bool> DumpInputFileList(
186     "dump-input-file-list", cl::init(false), cl::Hidden,
187     cl::sub(MergeSubcommand),
188     cl::desc("Dump the list of input files and their weights, then exit"));
189 cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
190                                    cl::sub(MergeSubcommand),
191                                    cl::desc("Symbol remapping file"));
192 cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
193                          cl::aliasopt(RemappingFile));
194 cl::opt<bool>
195     UseMD5("use-md5", cl::init(false), cl::Hidden,
196            cl::desc("Choose to use MD5 to represent string in name table (only "
197                     "meaningful for -extbinary)"),
198            cl::sub(MergeSubcommand));
199 cl::opt<bool> CompressAllSections(
200     "compress-all-sections", cl::init(false), cl::Hidden,
201     cl::sub(MergeSubcommand),
202     cl::desc("Compress all sections when writing the profile (only "
203              "meaningful for -extbinary)"));
204 cl::opt<bool> SampleMergeColdContext(
205     "sample-merge-cold-context", cl::init(false), cl::Hidden,
206     cl::sub(MergeSubcommand),
207     cl::desc(
208         "Merge context sample profiles whose count is below cold threshold"));
209 cl::opt<bool> SampleTrimColdContext(
210     "sample-trim-cold-context", cl::init(false), cl::Hidden,
211     cl::sub(MergeSubcommand),
212     cl::desc(
213         "Trim context sample profiles whose count is below cold threshold"));
214 cl::opt<uint32_t> SampleColdContextFrameDepth(
215     "sample-frame-depth-for-cold-context", cl::init(1),
216     cl::sub(MergeSubcommand),
217     cl::desc("Keep the last K frames while merging cold profile. 1 means the "
218              "context-less base profile"));
219 cl::opt<size_t> OutputSizeLimit(
220     "output-size-limit", cl::init(0), cl::Hidden, cl::sub(MergeSubcommand),
221     cl::desc("Trim cold functions until profile size is below specified "
222              "limit in bytes. This uses a heursitic and functions may be "
223              "excessively trimmed"));
224 cl::opt<bool> GenPartialProfile(
225     "gen-partial-profile", cl::init(false), cl::Hidden,
226     cl::sub(MergeSubcommand),
227     cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
228 cl::opt<bool> SplitLayout(
229     "split-layout", cl::init(false), cl::Hidden,
230     cl::sub(MergeSubcommand),
231     cl::desc("Split the profile to two sections with one containing sample "
232              "profiles with inlined functions and the other without (only "
233              "meaningful for -extbinary)"));
234 cl::opt<std::string> SupplInstrWithSample(
235     "supplement-instr-with-sample", cl::init(""), cl::Hidden,
236     cl::sub(MergeSubcommand),
237     cl::desc("Supplement an instr profile with sample profile, to correct "
238              "the profile unrepresentativeness issue. The sample "
239              "profile is the input of the flag. Output will be in instr "
240              "format (The flag only works with -instr)"));
241 cl::opt<float> ZeroCounterThreshold(
242     "zero-counter-threshold", cl::init(0.7), cl::Hidden,
243     cl::sub(MergeSubcommand),
244     cl::desc("For the function which is cold in instr profile but hot in "
245              "sample profile, if the ratio of the number of zero counters "
246              "divided by the total number of counters is above the "
247              "threshold, the profile of the function will be regarded as "
248              "being harmful for performance and will be dropped."));
249 cl::opt<unsigned> SupplMinSizeThreshold(
250     "suppl-min-size-threshold", cl::init(10), cl::Hidden,
251     cl::sub(MergeSubcommand),
252     cl::desc("If the size of a function is smaller than the threshold, "
253              "assume it can be inlined by PGO early inliner and it won't "
254              "be adjusted based on sample profile."));
255 cl::opt<unsigned> InstrProfColdThreshold(
256     "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
257     cl::sub(MergeSubcommand),
258     cl::desc("User specified cold threshold for instr profile which will "
259              "override the cold threshold got from profile summary. "));
260 // WARNING: This reservoir size value is propagated to any input indexed
261 // profiles for simplicity. Changing this value between invocations could
262 // result in sample bias.
263 cl::opt<uint64_t> TemporalProfTraceReservoirSize(
264     "temporal-profile-trace-reservoir-size", cl::init(100),
265     cl::sub(MergeSubcommand),
266     cl::desc("The maximum number of stored temporal profile traces (default: "
267              "100)"));
268 cl::opt<uint64_t> TemporalProfMaxTraceLength(
269     "temporal-profile-max-trace-length", cl::init(10000),
270     cl::sub(MergeSubcommand),
271     cl::desc("The maximum length of a single temporal profile trace "
272              "(default: 10000)"));
273 cl::opt<std::string> FuncNameNegativeFilter(
274     "no-function", cl::init(""),
275     cl::sub(MergeSubcommand),
276     cl::desc("Exclude functions matching the filter from the output."));
277 
278 cl::opt<FailureMode>
279     FailMode("failure-mode", cl::init(failIfAnyAreInvalid),
280              cl::desc("Failure mode:"), cl::sub(MergeSubcommand),
281              cl::values(clEnumValN(warnOnly, "warn",
282                                    "Do not fail and just print warnings."),
283                         clEnumValN(failIfAnyAreInvalid, "any",
284                                    "Fail if any profile is invalid."),
285                         clEnumValN(failIfAllAreInvalid, "all",
286                                    "Fail only if all profiles are invalid.")));
287 
288 cl::opt<bool> OutputSparse(
289     "sparse", cl::init(false), cl::sub(MergeSubcommand),
290     cl::desc("Generate a sparse profile (only meaningful for -instr)"));
291 cl::opt<unsigned> NumThreads(
292     "num-threads", cl::init(0), cl::sub(MergeSubcommand),
293     cl::desc("Number of merge threads to use (default: autodetect)"));
294 cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
295                       cl::aliasopt(NumThreads));
296 
297 cl::opt<std::string> ProfileSymbolListFile(
298     "prof-sym-list", cl::init(""), cl::sub(MergeSubcommand),
299     cl::desc("Path to file containing the list of function symbols "
300              "used to populate profile symbol list"));
301 
302 cl::opt<SampleProfileLayout> ProfileLayout(
303     "convert-sample-profile-layout",
304     cl::desc("Convert the generated profile to a profile with a new layout"),
305     cl::sub(MergeSubcommand), cl::init(SPL_None),
306     cl::values(
307         clEnumValN(SPL_Nest, "nest",
308                    "Nested profile, the input should be CS flat profile"),
309         clEnumValN(SPL_Flat, "flat",
310                    "Profile with nested inlinee flatten out")));
311 
312 cl::opt<bool> DropProfileSymbolList(
313     "drop-profile-symbol-list", cl::init(false), cl::Hidden,
314     cl::sub(MergeSubcommand),
315     cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
316              "(only meaningful for -sample)"));
317 
318 cl::opt<bool> KeepVTableSymbols(
319     "keep-vtable-symbols", cl::init(false), cl::Hidden,
320     cl::sub(MergeSubcommand),
321     cl::desc("If true, keep the vtable symbols in indexed profiles"));
322 
323 // Temporary support for writing the previous version of the format, to enable
324 // some forward compatibility.
325 // TODO: Consider enabling this with future version changes as well, to ease
326 // deployment of newer versions of llvm-profdata.
327 cl::opt<bool> DoWritePrevVersion(
328     "write-prev-version", cl::init(false), cl::Hidden,
329     cl::desc("Write the previous version of indexed format, to enable "
330              "some forward compatibility."));
331 
332 cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
333     "memprof-version", cl::Hidden, cl::sub(MergeSubcommand),
334     cl::desc("Specify the version of the memprof format to use"),
335     cl::init(memprof::Version3),
336     cl::values(clEnumValN(memprof::Version1, "1", "version 1"),
337                clEnumValN(memprof::Version2, "2", "version 2"),
338                clEnumValN(memprof::Version3, "3", "version 3")));
339 
340 cl::opt<bool> MemProfFullSchema(
341     "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
342     cl::desc("Use the full schema for serialization"), cl::init(false));
343 
344 static cl::opt<bool>
345     MemprofGenerateRandomHotness("memprof-random-hotness", cl::init(false),
346                                  cl::Hidden, cl::sub(MergeSubcommand),
347                                  cl::desc("Generate random hotness values"));
348 static cl::opt<unsigned> MemprofGenerateRandomHotnessSeed(
349     "memprof-random-hotness-seed", cl::init(0), cl::Hidden,
350     cl::sub(MergeSubcommand),
351     cl::desc("Random hotness seed to use (0 to generate new seed)"));
352 
353 // Options specific to overlap subcommand.
354 cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
355                                   cl::desc("<base profile file>"),
356                                   cl::sub(OverlapSubcommand));
357 cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
358                                   cl::desc("<test profile file>"),
359                                   cl::sub(OverlapSubcommand));
360 
361 cl::opt<unsigned long long> SimilarityCutoff(
362     "similarity-cutoff", cl::init(0),
363     cl::desc("For sample profiles, list function names (with calling context "
364              "for csspgo) for overlapped functions "
365              "with similarities below the cutoff (percentage times 10000)."),
366     cl::sub(OverlapSubcommand));
367 
368 cl::opt<bool> IsCS(
369     "cs", cl::init(false),
370     cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."),
371     cl::sub(OverlapSubcommand));
372 
373 cl::opt<unsigned long long> OverlapValueCutoff(
374     "value-cutoff", cl::init(-1),
375     cl::desc(
376         "Function level overlap information for every function (with calling "
377         "context for csspgo) in test "
378         "profile with max count value greater than the parameter value"),
379     cl::sub(OverlapSubcommand));
380 
381 // Options specific to show subcommand.
382 cl::opt<bool> ShowCounts("counts", cl::init(false),
383                          cl::desc("Show counter values for shown functions"),
384                          cl::sub(ShowSubcommand));
385 cl::opt<ShowFormat>
386     SFormat("show-format", cl::init(ShowFormat::Text),
387             cl::desc("Emit output in the selected format if supported"),
388             cl::sub(ShowSubcommand),
389             cl::values(clEnumValN(ShowFormat::Text, "text",
390                                   "emit normal text output (default)"),
391                        clEnumValN(ShowFormat::Json, "json", "emit JSON"),
392                        clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
393 // TODO: Consider replacing this with `--show-format=text-encoding`.
394 cl::opt<bool>
395     TextFormat("text", cl::init(false),
396                cl::desc("Show instr profile data in text dump format"),
397                cl::sub(ShowSubcommand));
398 cl::opt<bool>
399     JsonFormat("json",
400                cl::desc("Show sample profile data in the JSON format "
401                         "(deprecated, please use --show-format=json)"),
402                cl::sub(ShowSubcommand));
403 cl::opt<bool> ShowIndirectCallTargets(
404     "ic-targets", cl::init(false),
405     cl::desc("Show indirect call site target values for shown functions"),
406     cl::sub(ShowSubcommand));
407 cl::opt<bool> ShowVTables("show-vtables", cl::init(false),
408                           cl::desc("Show vtable names for shown functions"),
409                           cl::sub(ShowSubcommand));
410 cl::opt<bool> ShowMemOPSizes(
411     "memop-sizes", cl::init(false),
412     cl::desc("Show the profiled sizes of the memory intrinsic calls "
413              "for shown functions"),
414     cl::sub(ShowSubcommand));
415 cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
416                                   cl::desc("Show detailed profile summary"),
417                                   cl::sub(ShowSubcommand));
418 cl::list<uint32_t> DetailedSummaryCutoffs(
419     cl::CommaSeparated, "detailed-summary-cutoffs",
420     cl::desc(
421         "Cutoff percentages (times 10000) for generating detailed summary"),
422     cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand));
423 cl::opt<bool>
424     ShowHotFuncList("hot-func-list", cl::init(false),
425                     cl::desc("Show profile summary of a list of hot functions"),
426                     cl::sub(ShowSubcommand));
427 cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
428                                cl::desc("Details for each and every function"),
429                                cl::sub(ShowSubcommand));
430 cl::opt<bool> ShowCS("showcs", cl::init(false),
431                      cl::desc("Show context sensitive counts"),
432                      cl::sub(ShowSubcommand));
433 cl::opt<ProfileKinds> ShowProfileKind(
434     cl::desc("Profile kind supported by show:"), cl::sub(ShowSubcommand),
435     cl::init(instr),
436     cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
437                clEnumVal(sample, "Sample profile"),
438                clEnumVal(memory, "MemProf memory access profile")));
439 cl::opt<uint32_t> TopNFunctions(
440     "topn", cl::init(0),
441     cl::desc("Show the list of functions with the largest internal counts"),
442     cl::sub(ShowSubcommand));
443 cl::opt<uint32_t> ShowValueCutoff(
444     "value-cutoff", cl::init(0),
445     cl::desc("Set the count value cutoff. Functions with the maximum count "
446              "less than this value will not be printed out. (Default is 0)"),
447     cl::sub(ShowSubcommand));
448 cl::opt<bool> OnlyListBelow(
449     "list-below-cutoff", cl::init(false),
450     cl::desc("Only output names of functions whose max count values are "
451              "below the cutoff value"),
452     cl::sub(ShowSubcommand));
453 cl::opt<bool> ShowProfileSymbolList(
454     "show-prof-sym-list", cl::init(false),
455     cl::desc("Show profile symbol list if it exists in the profile. "),
456     cl::sub(ShowSubcommand));
457 cl::opt<bool> ShowSectionInfoOnly(
458     "show-sec-info-only", cl::init(false),
459     cl::desc("Show the information of each section in the sample profile. "
460              "The flag is only usable when the sample profile is in "
461              "extbinary format"),
462     cl::sub(ShowSubcommand));
463 cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(false),
464                             cl::desc("Show binary ids in the profile. "),
465                             cl::sub(ShowSubcommand));
466 cl::opt<bool> ShowTemporalProfTraces(
467     "temporal-profile-traces",
468     cl::desc("Show temporal profile traces in the profile."),
469     cl::sub(ShowSubcommand));
470 
471 cl::opt<bool>
472     ShowCovered("covered", cl::init(false),
473                 cl::desc("Show only the functions that have been executed."),
474                 cl::sub(ShowSubcommand));
475 
476 cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false),
477                                  cl::desc("Show profile version. "),
478                                  cl::sub(ShowSubcommand));
479 
480 // Options specific to order subcommand.
481 cl::opt<unsigned>
482     NumTestTraces("num-test-traces", cl::init(0),
483                   cl::desc("Keep aside the last <num-test-traces> traces in "
484                            "the profile when computing the function order and "
485                            "instead use them to evaluate that order"),
486                   cl::sub(OrderSubcommand));
487 
488 // We use this string to indicate that there are
489 // multiple static functions map to the same name.
490 const std::string DuplicateNameStr = "----";
491 
492 static void warn(Twine Message, StringRef Whence = "", StringRef Hint = "") {
493   WithColor::warning();
494   if (!Whence.empty())
495     errs() << Whence << ": ";
496   errs() << Message << "\n";
497   if (!Hint.empty())
498     WithColor::note() << Hint << "\n";
499 }
500 
501 static void warn(Error E, StringRef Whence = "") {
502   if (E.isA<InstrProfError>()) {
503     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
504       warn(IPE.message(), Whence);
505     });
506   }
507 }
508 
509 static void exitWithError(Twine Message, StringRef Whence = "",
510                           StringRef Hint = "") {
511   WithColor::error();
512   if (!Whence.empty())
513     errs() << Whence << ": ";
514   errs() << Message << "\n";
515   if (!Hint.empty())
516     WithColor::note() << Hint << "\n";
517   ::exit(1);
518 }
519 
520 static void exitWithError(Error E, StringRef Whence = "") {
521   if (E.isA<InstrProfError>()) {
522     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
523       instrprof_error instrError = IPE.get();
524       StringRef Hint = "";
525       if (instrError == instrprof_error::unrecognized_format) {
526         // Hint in case user missed specifying the profile type.
527         Hint = "Perhaps you forgot to use the --sample or --memory option?";
528       }
529       exitWithError(IPE.message(), Whence, Hint);
530     });
531     return;
532   }
533 
534   exitWithError(toString(std::move(E)), Whence);
535 }
536 
537 static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
538   exitWithError(EC.message(), Whence);
539 }
540 
541 static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
542                                  StringRef Whence = "") {
543   if (FailMode == failIfAnyAreInvalid)
544     exitWithErrorCode(EC, Whence);
545   else
546     warn(EC.message(), Whence);
547 }
548 
549 static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
550                                    StringRef WhenceFunction = "",
551                                    bool ShowHint = true) {
552   if (!WhenceFile.empty())
553     errs() << WhenceFile << ": ";
554   if (!WhenceFunction.empty())
555     errs() << WhenceFunction << ": ";
556 
557   auto IPE = instrprof_error::success;
558   E = handleErrors(std::move(E),
559                    [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
560                      IPE = E->get();
561                      return Error(std::move(E));
562                    });
563   errs() << toString(std::move(E)) << "\n";
564 
565   if (ShowHint) {
566     StringRef Hint = "";
567     if (IPE != instrprof_error::success) {
568       switch (IPE) {
569       case instrprof_error::hash_mismatch:
570       case instrprof_error::count_mismatch:
571       case instrprof_error::value_site_count_mismatch:
572         Hint = "Make sure that all profile data to be merged is generated "
573                "from the same binary.";
574         break;
575       default:
576         break;
577       }
578     }
579 
580     if (!Hint.empty())
581       errs() << Hint << "\n";
582   }
583 }
584 
585 namespace {
586 /// A remapper from original symbol names to new symbol names based on a file
587 /// containing a list of mappings from old name to new name.
588 class SymbolRemapper {
589   std::unique_ptr<MemoryBuffer> File;
590   DenseMap<StringRef, StringRef> RemappingTable;
591 
592 public:
593   /// Build a SymbolRemapper from a file containing a list of old/new symbols.
594   static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
595     auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
596     if (!BufOrError)
597       exitWithErrorCode(BufOrError.getError(), InputFile);
598 
599     auto Remapper = std::make_unique<SymbolRemapper>();
600     Remapper->File = std::move(BufOrError.get());
601 
602     for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
603          !LineIt.is_at_eof(); ++LineIt) {
604       std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
605       if (Parts.first.empty() || Parts.second.empty() ||
606           Parts.second.count(' ')) {
607         exitWithError("unexpected line in remapping file",
608                       (InputFile + ":" + Twine(LineIt.line_number())).str(),
609                       "expected 'old_symbol new_symbol'");
610       }
611       Remapper->RemappingTable.insert(Parts);
612     }
613     return Remapper;
614   }
615 
616   /// Attempt to map the given old symbol into a new symbol.
617   ///
618   /// \return The new symbol, or \p Name if no such symbol was found.
619   StringRef operator()(StringRef Name) {
620     StringRef New = RemappingTable.lookup(Name);
621     return New.empty() ? Name : New;
622   }
623 
624   FunctionId operator()(FunctionId Name) {
625     // MD5 name cannot be remapped.
626     if (!Name.isStringRef())
627       return Name;
628     StringRef New = RemappingTable.lookup(Name.stringRef());
629     return New.empty() ? Name : FunctionId(New);
630   }
631 };
632 }
633 
634 struct WeightedFile {
635   std::string Filename;
636   uint64_t Weight;
637 };
638 typedef SmallVector<WeightedFile, 5> WeightedFileVector;
639 
640 /// Keep track of merged data and reported errors.
641 struct WriterContext {
642   std::mutex Lock;
643   InstrProfWriter Writer;
644   std::vector<std::pair<Error, std::string>> Errors;
645   std::mutex &ErrLock;
646   SmallSet<instrprof_error, 4> &WriterErrorCodes;
647 
648   WriterContext(bool IsSparse, std::mutex &ErrLock,
649                 SmallSet<instrprof_error, 4> &WriterErrorCodes,
650                 uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
651       : Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
652                MemProfVersionRequested, MemProfFullSchema,
653                MemprofGenerateRandomHotness, MemprofGenerateRandomHotnessSeed),
654         ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
655 };
656 
657 /// Computer the overlap b/w profile BaseFilename and TestFileName,
658 /// and store the program level result to Overlap.
659 static void overlapInput(const std::string &BaseFilename,
660                          const std::string &TestFilename, WriterContext *WC,
661                          OverlapStats &Overlap,
662                          const OverlapFuncFilters &FuncFilter,
663                          raw_fd_ostream &OS, bool IsCS) {
664   auto FS = vfs::getRealFileSystem();
665   auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS);
666   if (Error E = ReaderOrErr.takeError()) {
667     // Skip the empty profiles by returning sliently.
668     auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
669     if (ErrorCode != instrprof_error::empty_raw_profile)
670       WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
671                               TestFilename);
672     return;
673   }
674 
675   auto Reader = std::move(ReaderOrErr.get());
676   for (auto &I : *Reader) {
677     OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
678     FuncOverlap.setFuncInfo(I.Name, I.Hash);
679 
680     WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
681     FuncOverlap.dump(OS);
682   }
683 }
684 
685 /// Load an input into a writer context.
686 static void
687 loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
688           const InstrProfCorrelator *Correlator, const StringRef ProfiledBinary,
689           WriterContext *WC, const object::BuildIDFetcher *BIDFetcher = nullptr,
690           const ProfCorrelatorKind *BIDFetcherCorrelatorKind = nullptr) {
691   std::unique_lock<std::mutex> CtxGuard{WC->Lock};
692 
693   // Copy the filename, because llvm::ThreadPool copied the input "const
694   // WeightedFile &" by value, making a reference to the filename within it
695   // invalid outside of this packaged task.
696   std::string Filename = Input.Filename;
697 
698   using ::llvm::memprof::RawMemProfReader;
699   if (RawMemProfReader::hasFormat(Input.Filename)) {
700     auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary);
701     if (!ReaderOrErr) {
702       exitWithError(ReaderOrErr.takeError(), Input.Filename);
703     }
704     std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
705     // Check if the profile types can be merged, e.g. clang frontend profiles
706     // should not be merged with memprof profiles.
707     if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
708       consumeError(std::move(E));
709       WC->Errors.emplace_back(
710           make_error<StringError>(
711               "Cannot merge MemProf profile with Clang generated profile.",
712               std::error_code()),
713           Filename);
714       return;
715     }
716 
717     auto MemProfError = [&](Error E) {
718       auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
719       WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
720                               Filename);
721     };
722 
723     WC->Writer.addMemProfData(Reader->takeMemProfData(), MemProfError);
724     return;
725   }
726 
727   auto FS = vfs::getRealFileSystem();
728   // TODO: This only saves the first non-fatal error from InstrProfReader, and
729   // then added to WriterContext::Errors. However, this is not extensible, if
730   // we have more non-fatal errors from InstrProfReader in the future. How
731   // should this interact with different -failure-mode?
732   std::optional<std::pair<Error, std::string>> ReaderWarning;
733   auto Warn = [&](Error E) {
734     if (ReaderWarning) {
735       consumeError(std::move(E));
736       return;
737     }
738     // Only show the first time an error occurs in this file.
739     auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
740     ReaderWarning = {make_error<InstrProfError>(ErrCode, Msg), Filename};
741   };
742 
743   const ProfCorrelatorKind CorrelatorKind = BIDFetcherCorrelatorKind
744                                                 ? *BIDFetcherCorrelatorKind
745                                                 : ProfCorrelatorKind::NONE;
746   auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator,
747                                              BIDFetcher, CorrelatorKind, Warn);
748   if (Error E = ReaderOrErr.takeError()) {
749     // Skip the empty profiles by returning silently.
750     auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
751     if (ErrCode != instrprof_error::empty_raw_profile)
752       WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg),
753                               Filename);
754     return;
755   }
756 
757   auto Reader = std::move(ReaderOrErr.get());
758   if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
759     consumeError(std::move(E));
760     WC->Errors.emplace_back(
761         make_error<StringError>(
762             "Merge IR generated profile with Clang generated profile.",
763             std::error_code()),
764         Filename);
765     return;
766   }
767 
768   for (auto &I : *Reader) {
769     if (Remapper)
770       I.Name = (*Remapper)(I.Name);
771     const StringRef FuncName = I.Name;
772     bool Reported = false;
773     WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
774       if (Reported) {
775         consumeError(std::move(E));
776         return;
777       }
778       Reported = true;
779       // Only show hint the first time an error occurs.
780       auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
781       std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
782       bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
783       handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg),
784                              Input.Filename, FuncName, firstTime);
785     });
786   }
787 
788   if (KeepVTableSymbols) {
789     const InstrProfSymtab &symtab = Reader->getSymtab();
790     const auto &VTableNames = symtab.getVTableNames();
791 
792     for (const auto &kv : VTableNames)
793       WC->Writer.addVTableName(kv.getKey());
794   }
795 
796   if (Reader->hasTemporalProfile()) {
797     auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
798     if (!Traces.empty())
799       WC->Writer.addTemporalProfileTraces(
800           Traces, Reader->getTemporalProfTraceStreamSize());
801   }
802   if (Reader->hasError()) {
803     if (Error E = Reader->getError()) {
804       WC->Errors.emplace_back(std::move(E), Filename);
805       return;
806     }
807   }
808 
809   std::vector<llvm::object::BuildID> BinaryIds;
810   if (Error E = Reader->readBinaryIds(BinaryIds)) {
811     WC->Errors.emplace_back(std::move(E), Filename);
812     return;
813   }
814   WC->Writer.addBinaryIds(BinaryIds);
815 
816   if (ReaderWarning) {
817     WC->Errors.emplace_back(std::move(ReaderWarning->first),
818                             ReaderWarning->second);
819   }
820 }
821 
822 /// Merge the \p Src writer context into \p Dst.
823 static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
824   for (auto &ErrorPair : Src->Errors)
825     Dst->Errors.push_back(std::move(ErrorPair));
826   Src->Errors.clear();
827 
828   if (Error E = Dst->Writer.mergeProfileKind(Src->Writer.getProfileKind()))
829     exitWithError(std::move(E));
830 
831   Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
832     auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
833     std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
834     bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second;
835     if (firstTime)
836       warn(toString(make_error<InstrProfError>(ErrorCode, Msg)));
837   });
838 }
839 
840 static StringRef
841 getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
842   return Val.first();
843 }
844 
845 static std::string
846 getFuncName(const SampleProfileMap::value_type &Val) {
847   return Val.second.getContext().toString();
848 }
849 
850 template <typename T>
851 static void filterFunctions(T &ProfileMap) {
852   bool hasFilter = !FuncNameFilter.empty();
853   bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
854   if (!hasFilter && !hasNegativeFilter)
855     return;
856 
857   // If filter starts with '?' it is MSVC mangled name, not a regex.
858   llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
859   if (hasFilter && FuncNameFilter[0] == '?' &&
860       ProbablyMSVCMangledName.match(FuncNameFilter))
861     FuncNameFilter = llvm::Regex::escape(FuncNameFilter);
862   if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
863       ProbablyMSVCMangledName.match(FuncNameNegativeFilter))
864     FuncNameNegativeFilter = llvm::Regex::escape(FuncNameNegativeFilter);
865 
866   size_t Count = ProfileMap.size();
867   llvm::Regex Pattern(FuncNameFilter);
868   llvm::Regex NegativePattern(FuncNameNegativeFilter);
869   std::string Error;
870   if (hasFilter && !Pattern.isValid(Error))
871     exitWithError(Error);
872   if (hasNegativeFilter && !NegativePattern.isValid(Error))
873     exitWithError(Error);
874 
875   // Handle MD5 profile, so it is still able to match using the original name.
876   std::string MD5Name = std::to_string(llvm::MD5Hash(FuncNameFilter));
877   std::string NegativeMD5Name =
878       std::to_string(llvm::MD5Hash(FuncNameNegativeFilter));
879 
880   for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
881     auto Tmp = I++;
882     const auto &FuncName = getFuncName(*Tmp);
883     // Negative filter has higher precedence than positive filter.
884     if ((hasNegativeFilter &&
885          (NegativePattern.match(FuncName) ||
886           (FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
887         (hasFilter && !(Pattern.match(FuncName) ||
888                         (FunctionSamples::UseMD5 && MD5Name == FuncName))))
889       ProfileMap.erase(Tmp);
890   }
891 
892   llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
893                << "in the original profile are filtered.\n";
894 }
895 
896 static void writeInstrProfile(StringRef OutputFilename,
897                               ProfileFormat OutputFormat,
898                               InstrProfWriter &Writer) {
899   std::error_code EC;
900   raw_fd_ostream Output(OutputFilename.data(), EC,
901                         OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
902                                                 : sys::fs::OF_None);
903   if (EC)
904     exitWithErrorCode(EC, OutputFilename);
905 
906   if (OutputFormat == PF_Text) {
907     if (Error E = Writer.writeText(Output))
908       warn(std::move(E));
909   } else {
910     if (Output.is_displayed())
911       exitWithError("cannot write a non-text format profile to the terminal");
912     if (Error E = Writer.write(Output))
913       warn(std::move(E));
914   }
915 }
916 
917 static void mergeInstrProfile(const WeightedFileVector &Inputs,
918                               SymbolRemapper *Remapper,
919                               int MaxDbgCorrelationWarnings,
920                               const StringRef ProfiledBinary) {
921   const uint64_t TraceReservoirSize = TemporalProfTraceReservoirSize.getValue();
922   const uint64_t MaxTraceLength = TemporalProfMaxTraceLength.getValue();
923   if (OutputFormat == PF_Compact_Binary)
924     exitWithError("Compact Binary is deprecated");
925   if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
926       OutputFormat != PF_Text)
927     exitWithError("unknown format is specified");
928 
929   // TODO: Maybe we should support correlation with mixture of different
930   // correlation modes(w/wo debug-info/object correlation).
931   if (DebugInfoFilename.empty()) {
932     if (!BinaryFilename.empty() && (DebugInfod || !DebugFileDirectory.empty()))
933       exitWithError("Expected only one of -binary-file, -debuginfod or "
934                     "-debug-file-directory");
935   } else if (!BinaryFilename.empty() || DebugInfod ||
936              !DebugFileDirectory.empty()) {
937     exitWithError("Expected only one of -debug-info, -binary-file, -debuginfod "
938                   "or -debug-file-directory");
939   }
940   std::string CorrelateFilename;
941   ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
942   if (!DebugInfoFilename.empty()) {
943     CorrelateFilename = DebugInfoFilename;
944     CorrelateKind = ProfCorrelatorKind::DEBUG_INFO;
945   } else if (!BinaryFilename.empty()) {
946     CorrelateFilename = BinaryFilename;
947     CorrelateKind = ProfCorrelatorKind::BINARY;
948   }
949 
950   std::unique_ptr<InstrProfCorrelator> Correlator;
951   if (CorrelateKind != InstrProfCorrelator::NONE) {
952     if (auto Err = InstrProfCorrelator::get(CorrelateFilename, CorrelateKind)
953                        .moveInto(Correlator))
954       exitWithError(std::move(Err), CorrelateFilename);
955     if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
956       exitWithError(std::move(Err), CorrelateFilename);
957   }
958 
959   ProfCorrelatorKind BIDFetcherCorrelateKind = ProfCorrelatorKind::NONE;
960   std::unique_ptr<object::BuildIDFetcher> BIDFetcher;
961   if (DebugInfod) {
962     llvm::HTTPClient::initialize();
963     BIDFetcher = std::make_unique<DebuginfodFetcher>(DebugFileDirectory);
964     if (!BIDFetcherProfileCorrelate)
965       exitWithError("Expected --correlate when --debuginfod is provided");
966     BIDFetcherCorrelateKind = BIDFetcherProfileCorrelate;
967   } else if (!DebugFileDirectory.empty()) {
968     BIDFetcher = std::make_unique<object::BuildIDFetcher>(DebugFileDirectory);
969     if (!BIDFetcherProfileCorrelate)
970       exitWithError("Expected --correlate when --debug-file-directory "
971                     "is provided");
972     BIDFetcherCorrelateKind = BIDFetcherProfileCorrelate;
973   } else if (BIDFetcherProfileCorrelate) {
974     exitWithError("Expected --debuginfod or --debug-file-directory when "
975                   "--correlate is provided");
976   }
977 
978   std::mutex ErrorLock;
979   SmallSet<instrprof_error, 4> WriterErrorCodes;
980 
981   // If NumThreads is not specified, auto-detect a good default.
982   if (NumThreads == 0)
983     NumThreads = std::min(hardware_concurrency().compute_thread_count(),
984                           unsigned((Inputs.size() + 1) / 2));
985 
986   // Initialize the writer contexts.
987   SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
988   for (unsigned I = 0; I < NumThreads; ++I)
989     Contexts.emplace_back(std::make_unique<WriterContext>(
990         OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize,
991         MaxTraceLength));
992 
993   if (NumThreads == 1) {
994     for (const auto &Input : Inputs)
995       loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
996                 Contexts[0].get(), BIDFetcher.get(), &BIDFetcherCorrelateKind);
997   } else {
998     DefaultThreadPool Pool(hardware_concurrency(NumThreads));
999 
1000     // Load the inputs in parallel (N/NumThreads serial steps).
1001     unsigned Ctx = 0;
1002     for (const auto &Input : Inputs) {
1003       Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
1004                  Contexts[Ctx].get(), BIDFetcher.get(),
1005                  &BIDFetcherCorrelateKind);
1006       Ctx = (Ctx + 1) % NumThreads;
1007     }
1008     Pool.wait();
1009 
1010     // Merge the writer contexts together (~ lg(NumThreads) serial steps).
1011     unsigned Mid = Contexts.size() / 2;
1012     unsigned End = Contexts.size();
1013     assert(Mid > 0 && "Expected more than one context");
1014     do {
1015       for (unsigned I = 0; I < Mid; ++I)
1016         Pool.async(mergeWriterContexts, Contexts[I].get(),
1017                    Contexts[I + Mid].get());
1018       Pool.wait();
1019       if (End & 1) {
1020         Pool.async(mergeWriterContexts, Contexts[0].get(),
1021                    Contexts[End - 1].get());
1022         Pool.wait();
1023       }
1024       End = Mid;
1025       Mid /= 2;
1026     } while (Mid > 0);
1027   }
1028 
1029   // Handle deferred errors encountered during merging. If the number of errors
1030   // is equal to the number of inputs the merge failed.
1031   unsigned NumErrors = 0;
1032   for (std::unique_ptr<WriterContext> &WC : Contexts) {
1033     for (auto &ErrorPair : WC->Errors) {
1034       ++NumErrors;
1035       warn(toString(std::move(ErrorPair.first)), ErrorPair.second);
1036     }
1037   }
1038   if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) ||
1039       (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
1040     exitWithError("no profile can be merged");
1041 
1042   filterFunctions(Contexts[0]->Writer.getProfileData());
1043 
1044   writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
1045 }
1046 
1047 /// The profile entry for a function in instrumentation profile.
1048 struct InstrProfileEntry {
1049   uint64_t MaxCount = 0;
1050   uint64_t NumEdgeCounters = 0;
1051   float ZeroCounterRatio = 0.0;
1052   InstrProfRecord *ProfRecord;
1053   InstrProfileEntry(InstrProfRecord *Record);
1054   InstrProfileEntry() = default;
1055 };
1056 
1057 InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
1058   ProfRecord = Record;
1059   uint64_t CntNum = Record->Counts.size();
1060   uint64_t ZeroCntNum = 0;
1061   for (size_t I = 0; I < CntNum; ++I) {
1062     MaxCount = std::max(MaxCount, Record->Counts[I]);
1063     ZeroCntNum += !Record->Counts[I];
1064   }
1065   ZeroCounterRatio = (float)ZeroCntNum / CntNum;
1066   NumEdgeCounters = CntNum;
1067 }
1068 
1069 /// Either set all the counters in the instr profile entry \p IFE to
1070 /// -1 / -2 /in order to drop the profile or scale up the
1071 /// counters in \p IFP to be above hot / cold threshold. We use
1072 /// the ratio of zero counters in the profile of a function to
1073 /// decide the profile is helpful or harmful for performance,
1074 /// and to choose whether to scale up or drop it.
1075 static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
1076                                     uint64_t HotInstrThreshold,
1077                                     uint64_t ColdInstrThreshold,
1078                                     float ZeroCounterThreshold) {
1079   InstrProfRecord *ProfRecord = IFE.ProfRecord;
1080   if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
1081     // If all or most of the counters of the function are zero, the
1082     // profile is unaccountable and should be dropped. Reset all the
1083     // counters to be -1 / -2 and PGO profile-use will drop the profile.
1084     // All counters being -1 also implies that the function is hot so
1085     // PGO profile-use will also set the entry count metadata to be
1086     // above hot threshold.
1087     // All counters being -2 implies that the function is warm so
1088     // PGO profile-use will also set the entry count metadata to be
1089     // above cold threshold.
1090     auto Kind =
1091         (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
1092     ProfRecord->setPseudoCount(Kind);
1093     return;
1094   }
1095 
1096   // Scale up the MaxCount to be multiple times above hot / cold threshold.
1097   const unsigned MultiplyFactor = 3;
1098   uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
1099   uint64_t Numerator = Threshold * MultiplyFactor;
1100 
1101   // Make sure Threshold for warm counters is below the HotInstrThreshold.
1102   if (!SetToHot && Threshold >= HotInstrThreshold) {
1103     Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
1104   }
1105 
1106   uint64_t Denominator = IFE.MaxCount;
1107   if (Numerator <= Denominator)
1108     return;
1109   ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
1110     warn(toString(make_error<InstrProfError>(E)));
1111   });
1112 }
1113 
1114 const uint64_t ColdPercentileIdx = 15;
1115 const uint64_t HotPercentileIdx = 11;
1116 
1117 using sampleprof::FSDiscriminatorPass;
1118 
1119 // Internal options to set FSDiscriminatorPass. Used in merge and show
1120 // commands.
1121 static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
1122     "fs-discriminator-pass", cl::init(PassLast), cl::Hidden,
1123     cl::desc("Zero out the discriminator bits for the FS discrimiantor "
1124              "pass beyond this value. The enum values are defined in "
1125              "Support/Discriminator.h"),
1126     cl::values(clEnumVal(Base, "Use base discriminators only"),
1127                clEnumVal(Pass1, "Use base and pass 1 discriminators"),
1128                clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
1129                clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
1130                clEnumVal(PassLast, "Use all discriminator bits (default)")));
1131 
1132 static unsigned getDiscriminatorMask() {
1133   return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption.getValue()));
1134 }
1135 
1136 /// Adjust the instr profile in \p WC based on the sample profile in
1137 /// \p Reader.
1138 static void
1139 adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
1140                    std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
1141                    unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
1142                    unsigned InstrProfColdThreshold) {
1143   // Function to its entry in instr profile.
1144   StringMap<InstrProfileEntry> InstrProfileMap;
1145   StringMap<StringRef> StaticFuncMap;
1146   InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
1147 
1148   auto checkSampleProfileHasFUnique = [&Reader]() {
1149     for (const auto &PD : Reader->getProfiles()) {
1150       auto &FContext = PD.second.getContext();
1151       if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
1152           std::string::npos) {
1153         return true;
1154       }
1155     }
1156     return false;
1157   };
1158 
1159   bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
1160 
1161   auto buildStaticFuncMap = [&StaticFuncMap,
1162                              SampleProfileHasFUnique](const StringRef Name) {
1163     std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1164     size_t PrefixPos = StringRef::npos;
1165     for (auto &FilePrefix : FilePrefixes) {
1166       std::string NamePrefix = FilePrefix + GlobalIdentifierDelimiter;
1167       PrefixPos = Name.find_insensitive(NamePrefix);
1168       if (PrefixPos == StringRef::npos)
1169         continue;
1170       PrefixPos += NamePrefix.size();
1171       break;
1172     }
1173 
1174     if (PrefixPos == StringRef::npos) {
1175       return;
1176     }
1177 
1178     StringRef NewName = Name.drop_front(PrefixPos);
1179     StringRef FName = Name.substr(0, PrefixPos - 1);
1180     if (NewName.size() == 0) {
1181       return;
1182     }
1183 
1184     // This name should have a static linkage.
1185     size_t PostfixPos = NewName.find(FunctionSamples::UniqSuffix);
1186     bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
1187 
1188     // If sample profile and instrumented profile do not agree on symbol
1189     // uniqification.
1190     if (SampleProfileHasFUnique != ProfileHasFUnique) {
1191       // If instrumented profile uses -funique-internal-linkage-symbols,
1192       // we need to trim the name.
1193       if (ProfileHasFUnique) {
1194         NewName = NewName.substr(0, PostfixPos);
1195       } else {
1196         // If sample profile uses -funique-internal-linkage-symbols,
1197         // we build the map.
1198         std::string NStr =
1199             NewName.str() + getUniqueInternalLinkagePostfix(FName);
1200         NewName = StringRef(NStr);
1201         StaticFuncMap[NewName] = Name;
1202         return;
1203       }
1204     }
1205 
1206     auto [It, Inserted] = StaticFuncMap.try_emplace(NewName, Name);
1207     if (!Inserted)
1208       It->second = DuplicateNameStr;
1209   };
1210 
1211   // We need to flatten the SampleFDO profile as the InstrFDO
1212   // profile does not have inlined callsite profiles.
1213   // One caveat is the pre-inlined function -- their samples
1214   // should be collapsed into the caller function.
1215   // Here we do a DFS traversal to get the flatten profile
1216   // info: the sum of entrycount and the max of maxcount.
1217   // Here is the algorithm:
1218   //   recursive (FS, root_name) {
1219   //      name = FS->getName();
1220   //      get samples for FS;
1221   //      if (InstrProf.find(name) {
1222   //        root_name = name;
1223   //      } else {
1224   //        if (name is in static_func map) {
1225   //          root_name = static_name;
1226   //        }
1227   //      }
1228   //      update the Map entry for root_name;
1229   //      for (subfs: FS) {
1230   //        recursive(subfs, root_name);
1231   //      }
1232   //   }
1233   //
1234   // Here is an example.
1235   //
1236   // SampleProfile:
1237   // foo:12345:1000
1238   // 1: 1000
1239   // 2.1: 1000
1240   // 15: 5000
1241   // 4: bar:1000
1242   //  1: 1000
1243   //  2: goo:3000
1244   //   1: 3000
1245   // 8: bar:40000
1246   //  1: 10000
1247   //  2: goo:30000
1248   //   1: 30000
1249   //
1250   // InstrProfile has two entries:
1251   //  foo
1252   //  bar.cc;bar
1253   //
1254   // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1255   // {"foo", {1000, 5000}}
1256   // {"bar.cc;bar", {11000, 30000}}
1257   //
1258   // foo's has an entry count of 1000, and max body count of 5000.
1259   // bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1260   // 10000), and max count of 30000 (from the callsite in line 8).
1261   //
1262   // Note that goo's count will remain in bar.cc;bar() as it does not have an
1263   // entry in InstrProfile.
1264   llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap;
1265   auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
1266                             &InstrProfileMap](const FunctionSamples &FS,
1267                                               const StringRef &RootName) {
1268     auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
1269                                      const StringRef &RootName,
1270                                      auto &BuildImpl) -> void {
1271       std::string NameStr = FS.getFunction().str();
1272       const StringRef Name = NameStr;
1273       const StringRef *NewRootName = &RootName;
1274       uint64_t EntrySample = FS.getHeadSamplesEstimate();
1275       uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
1276 
1277       auto It = InstrProfileMap.find(Name);
1278       if (It != InstrProfileMap.end()) {
1279         NewRootName = &Name;
1280       } else {
1281         auto NewName = StaticFuncMap.find(Name);
1282         if (NewName != StaticFuncMap.end()) {
1283           It = InstrProfileMap.find(NewName->second);
1284           if (NewName->second != DuplicateNameStr) {
1285             NewRootName = &NewName->second;
1286           }
1287         } else {
1288           // Here the EntrySample is of an inlined function, so we should not
1289           // update the EntrySample in the map.
1290           EntrySample = 0;
1291         }
1292       }
1293       EntrySample += FlattenSampleMap[*NewRootName].first;
1294       MaxBodySample =
1295           std::max(FlattenSampleMap[*NewRootName].second, MaxBodySample);
1296       FlattenSampleMap[*NewRootName] =
1297           std::make_pair(EntrySample, MaxBodySample);
1298 
1299       for (const auto &C : FS.getCallsiteSamples())
1300         for (const auto &F : C.second)
1301           BuildImpl(F.second, *NewRootName, BuildImpl);
1302     };
1303     BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
1304   };
1305 
1306   for (auto &PD : WC->Writer.getProfileData()) {
1307     // Populate IPBuilder.
1308     for (const auto &PDV : PD.getValue()) {
1309       InstrProfRecord Record = PDV.second;
1310       IPBuilder.addRecord(Record);
1311     }
1312 
1313     // If a function has multiple entries in instr profile, skip it.
1314     if (PD.getValue().size() != 1)
1315       continue;
1316 
1317     // Initialize InstrProfileMap.
1318     InstrProfRecord *R = &PD.getValue().begin()->second;
1319     StringRef FullName = PD.getKey();
1320     InstrProfileMap[FullName] = InstrProfileEntry(R);
1321     buildStaticFuncMap(FullName);
1322   }
1323 
1324   for (auto &PD : Reader->getProfiles()) {
1325     sampleprof::FunctionSamples &FS = PD.second;
1326     std::string Name = FS.getFunction().str();
1327     BuildMaxSampleMap(FS, Name);
1328   }
1329 
1330   ProfileSummary InstrPS = *IPBuilder.getSummary();
1331   ProfileSummary SamplePS = Reader->getSummary();
1332 
1333   // Compute cold thresholds for instr profile and sample profile.
1334   uint64_t HotSampleThreshold =
1335       ProfileSummaryBuilder::getEntryForPercentile(
1336           SamplePS.getDetailedSummary(),
1337           ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1338           .MinCount;
1339   uint64_t ColdSampleThreshold =
1340       ProfileSummaryBuilder::getEntryForPercentile(
1341           SamplePS.getDetailedSummary(),
1342           ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1343           .MinCount;
1344   uint64_t HotInstrThreshold =
1345       ProfileSummaryBuilder::getEntryForPercentile(
1346           InstrPS.getDetailedSummary(),
1347           ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1348           .MinCount;
1349   uint64_t ColdInstrThreshold =
1350       InstrProfColdThreshold
1351           ? InstrProfColdThreshold
1352           : ProfileSummaryBuilder::getEntryForPercentile(
1353                 InstrPS.getDetailedSummary(),
1354                 ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1355                 .MinCount;
1356 
1357   // Find hot/warm functions in sample profile which is cold in instr profile
1358   // and adjust the profiles of those functions in the instr profile.
1359   for (const auto &E : FlattenSampleMap) {
1360     uint64_t SampleMaxCount = std::max(E.second.first, E.second.second);
1361     if (SampleMaxCount < ColdSampleThreshold)
1362       continue;
1363     StringRef Name = E.first();
1364     auto It = InstrProfileMap.find(Name);
1365     if (It == InstrProfileMap.end()) {
1366       auto NewName = StaticFuncMap.find(Name);
1367       if (NewName != StaticFuncMap.end()) {
1368         It = InstrProfileMap.find(NewName->second);
1369         if (NewName->second == DuplicateNameStr) {
1370           WithColor::warning()
1371               << "Static function " << Name
1372               << " has multiple promoted names, cannot adjust profile.\n";
1373         }
1374       }
1375     }
1376     if (It == InstrProfileMap.end() ||
1377         It->second.MaxCount > ColdInstrThreshold ||
1378         It->second.NumEdgeCounters < SupplMinSizeThreshold)
1379       continue;
1380     bool SetToHot = SampleMaxCount >= HotSampleThreshold;
1381     updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold,
1382                             ColdInstrThreshold, ZeroCounterThreshold);
1383   }
1384 }
1385 
1386 /// The main function to supplement instr profile with sample profile.
1387 /// \Inputs contains the instr profile. \p SampleFilename specifies the
1388 /// sample profile. \p OutputFilename specifies the output profile name.
1389 /// \p OutputFormat specifies the output profile format. \p OutputSparse
1390 /// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1391 /// specifies the minimal size for the functions whose profile will be
1392 /// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1393 /// a function contains too many zero counters and whether its profile
1394 /// should be dropped. \p InstrProfColdThreshold is the user specified
1395 /// cold threshold which will override the cold threshold got from the
1396 /// instr profile summary.
1397 static void supplementInstrProfile(const WeightedFileVector &Inputs,
1398                                    StringRef SampleFilename, bool OutputSparse,
1399                                    unsigned SupplMinSizeThreshold,
1400                                    float ZeroCounterThreshold,
1401                                    unsigned InstrProfColdThreshold) {
1402   if (OutputFilename == "-")
1403     exitWithError("cannot write indexed profdata format to stdout");
1404   if (Inputs.size() != 1)
1405     exitWithError("expect one input to be an instr profile");
1406   if (Inputs[0].Weight != 1)
1407     exitWithError("expect instr profile doesn't have weight");
1408 
1409   StringRef InstrFilename = Inputs[0].Filename;
1410 
1411   // Read sample profile.
1412   LLVMContext Context;
1413   auto FS = vfs::getRealFileSystem();
1414   auto ReaderOrErr = sampleprof::SampleProfileReader::create(
1415       SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption);
1416   if (std::error_code EC = ReaderOrErr.getError())
1417     exitWithErrorCode(EC, SampleFilename);
1418   auto Reader = std::move(ReaderOrErr.get());
1419   if (std::error_code EC = Reader->read())
1420     exitWithErrorCode(EC, SampleFilename);
1421 
1422   // Read instr profile.
1423   std::mutex ErrorLock;
1424   SmallSet<instrprof_error, 4> WriterErrorCodes;
1425   auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
1426                                             WriterErrorCodes);
1427   loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get());
1428   if (WC->Errors.size() > 0)
1429     exitWithError(std::move(WC->Errors[0].first), InstrFilename);
1430 
1431   adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
1432                      InstrProfColdThreshold);
1433   writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
1434 }
1435 
1436 /// Make a copy of the given function samples with all symbol names remapped
1437 /// by the provided symbol remapper.
1438 static sampleprof::FunctionSamples
1439 remapSamples(const sampleprof::FunctionSamples &Samples,
1440              SymbolRemapper &Remapper, sampleprof_error &Error) {
1441   sampleprof::FunctionSamples Result;
1442   Result.setFunction(Remapper(Samples.getFunction()));
1443   Result.addTotalSamples(Samples.getTotalSamples());
1444   Result.addHeadSamples(Samples.getHeadSamples());
1445   for (const auto &BodySample : Samples.getBodySamples()) {
1446     uint32_t MaskedDiscriminator =
1447         BodySample.first.Discriminator & getDiscriminatorMask();
1448     Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator,
1449                           BodySample.second.getSamples());
1450     for (const auto &Target : BodySample.second.getCallTargets()) {
1451       Result.addCalledTargetSamples(BodySample.first.LineOffset,
1452                                     MaskedDiscriminator,
1453                                     Remapper(Target.first), Target.second);
1454     }
1455   }
1456   for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1457     sampleprof::FunctionSamplesMap &Target =
1458         Result.functionSamplesAt(CallsiteSamples.first);
1459     for (const auto &Callsite : CallsiteSamples.second) {
1460       sampleprof::FunctionSamples Remapped =
1461           remapSamples(Callsite.second, Remapper, Error);
1462       mergeSampleProfErrors(Error,
1463                             Target[Remapped.getFunction()].merge(Remapped));
1464     }
1465   }
1466   return Result;
1467 }
1468 
1469 static sampleprof::SampleProfileFormat FormatMap[] = {
1470     sampleprof::SPF_None,
1471     sampleprof::SPF_Text,
1472     sampleprof::SPF_None,
1473     sampleprof::SPF_Ext_Binary,
1474     sampleprof::SPF_GCC,
1475     sampleprof::SPF_Binary};
1476 
1477 static std::unique_ptr<MemoryBuffer>
1478 getInputFileBuf(const StringRef &InputFile) {
1479   if (InputFile == "")
1480     return {};
1481 
1482   auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
1483   if (!BufOrError)
1484     exitWithErrorCode(BufOrError.getError(), InputFile);
1485 
1486   return std::move(*BufOrError);
1487 }
1488 
1489 static void populateProfileSymbolList(MemoryBuffer *Buffer,
1490                                       sampleprof::ProfileSymbolList &PSL) {
1491   if (!Buffer)
1492     return;
1493 
1494   SmallVector<StringRef, 32> SymbolVec;
1495   StringRef Data = Buffer->getBuffer();
1496   Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1497 
1498   for (StringRef SymbolStr : SymbolVec)
1499     PSL.add(SymbolStr.trim());
1500 }
1501 
1502 static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
1503                                   ProfileFormat OutputFormat,
1504                                   MemoryBuffer *Buffer,
1505                                   sampleprof::ProfileSymbolList &WriterList,
1506                                   bool CompressAllSections, bool UseMD5,
1507                                   bool GenPartialProfile) {
1508   if (SplitLayout) {
1509     if (OutputFormat == PF_Binary)
1510       warn("-split-layout is ignored. Specify -extbinary to enable it");
1511     else
1512       Writer.setUseCtxSplitLayout();
1513   }
1514 
1515   populateProfileSymbolList(Buffer, WriterList);
1516   if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
1517     warn("Profile Symbol list is not empty but the output format is not "
1518          "ExtBinary format. The list will be lost in the output. ");
1519 
1520   Writer.setProfileSymbolList(&WriterList);
1521 
1522   if (CompressAllSections) {
1523     if (OutputFormat != PF_Ext_Binary)
1524       warn("-compress-all-section is ignored. Specify -extbinary to enable it");
1525     else
1526       Writer.setToCompressAllSections();
1527   }
1528   if (UseMD5) {
1529     if (OutputFormat != PF_Ext_Binary)
1530       warn("-use-md5 is ignored. Specify -extbinary to enable it");
1531     else
1532       Writer.setUseMD5();
1533   }
1534   if (GenPartialProfile) {
1535     if (OutputFormat != PF_Ext_Binary)
1536       warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
1537     else
1538       Writer.setPartialProfile();
1539   }
1540 }
1541 
1542 static void mergeSampleProfile(const WeightedFileVector &Inputs,
1543                                SymbolRemapper *Remapper,
1544                                StringRef ProfileSymbolListFile,
1545                                size_t OutputSizeLimit) {
1546   using namespace sampleprof;
1547   SampleProfileMap ProfileMap;
1548   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
1549   LLVMContext Context;
1550   sampleprof::ProfileSymbolList WriterList;
1551   std::optional<bool> ProfileIsProbeBased;
1552   std::optional<bool> ProfileIsCS;
1553   for (const auto &Input : Inputs) {
1554     auto FS = vfs::getRealFileSystem();
1555     auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS,
1556                                                    FSDiscriminatorPassOption);
1557     if (std::error_code EC = ReaderOrErr.getError()) {
1558       warnOrExitGivenError(FailMode, EC, Input.Filename);
1559       continue;
1560     }
1561 
1562     // We need to keep the readers around until after all the files are
1563     // read so that we do not lose the function names stored in each
1564     // reader's memory. The function names are needed to write out the
1565     // merged profile map.
1566     Readers.push_back(std::move(ReaderOrErr.get()));
1567     const auto Reader = Readers.back().get();
1568     if (std::error_code EC = Reader->read()) {
1569       warnOrExitGivenError(FailMode, EC, Input.Filename);
1570       Readers.pop_back();
1571       continue;
1572     }
1573 
1574     SampleProfileMap &Profiles = Reader->getProfiles();
1575     if (ProfileIsProbeBased &&
1576         ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1577       exitWithError(
1578           "cannot merge probe-based profile with non-probe-based profile");
1579     ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1580     if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1581       exitWithError("cannot merge CS profile with non-CS profile");
1582     ProfileIsCS = FunctionSamples::ProfileIsCS;
1583     for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1584          I != E; ++I) {
1585       sampleprof_error Result = sampleprof_error::success;
1586       FunctionSamples Remapped =
1587           Remapper ? remapSamples(I->second, *Remapper, Result)
1588                    : FunctionSamples();
1589       FunctionSamples &Samples = Remapper ? Remapped : I->second;
1590       SampleContext FContext = Samples.getContext();
1591       mergeSampleProfErrors(Result,
1592                             ProfileMap[FContext].merge(Samples, Input.Weight));
1593       if (Result != sampleprof_error::success) {
1594         std::error_code EC = make_error_code(Result);
1595         handleMergeWriterError(errorCodeToError(EC), Input.Filename,
1596                                FContext.toString());
1597       }
1598     }
1599 
1600     if (!DropProfileSymbolList) {
1601       std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1602           Reader->getProfileSymbolList();
1603       if (ReaderList)
1604         WriterList.merge(*ReaderList);
1605     }
1606   }
1607 
1608   if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1609     // Use threshold calculated from profile summary unless specified.
1610     SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1611     auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
1612     uint64_t SampleProfColdThreshold =
1613         ProfileSummaryBuilder::getColdCountThreshold(
1614             (Summary->getDetailedSummary()));
1615 
1616     // Trim and merge cold context profile using cold threshold above;
1617     SampleContextTrimmer(ProfileMap)
1618         .trimAndMergeColdContextProfiles(
1619             SampleProfColdThreshold, SampleTrimColdContext,
1620             SampleMergeColdContext, SampleColdContextFrameDepth, false);
1621   }
1622 
1623   if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1624     ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS);
1625     ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1626   } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1627     ProfileConverter CSConverter(ProfileMap);
1628     CSConverter.convertCSProfiles();
1629     ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1630   }
1631 
1632   filterFunctions(ProfileMap);
1633 
1634   auto WriterOrErr =
1635       SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
1636   if (std::error_code EC = WriterOrErr.getError())
1637     exitWithErrorCode(EC, OutputFilename);
1638 
1639   auto Writer = std::move(WriterOrErr.get());
1640   // WriterList will have StringRef refering to string in Buffer.
1641   // Make sure Buffer lives as long as WriterList.
1642   auto Buffer = getInputFileBuf(ProfileSymbolListFile);
1643   handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
1644                         CompressAllSections, UseMD5, GenPartialProfile);
1645 
1646   // If OutputSizeLimit is 0 (default), it is the same as write().
1647   if (std::error_code EC =
1648           Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1649     exitWithErrorCode(EC);
1650 }
1651 
1652 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1653   StringRef WeightStr, FileName;
1654   std::tie(WeightStr, FileName) = WeightedFilename.split(',');
1655 
1656   uint64_t Weight;
1657   if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
1658     exitWithError("input weight must be a positive integer");
1659 
1660   return {std::string(FileName), Weight};
1661 }
1662 
1663 static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1664   StringRef Filename = WF.Filename;
1665   uint64_t Weight = WF.Weight;
1666 
1667   // If it's STDIN just pass it on.
1668   if (Filename == "-") {
1669     WNI.push_back({std::string(Filename), Weight});
1670     return;
1671   }
1672 
1673   llvm::sys::fs::file_status Status;
1674   llvm::sys::fs::status(Filename, Status);
1675   if (!llvm::sys::fs::exists(Status))
1676     exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
1677                       Filename);
1678   // If it's a source file, collect it.
1679   if (llvm::sys::fs::is_regular_file(Status)) {
1680     WNI.push_back({std::string(Filename), Weight});
1681     return;
1682   }
1683 
1684   if (llvm::sys::fs::is_directory(Status)) {
1685     std::error_code EC;
1686     for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1687          F != E && !EC; F.increment(EC)) {
1688       if (llvm::sys::fs::is_regular_file(F->path())) {
1689         addWeightedInput(WNI, {F->path(), Weight});
1690       }
1691     }
1692     if (EC)
1693       exitWithErrorCode(EC, Filename);
1694   }
1695 }
1696 
1697 static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1698                                     WeightedFileVector &WFV) {
1699   if (!Buffer)
1700     return;
1701 
1702   SmallVector<StringRef, 8> Entries;
1703   StringRef Data = Buffer->getBuffer();
1704   Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1705   for (const StringRef &FileWeightEntry : Entries) {
1706     StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
1707     // Skip comments.
1708     if (SanitizedEntry.starts_with("#"))
1709       continue;
1710     // If there's no comma, it's an unweighted profile.
1711     else if (!SanitizedEntry.contains(','))
1712       addWeightedInput(WFV, {std::string(SanitizedEntry), 1});
1713     else
1714       addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
1715   }
1716 }
1717 
1718 static int merge_main(StringRef ProgName) {
1719   WeightedFileVector WeightedInputs;
1720   for (StringRef Filename : InputFilenames)
1721     addWeightedInput(WeightedInputs, {std::string(Filename), 1});
1722   for (StringRef WeightedFilename : WeightedInputFilenames)
1723     addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename));
1724 
1725   // Make sure that the file buffer stays alive for the duration of the
1726   // weighted input vector's lifetime.
1727   auto Buffer = getInputFileBuf(InputFilenamesFile);
1728   parseInputFilenamesFile(Buffer.get(), WeightedInputs);
1729 
1730   if (WeightedInputs.empty())
1731     exitWithError("no input files specified. See " + ProgName + " merge -help");
1732 
1733   if (DumpInputFileList) {
1734     for (auto &WF : WeightedInputs)
1735       outs() << WF.Weight << "," << WF.Filename << "\n";
1736     return 0;
1737   }
1738 
1739   std::unique_ptr<SymbolRemapper> Remapper;
1740   if (!RemappingFile.empty())
1741     Remapper = SymbolRemapper::create(RemappingFile);
1742 
1743   if (!SupplInstrWithSample.empty()) {
1744     if (ProfileKind != instr)
1745       exitWithError(
1746           "-supplement-instr-with-sample can only work with -instr. ");
1747 
1748     supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputSparse,
1749                            SupplMinSizeThreshold, ZeroCounterThreshold,
1750                            InstrProfColdThreshold);
1751     return 0;
1752   }
1753 
1754   if (ProfileKind == instr)
1755     mergeInstrProfile(WeightedInputs, Remapper.get(), MaxDbgCorrelationWarnings,
1756                       ProfiledBinary);
1757   else
1758     mergeSampleProfile(WeightedInputs, Remapper.get(), ProfileSymbolListFile,
1759                        OutputSizeLimit);
1760   return 0;
1761 }
1762 
1763 /// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1764 static void overlapInstrProfile(const std::string &BaseFilename,
1765                                 const std::string &TestFilename,
1766                                 const OverlapFuncFilters &FuncFilter,
1767                                 raw_fd_ostream &OS, bool IsCS) {
1768   std::mutex ErrorLock;
1769   SmallSet<instrprof_error, 4> WriterErrorCodes;
1770   WriterContext Context(false, ErrorLock, WriterErrorCodes);
1771   WeightedFile WeightedInput{BaseFilename, 1};
1772   OverlapStats Overlap;
1773   Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1774   if (E)
1775     exitWithError(std::move(E), "error in getting profile count sums");
1776   if (Overlap.Base.CountSum < 1.0f) {
1777     OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1778     exit(0);
1779   }
1780   if (Overlap.Test.CountSum < 1.0f) {
1781     OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1782     exit(0);
1783   }
1784   loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context);
1785   overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
1786                IsCS);
1787   Overlap.dump(OS);
1788 }
1789 
1790 namespace {
1791 struct SampleOverlapStats {
1792   SampleContext BaseName;
1793   SampleContext TestName;
1794   // Number of overlap units
1795   uint64_t OverlapCount = 0;
1796   // Total samples of overlap units
1797   uint64_t OverlapSample = 0;
1798   // Number of and total samples of units that only present in base or test
1799   // profile
1800   uint64_t BaseUniqueCount = 0;
1801   uint64_t BaseUniqueSample = 0;
1802   uint64_t TestUniqueCount = 0;
1803   uint64_t TestUniqueSample = 0;
1804   // Number of units and total samples in base or test profile
1805   uint64_t BaseCount = 0;
1806   uint64_t BaseSample = 0;
1807   uint64_t TestCount = 0;
1808   uint64_t TestSample = 0;
1809   // Number of and total samples of units that present in at least one profile
1810   uint64_t UnionCount = 0;
1811   uint64_t UnionSample = 0;
1812   // Weighted similarity
1813   double Similarity = 0.0;
1814   // For SampleOverlapStats instances representing functions, weights of the
1815   // function in base and test profiles
1816   double BaseWeight = 0.0;
1817   double TestWeight = 0.0;
1818 
1819   SampleOverlapStats() = default;
1820 };
1821 } // end anonymous namespace
1822 
1823 namespace {
1824 struct FuncSampleStats {
1825   uint64_t SampleSum = 0;
1826   uint64_t MaxSample = 0;
1827   uint64_t HotBlockCount = 0;
1828   FuncSampleStats() = default;
1829   FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
1830                   uint64_t HotBlockCount)
1831       : SampleSum(SampleSum), MaxSample(MaxSample),
1832         HotBlockCount(HotBlockCount) {}
1833 };
1834 } // end anonymous namespace
1835 
1836 namespace {
1837 enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };
1838 
1839 // Class for updating merging steps for two sorted maps. The class should be
1840 // instantiated with a map iterator type.
1841 template <class T> class MatchStep {
1842 public:
1843   MatchStep() = delete;
1844 
1845   MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
1846       : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
1847         SecondEnd(SecondEnd), Status(MS_None) {}
1848 
1849   bool areBothFinished() const {
1850     return (FirstIter == FirstEnd && SecondIter == SecondEnd);
1851   }
1852 
1853   bool isFirstFinished() const { return FirstIter == FirstEnd; }
1854 
1855   bool isSecondFinished() const { return SecondIter == SecondEnd; }
1856 
1857   /// Advance one step based on the previous match status unless the previous
1858   /// status is MS_None. Then update Status based on the comparison between two
1859   /// container iterators at the current step. If the previous status is
1860   /// MS_None, it means two iterators are at the beginning and no comparison has
1861   /// been made, so we simply update Status without advancing the iterators.
1862   void updateOneStep();
1863 
1864   T getFirstIter() const { return FirstIter; }
1865 
1866   T getSecondIter() const { return SecondIter; }
1867 
1868   MatchStatus getMatchStatus() const { return Status; }
1869 
1870 private:
1871   // Current iterator and end iterator of the first container.
1872   T FirstIter;
1873   T FirstEnd;
1874   // Current iterator and end iterator of the second container.
1875   T SecondIter;
1876   T SecondEnd;
1877   // Match status of the current step.
1878   MatchStatus Status;
1879 };
1880 } // end anonymous namespace
1881 
1882 template <class T> void MatchStep<T>::updateOneStep() {
1883   switch (Status) {
1884   case MS_Match:
1885     ++FirstIter;
1886     ++SecondIter;
1887     break;
1888   case MS_FirstUnique:
1889     ++FirstIter;
1890     break;
1891   case MS_SecondUnique:
1892     ++SecondIter;
1893     break;
1894   case MS_None:
1895     break;
1896   }
1897 
1898   // Update Status according to iterators at the current step.
1899   if (areBothFinished())
1900     return;
1901   if (FirstIter != FirstEnd &&
1902       (SecondIter == SecondEnd || FirstIter->first < SecondIter->first))
1903     Status = MS_FirstUnique;
1904   else if (SecondIter != SecondEnd &&
1905            (FirstIter == FirstEnd || SecondIter->first < FirstIter->first))
1906     Status = MS_SecondUnique;
1907   else
1908     Status = MS_Match;
1909 }
1910 
1911 // Return the sum of line/block samples, the max line/block sample, and the
1912 // number of line/block samples above the given threshold in a function
1913 // including its inlinees.
1914 static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1915                                FuncSampleStats &FuncStats,
1916                                uint64_t HotThreshold) {
1917   for (const auto &L : Func.getBodySamples()) {
1918     uint64_t Sample = L.second.getSamples();
1919     FuncStats.SampleSum += Sample;
1920     FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample);
1921     if (Sample >= HotThreshold)
1922       ++FuncStats.HotBlockCount;
1923   }
1924 
1925   for (const auto &C : Func.getCallsiteSamples()) {
1926     for (const auto &F : C.second)
1927       getFuncSampleStats(F.second, FuncStats, HotThreshold);
1928   }
1929 }
1930 
1931 /// Predicate that determines if a function is hot with a given threshold. We
1932 /// keep it separate from its callsites for possible extension in the future.
1933 static bool isFunctionHot(const FuncSampleStats &FuncStats,
1934                           uint64_t HotThreshold) {
1935   // We intentionally compare the maximum sample count in a function with the
1936   // HotThreshold to get an approximate determination on hot functions.
1937   return (FuncStats.MaxSample >= HotThreshold);
1938 }
1939 
1940 namespace {
1941 class SampleOverlapAggregator {
1942 public:
1943   SampleOverlapAggregator(const std::string &BaseFilename,
1944                           const std::string &TestFilename,
1945                           double LowSimilarityThreshold, double Epsilon,
1946                           const OverlapFuncFilters &FuncFilter)
1947       : BaseFilename(BaseFilename), TestFilename(TestFilename),
1948         LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
1949         FuncFilter(FuncFilter) {}
1950 
1951   /// Detect 0-sample input profile and report to output stream. This interface
1952   /// should be called after loadProfiles().
1953   bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
1954 
1955   /// Write out function-level similarity statistics for functions specified by
1956   /// options --function, --value-cutoff, and --similarity-cutoff.
1957   void dumpFuncSimilarity(raw_fd_ostream &OS) const;
1958 
1959   /// Write out program-level similarity and overlap statistics.
1960   void dumpProgramSummary(raw_fd_ostream &OS) const;
1961 
1962   /// Write out hot-function and hot-block statistics for base_profile,
1963   /// test_profile, and their overlap. For both cases, the overlap HO is
1964   /// calculated as follows:
1965   ///    Given the number of functions (or blocks) that are hot in both profiles
1966   ///    HCommon and the number of functions (or blocks) that are hot in at
1967   ///    least one profile HUnion, HO = HCommon / HUnion.
1968   void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
1969 
1970   /// This function tries matching functions in base and test profiles. For each
1971   /// pair of matched functions, it aggregates the function-level
1972   /// similarity into a profile-level similarity. It also dump function-level
1973   /// similarity information of functions specified by --function,
1974   /// --value-cutoff, and --similarity-cutoff options. The program-level
1975   /// similarity PS is computed as follows:
1976   ///     Given function-level similarity FS(A) for all function A, the
1977   ///     weight of function A in base profile WB(A), and the weight of function
1978   ///     A in test profile WT(A), compute PS(base_profile, test_profile) =
1979   ///     sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1980   ///     meaning no-overlap.
1981   void computeSampleProfileOverlap(raw_fd_ostream &OS);
1982 
1983   /// Initialize ProfOverlap with the sum of samples in base and test
1984   /// profiles. This function also computes and keeps the sum of samples and
1985   /// max sample counts of each function in BaseStats and TestStats for later
1986   /// use to avoid re-computations.
1987   void initializeSampleProfileOverlap();
1988 
1989   /// Load profiles specified by BaseFilename and TestFilename.
1990   std::error_code loadProfiles();
1991 
1992   using FuncSampleStatsMap =
1993       std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
1994 
1995 private:
1996   SampleOverlapStats ProfOverlap;
1997   SampleOverlapStats HotFuncOverlap;
1998   SampleOverlapStats HotBlockOverlap;
1999   std::string BaseFilename;
2000   std::string TestFilename;
2001   std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
2002   std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
2003   // BaseStats and TestStats hold FuncSampleStats for each function, with
2004   // function name as the key.
2005   FuncSampleStatsMap BaseStats;
2006   FuncSampleStatsMap TestStats;
2007   // Low similarity threshold in floating point number
2008   double LowSimilarityThreshold;
2009   // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
2010   // for tracking hot blocks.
2011   uint64_t BaseHotThreshold;
2012   uint64_t TestHotThreshold;
2013   // A small threshold used to round the results of floating point accumulations
2014   // to resolve imprecision.
2015   const double Epsilon;
2016   std::multimap<double, SampleOverlapStats, std::greater<double>>
2017       FuncSimilarityDump;
2018   // FuncFilter carries specifications in options --value-cutoff and
2019   // --function.
2020   OverlapFuncFilters FuncFilter;
2021   // Column offsets for printing the function-level details table.
2022   static const unsigned int TestWeightCol = 15;
2023   static const unsigned int SimilarityCol = 30;
2024   static const unsigned int OverlapCol = 43;
2025   static const unsigned int BaseUniqueCol = 53;
2026   static const unsigned int TestUniqueCol = 67;
2027   static const unsigned int BaseSampleCol = 81;
2028   static const unsigned int TestSampleCol = 96;
2029   static const unsigned int FuncNameCol = 111;
2030 
2031   /// Return a similarity of two line/block sample counters in the same
2032   /// function in base and test profiles. The line/block-similarity BS(i) is
2033   /// computed as follows:
2034   ///    For an offsets i, given the sample count at i in base profile BB(i),
2035   ///    the sample count at i in test profile BT(i), the sum of sample counts
2036   ///    in this function in base profile SB, and the sum of sample counts in
2037   ///    this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
2038   ///    BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
2039   double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
2040                                 const SampleOverlapStats &FuncOverlap) const;
2041 
2042   void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
2043                              uint64_t HotBlockCount);
2044 
2045   void getHotFunctions(const FuncSampleStatsMap &ProfStats,
2046                        FuncSampleStatsMap &HotFunc,
2047                        uint64_t HotThreshold) const;
2048 
2049   void computeHotFuncOverlap();
2050 
2051   /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2052   /// Difference for two sample units in a matched function according to the
2053   /// given match status.
2054   void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
2055                                      uint64_t HotBlockCount,
2056                                      SampleOverlapStats &FuncOverlap,
2057                                      double &Difference, MatchStatus Status);
2058 
2059   /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2060   /// Difference for unmatched callees that only present in one profile in a
2061   /// matched caller function.
2062   void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
2063                                 SampleOverlapStats &FuncOverlap,
2064                                 double &Difference, MatchStatus Status);
2065 
2066   /// This function updates sample overlap statistics of an overlap function in
2067   /// base and test profile. It also calculates a function-internal similarity
2068   /// FIS as follows:
2069   ///    For offsets i that have samples in at least one profile in this
2070   ///    function A, given BS(i) returned by computeBlockSimilarity(), compute
2071   ///    FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2072   ///    0.0 meaning no overlap.
2073   double computeSampleFunctionInternalOverlap(
2074       const sampleprof::FunctionSamples &BaseFunc,
2075       const sampleprof::FunctionSamples &TestFunc,
2076       SampleOverlapStats &FuncOverlap);
2077 
2078   /// Function-level similarity (FS) is a weighted value over function internal
2079   /// similarity (FIS). This function computes a function's FS from its FIS by
2080   /// applying the weight.
2081   double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
2082                                  uint64_t TestFuncSample) const;
2083 
2084   /// The function-level similarity FS(A) for a function A is computed as
2085   /// follows:
2086   ///     Compute a function-internal similarity FIS(A) by
2087   ///     computeSampleFunctionInternalOverlap(). Then, with the weight of
2088   ///     function A in base profile WB(A), and the weight of function A in test
2089   ///     profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
2090   ///     ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
2091   double
2092   computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
2093                                const sampleprof::FunctionSamples *TestFunc,
2094                                SampleOverlapStats *FuncOverlap,
2095                                uint64_t BaseFuncSample,
2096                                uint64_t TestFuncSample);
2097 
2098   /// Profile-level similarity (PS) is a weighted aggregate over function-level
2099   /// similarities (FS). This method weights the FS value by the function
2100   /// weights in the base and test profiles for the aggregation.
2101   double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
2102                             uint64_t TestFuncSample) const;
2103 };
2104 } // end anonymous namespace
2105 
2106 bool SampleOverlapAggregator::detectZeroSampleProfile(
2107     raw_fd_ostream &OS) const {
2108   bool HaveZeroSample = false;
2109   if (ProfOverlap.BaseSample == 0) {
2110     OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
2111     HaveZeroSample = true;
2112   }
2113   if (ProfOverlap.TestSample == 0) {
2114     OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
2115     HaveZeroSample = true;
2116   }
2117   return HaveZeroSample;
2118 }
2119 
2120 double SampleOverlapAggregator::computeBlockSimilarity(
2121     uint64_t BaseSample, uint64_t TestSample,
2122     const SampleOverlapStats &FuncOverlap) const {
2123   double BaseFrac = 0.0;
2124   double TestFrac = 0.0;
2125   if (FuncOverlap.BaseSample > 0)
2126     BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
2127   if (FuncOverlap.TestSample > 0)
2128     TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
2129   return 1.0 - std::fabs(BaseFrac - TestFrac);
2130 }
2131 
2132 void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
2133                                                     uint64_t TestSample,
2134                                                     uint64_t HotBlockCount) {
2135   bool IsBaseHot = (BaseSample >= BaseHotThreshold);
2136   bool IsTestHot = (TestSample >= TestHotThreshold);
2137   if (!IsBaseHot && !IsTestHot)
2138     return;
2139 
2140   HotBlockOverlap.UnionCount += HotBlockCount;
2141   if (IsBaseHot)
2142     HotBlockOverlap.BaseCount += HotBlockCount;
2143   if (IsTestHot)
2144     HotBlockOverlap.TestCount += HotBlockCount;
2145   if (IsBaseHot && IsTestHot)
2146     HotBlockOverlap.OverlapCount += HotBlockCount;
2147 }
2148 
2149 void SampleOverlapAggregator::getHotFunctions(
2150     const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
2151     uint64_t HotThreshold) const {
2152   for (const auto &F : ProfStats) {
2153     if (isFunctionHot(F.second, HotThreshold))
2154       HotFunc.emplace(F.first, F.second);
2155   }
2156 }
2157 
2158 void SampleOverlapAggregator::computeHotFuncOverlap() {
2159   FuncSampleStatsMap BaseHotFunc;
2160   getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
2161   HotFuncOverlap.BaseCount = BaseHotFunc.size();
2162 
2163   FuncSampleStatsMap TestHotFunc;
2164   getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
2165   HotFuncOverlap.TestCount = TestHotFunc.size();
2166   HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
2167 
2168   for (const auto &F : BaseHotFunc) {
2169     if (TestHotFunc.count(F.first))
2170       ++HotFuncOverlap.OverlapCount;
2171     else
2172       ++HotFuncOverlap.UnionCount;
2173   }
2174 }
2175 
2176 void SampleOverlapAggregator::updateOverlapStatsForFunction(
2177     uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
2178     SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
2179   assert(Status != MS_None &&
2180          "Match status should be updated before updating overlap statistics");
2181   if (Status == MS_FirstUnique) {
2182     TestSample = 0;
2183     FuncOverlap.BaseUniqueSample += BaseSample;
2184   } else if (Status == MS_SecondUnique) {
2185     BaseSample = 0;
2186     FuncOverlap.TestUniqueSample += TestSample;
2187   } else {
2188     ++FuncOverlap.OverlapCount;
2189   }
2190 
2191   FuncOverlap.UnionSample += std::max(BaseSample, TestSample);
2192   FuncOverlap.OverlapSample += std::min(BaseSample, TestSample);
2193   Difference +=
2194       1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
2195   updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
2196 }
2197 
2198 void SampleOverlapAggregator::updateForUnmatchedCallee(
2199     const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
2200     double &Difference, MatchStatus Status) {
2201   assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
2202          "Status must be either of the two unmatched cases");
2203   FuncSampleStats FuncStats;
2204   if (Status == MS_FirstUnique) {
2205     getFuncSampleStats(Func, FuncStats, BaseHotThreshold);
2206     updateOverlapStatsForFunction(FuncStats.SampleSum, 0,
2207                                   FuncStats.HotBlockCount, FuncOverlap,
2208                                   Difference, Status);
2209   } else {
2210     getFuncSampleStats(Func, FuncStats, TestHotThreshold);
2211     updateOverlapStatsForFunction(0, FuncStats.SampleSum,
2212                                   FuncStats.HotBlockCount, FuncOverlap,
2213                                   Difference, Status);
2214   }
2215 }
2216 
2217 double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2218     const sampleprof::FunctionSamples &BaseFunc,
2219     const sampleprof::FunctionSamples &TestFunc,
2220     SampleOverlapStats &FuncOverlap) {
2221 
2222   using namespace sampleprof;
2223 
2224   double Difference = 0;
2225 
2226   // Accumulate Difference for regular line/block samples in the function.
2227   // We match them through sort-merge join algorithm because
2228   // FunctionSamples::getBodySamples() returns a map of sample counters ordered
2229   // by their offsets.
2230   MatchStep<BodySampleMap::const_iterator> BlockIterStep(
2231       BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
2232       TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
2233   BlockIterStep.updateOneStep();
2234   while (!BlockIterStep.areBothFinished()) {
2235     uint64_t BaseSample =
2236         BlockIterStep.isFirstFinished()
2237             ? 0
2238             : BlockIterStep.getFirstIter()->second.getSamples();
2239     uint64_t TestSample =
2240         BlockIterStep.isSecondFinished()
2241             ? 0
2242             : BlockIterStep.getSecondIter()->second.getSamples();
2243     updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap,
2244                                   Difference, BlockIterStep.getMatchStatus());
2245 
2246     BlockIterStep.updateOneStep();
2247   }
2248 
2249   // Accumulate Difference for callsite lines in the function. We match
2250   // them through sort-merge algorithm because
2251   // FunctionSamples::getCallsiteSamples() returns a map of callsite records
2252   // ordered by their offsets.
2253   MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
2254       BaseFunc.getCallsiteSamples().cbegin(),
2255       BaseFunc.getCallsiteSamples().cend(),
2256       TestFunc.getCallsiteSamples().cbegin(),
2257       TestFunc.getCallsiteSamples().cend());
2258   CallsiteIterStep.updateOneStep();
2259   while (!CallsiteIterStep.areBothFinished()) {
2260     MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
2261     assert(CallsiteStepStatus != MS_None &&
2262            "Match status should be updated before entering loop body");
2263 
2264     if (CallsiteStepStatus != MS_Match) {
2265       auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
2266                           ? CallsiteIterStep.getFirstIter()
2267                           : CallsiteIterStep.getSecondIter();
2268       for (const auto &F : Callsite->second)
2269         updateForUnmatchedCallee(F.second, FuncOverlap, Difference,
2270                                  CallsiteStepStatus);
2271     } else {
2272       // There may be multiple inlinees at the same offset, so we need to try
2273       // matching all of them. This match is implemented through sort-merge
2274       // algorithm because callsite records at the same offset are ordered by
2275       // function names.
2276       MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
2277           CallsiteIterStep.getFirstIter()->second.cbegin(),
2278           CallsiteIterStep.getFirstIter()->second.cend(),
2279           CallsiteIterStep.getSecondIter()->second.cbegin(),
2280           CallsiteIterStep.getSecondIter()->second.cend());
2281       CalleeIterStep.updateOneStep();
2282       while (!CalleeIterStep.areBothFinished()) {
2283         MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
2284         if (CalleeStepStatus != MS_Match) {
2285           auto Callee = (CalleeStepStatus == MS_FirstUnique)
2286                             ? CalleeIterStep.getFirstIter()
2287                             : CalleeIterStep.getSecondIter();
2288           updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
2289                                    CalleeStepStatus);
2290         } else {
2291           // An inlined function can contain other inlinees inside, so compute
2292           // the Difference recursively.
2293           Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
2294                                       CalleeIterStep.getFirstIter()->second,
2295                                       CalleeIterStep.getSecondIter()->second,
2296                                       FuncOverlap);
2297         }
2298         CalleeIterStep.updateOneStep();
2299       }
2300     }
2301     CallsiteIterStep.updateOneStep();
2302   }
2303 
2304   // Difference reflects the total differences of line/block samples in this
2305   // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2306   // reflect the similarity between function profiles in [0.0f to 1.0f].
2307   return (2.0 - Difference) / 2;
2308 }
2309 
2310 double SampleOverlapAggregator::weightForFuncSimilarity(
2311     double FuncInternalSimilarity, uint64_t BaseFuncSample,
2312     uint64_t TestFuncSample) const {
2313   // Compute the weight as the distance between the function weights in two
2314   // profiles.
2315   double BaseFrac = 0.0;
2316   double TestFrac = 0.0;
2317   assert(ProfOverlap.BaseSample > 0 &&
2318          "Total samples in base profile should be greater than 0");
2319   BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
2320   assert(ProfOverlap.TestSample > 0 &&
2321          "Total samples in test profile should be greater than 0");
2322   TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
2323   double WeightDistance = std::fabs(BaseFrac - TestFrac);
2324 
2325   // Take WeightDistance into the similarity.
2326   return FuncInternalSimilarity * (1 - WeightDistance);
2327 }
2328 
2329 double
2330 SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
2331                                             uint64_t BaseFuncSample,
2332                                             uint64_t TestFuncSample) const {
2333 
2334   double BaseFrac = 0.0;
2335   double TestFrac = 0.0;
2336   assert(ProfOverlap.BaseSample > 0 &&
2337          "Total samples in base profile should be greater than 0");
2338   BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
2339   assert(ProfOverlap.TestSample > 0 &&
2340          "Total samples in test profile should be greater than 0");
2341   TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
2342   return FuncSimilarity * (BaseFrac + TestFrac);
2343 }
2344 
2345 double SampleOverlapAggregator::computeSampleFunctionOverlap(
2346     const sampleprof::FunctionSamples *BaseFunc,
2347     const sampleprof::FunctionSamples *TestFunc,
2348     SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
2349     uint64_t TestFuncSample) {
2350   // Default function internal similarity before weighted, meaning two functions
2351   // has no overlap.
2352   const double DefaultFuncInternalSimilarity = 0;
2353   double FuncSimilarity;
2354   double FuncInternalSimilarity;
2355 
2356   // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2357   // In this case, we use DefaultFuncInternalSimilarity as the function internal
2358   // similarity.
2359   if (!BaseFunc || !TestFunc) {
2360     FuncInternalSimilarity = DefaultFuncInternalSimilarity;
2361   } else {
2362     assert(FuncOverlap != nullptr &&
2363            "FuncOverlap should be provided in this case");
2364     FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
2365         *BaseFunc, *TestFunc, *FuncOverlap);
2366     // Now, FuncInternalSimilarity may be a little less than 0 due to
2367     // imprecision of floating point accumulations. Make it zero if the
2368     // difference is below Epsilon.
2369     FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
2370                                  ? 0
2371                                  : FuncInternalSimilarity;
2372   }
2373   FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
2374                                            BaseFuncSample, TestFuncSample);
2375   return FuncSimilarity;
2376 }
2377 
2378 void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
2379   using namespace sampleprof;
2380 
2381   std::unordered_map<SampleContext, const FunctionSamples *,
2382                      SampleContext::Hash>
2383       BaseFuncProf;
2384   const auto &BaseProfiles = BaseReader->getProfiles();
2385   for (const auto &BaseFunc : BaseProfiles) {
2386     BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second));
2387   }
2388   ProfOverlap.UnionCount = BaseFuncProf.size();
2389 
2390   const auto &TestProfiles = TestReader->getProfiles();
2391   for (const auto &TestFunc : TestProfiles) {
2392     SampleOverlapStats FuncOverlap;
2393     FuncOverlap.TestName = TestFunc.second.getContext();
2394     assert(TestStats.count(FuncOverlap.TestName) &&
2395            "TestStats should have records for all functions in test profile "
2396            "except inlinees");
2397     FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
2398 
2399     bool Matched = false;
2400     const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
2401     if (Match == BaseFuncProf.end()) {
2402       const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
2403       ++ProfOverlap.TestUniqueCount;
2404       ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
2405       FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
2406 
2407       updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount);
2408 
2409       double FuncSimilarity = computeSampleFunctionOverlap(
2410           nullptr, nullptr, nullptr, 0, FuncStats.SampleSum);
2411       ProfOverlap.Similarity +=
2412           weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum);
2413 
2414       ++ProfOverlap.UnionCount;
2415       ProfOverlap.UnionSample += FuncStats.SampleSum;
2416     } else {
2417       ++ProfOverlap.OverlapCount;
2418 
2419       // Two functions match with each other. Compute function-level overlap and
2420       // aggregate them into profile-level overlap.
2421       FuncOverlap.BaseName = Match->second->getContext();
2422       assert(BaseStats.count(FuncOverlap.BaseName) &&
2423              "BaseStats should have records for all functions in base profile "
2424              "except inlinees");
2425       FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
2426 
2427       FuncOverlap.Similarity = computeSampleFunctionOverlap(
2428           Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample,
2429           FuncOverlap.TestSample);
2430       ProfOverlap.Similarity +=
2431           weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample,
2432                              FuncOverlap.TestSample);
2433       ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2434       ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2435 
2436       // Accumulate the percentage of base unique and test unique samples into
2437       // ProfOverlap.
2438       ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2439       ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2440 
2441       // Remove matched base functions for later reporting functions not found
2442       // in test profile.
2443       BaseFuncProf.erase(Match);
2444       Matched = true;
2445     }
2446 
2447     // Print function-level similarity information if specified by options.
2448     assert(TestStats.count(FuncOverlap.TestName) &&
2449            "TestStats should have records for all functions in test profile "
2450            "except inlinees");
2451     if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
2452         (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
2453         (Matched && !FuncFilter.NameFilter.empty() &&
2454          FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) !=
2455              std::string::npos)) {
2456       assert(ProfOverlap.BaseSample > 0 &&
2457              "Total samples in base profile should be greater than 0");
2458       FuncOverlap.BaseWeight =
2459           static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2460       assert(ProfOverlap.TestSample > 0 &&
2461              "Total samples in test profile should be greater than 0");
2462       FuncOverlap.TestWeight =
2463           static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2464       FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap);
2465     }
2466   }
2467 
2468   // Traverse through functions in base profile but not in test profile.
2469   for (const auto &F : BaseFuncProf) {
2470     assert(BaseStats.count(F.second->getContext()) &&
2471            "BaseStats should have records for all functions in base profile "
2472            "except inlinees");
2473     const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2474     ++ProfOverlap.BaseUniqueCount;
2475     ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2476 
2477     updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount);
2478 
2479     double FuncSimilarity = computeSampleFunctionOverlap(
2480         nullptr, nullptr, nullptr, FuncStats.SampleSum, 0);
2481     ProfOverlap.Similarity +=
2482         weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0);
2483 
2484     ProfOverlap.UnionSample += FuncStats.SampleSum;
2485   }
2486 
2487   // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2488   // of floating point accumulations. Make it 1.0 if the difference is below
2489   // Epsilon.
2490   ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon)
2491                                ? 1
2492                                : ProfOverlap.Similarity;
2493 
2494   computeHotFuncOverlap();
2495 }
2496 
2497 void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2498   const auto &BaseProf = BaseReader->getProfiles();
2499   for (const auto &I : BaseProf) {
2500     ++ProfOverlap.BaseCount;
2501     FuncSampleStats FuncStats;
2502     getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
2503     ProfOverlap.BaseSample += FuncStats.SampleSum;
2504     BaseStats.emplace(I.second.getContext(), FuncStats);
2505   }
2506 
2507   const auto &TestProf = TestReader->getProfiles();
2508   for (const auto &I : TestProf) {
2509     ++ProfOverlap.TestCount;
2510     FuncSampleStats FuncStats;
2511     getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
2512     ProfOverlap.TestSample += FuncStats.SampleSum;
2513     TestStats.emplace(I.second.getContext(), FuncStats);
2514   }
2515 
2516   ProfOverlap.BaseName = StringRef(BaseFilename);
2517   ProfOverlap.TestName = StringRef(TestFilename);
2518 }
2519 
2520 void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2521   using namespace sampleprof;
2522 
2523   if (FuncSimilarityDump.empty())
2524     return;
2525 
2526   formatted_raw_ostream FOS(OS);
2527   FOS << "Function-level details:\n";
2528   FOS << "Base weight";
2529   FOS.PadToColumn(TestWeightCol);
2530   FOS << "Test weight";
2531   FOS.PadToColumn(SimilarityCol);
2532   FOS << "Similarity";
2533   FOS.PadToColumn(OverlapCol);
2534   FOS << "Overlap";
2535   FOS.PadToColumn(BaseUniqueCol);
2536   FOS << "Base unique";
2537   FOS.PadToColumn(TestUniqueCol);
2538   FOS << "Test unique";
2539   FOS.PadToColumn(BaseSampleCol);
2540   FOS << "Base samples";
2541   FOS.PadToColumn(TestSampleCol);
2542   FOS << "Test samples";
2543   FOS.PadToColumn(FuncNameCol);
2544   FOS << "Function name\n";
2545   for (const auto &F : FuncSimilarityDump) {
2546     double OverlapPercent =
2547         F.second.UnionSample > 0
2548             ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2549             : 0;
2550     double BaseUniquePercent =
2551         F.second.BaseSample > 0
2552             ? static_cast<double>(F.second.BaseUniqueSample) /
2553                   F.second.BaseSample
2554             : 0;
2555     double TestUniquePercent =
2556         F.second.TestSample > 0
2557             ? static_cast<double>(F.second.TestUniqueSample) /
2558                   F.second.TestSample
2559             : 0;
2560 
2561     FOS << format("%.2f%%", F.second.BaseWeight * 100);
2562     FOS.PadToColumn(TestWeightCol);
2563     FOS << format("%.2f%%", F.second.TestWeight * 100);
2564     FOS.PadToColumn(SimilarityCol);
2565     FOS << format("%.2f%%", F.second.Similarity * 100);
2566     FOS.PadToColumn(OverlapCol);
2567     FOS << format("%.2f%%", OverlapPercent * 100);
2568     FOS.PadToColumn(BaseUniqueCol);
2569     FOS << format("%.2f%%", BaseUniquePercent * 100);
2570     FOS.PadToColumn(TestUniqueCol);
2571     FOS << format("%.2f%%", TestUniquePercent * 100);
2572     FOS.PadToColumn(BaseSampleCol);
2573     FOS << F.second.BaseSample;
2574     FOS.PadToColumn(TestSampleCol);
2575     FOS << F.second.TestSample;
2576     FOS.PadToColumn(FuncNameCol);
2577     FOS << F.second.TestName.toString() << "\n";
2578   }
2579 }
2580 
2581 void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2582   OS << "Profile overlap infomation for base_profile: "
2583      << ProfOverlap.BaseName.toString()
2584      << " and test_profile: " << ProfOverlap.TestName.toString()
2585      << "\nProgram level:\n";
2586 
2587   OS << "  Whole program profile similarity: "
2588      << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n";
2589 
2590   assert(ProfOverlap.UnionSample > 0 &&
2591          "Total samples in two profile should be greater than 0");
2592   double OverlapPercent =
2593       static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2594   assert(ProfOverlap.BaseSample > 0 &&
2595          "Total samples in base profile should be greater than 0");
2596   double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2597                              ProfOverlap.BaseSample;
2598   assert(ProfOverlap.TestSample > 0 &&
2599          "Total samples in test profile should be greater than 0");
2600   double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2601                              ProfOverlap.TestSample;
2602 
2603   OS << "  Whole program sample overlap: "
2604      << format("%.3f%%", OverlapPercent * 100) << "\n";
2605   OS << "    percentage of samples unique in base profile: "
2606      << format("%.3f%%", BaseUniquePercent * 100) << "\n";
2607   OS << "    percentage of samples unique in test profile: "
2608      << format("%.3f%%", TestUniquePercent * 100) << "\n";
2609   OS << "    total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2610      << "    total samples in test profile: " << ProfOverlap.TestSample << "\n";
2611 
2612   assert(ProfOverlap.UnionCount > 0 &&
2613          "There should be at least one function in two input profiles");
2614   double FuncOverlapPercent =
2615       static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2616   OS << "  Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100)
2617      << "\n";
2618   OS << "    overlap functions: " << ProfOverlap.OverlapCount << "\n";
2619   OS << "    functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2620      << "\n";
2621   OS << "    functions unique in test profile: " << ProfOverlap.TestUniqueCount
2622      << "\n";
2623 }
2624 
2625 void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2626     raw_fd_ostream &OS) const {
2627   assert(HotFuncOverlap.UnionCount > 0 &&
2628          "There should be at least one hot function in two input profiles");
2629   OS << "  Hot-function overlap: "
2630      << format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) /
2631                              HotFuncOverlap.UnionCount * 100)
2632      << "\n";
2633   OS << "    overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2634   OS << "    hot functions unique in base profile: "
2635      << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2636   OS << "    hot functions unique in test profile: "
2637      << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2638 
2639   assert(HotBlockOverlap.UnionCount > 0 &&
2640          "There should be at least one hot block in two input profiles");
2641   OS << "  Hot-block overlap: "
2642      << format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) /
2643                              HotBlockOverlap.UnionCount * 100)
2644      << "\n";
2645   OS << "    overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2646   OS << "    hot blocks unique in base profile: "
2647      << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2648   OS << "    hot blocks unique in test profile: "
2649      << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2650 }
2651 
2652 std::error_code SampleOverlapAggregator::loadProfiles() {
2653   using namespace sampleprof;
2654 
2655   LLVMContext Context;
2656   auto FS = vfs::getRealFileSystem();
2657   auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS,
2658                                                      FSDiscriminatorPassOption);
2659   if (std::error_code EC = BaseReaderOrErr.getError())
2660     exitWithErrorCode(EC, BaseFilename);
2661 
2662   auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS,
2663                                                      FSDiscriminatorPassOption);
2664   if (std::error_code EC = TestReaderOrErr.getError())
2665     exitWithErrorCode(EC, TestFilename);
2666 
2667   BaseReader = std::move(BaseReaderOrErr.get());
2668   TestReader = std::move(TestReaderOrErr.get());
2669 
2670   if (std::error_code EC = BaseReader->read())
2671     exitWithErrorCode(EC, BaseFilename);
2672   if (std::error_code EC = TestReader->read())
2673     exitWithErrorCode(EC, TestFilename);
2674   if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2675     exitWithError(
2676         "cannot compare probe-based profile with non-probe-based profile");
2677   if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2678     exitWithError("cannot compare CS profile with non-CS profile");
2679 
2680   // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2681   // profile summary.
2682   ProfileSummary &BasePS = BaseReader->getSummary();
2683   ProfileSummary &TestPS = TestReader->getSummary();
2684   BaseHotThreshold =
2685       ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary());
2686   TestHotThreshold =
2687       ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary());
2688 
2689   return std::error_code();
2690 }
2691 
2692 void overlapSampleProfile(const std::string &BaseFilename,
2693                           const std::string &TestFilename,
2694                           const OverlapFuncFilters &FuncFilter,
2695                           uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2696   using namespace sampleprof;
2697 
2698   // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2699   // report 2--3 places after decimal point in percentage numbers.
2700   SampleOverlapAggregator OverlapAggr(
2701       BaseFilename, TestFilename,
2702       static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2703   if (std::error_code EC = OverlapAggr.loadProfiles())
2704     exitWithErrorCode(EC);
2705 
2706   OverlapAggr.initializeSampleProfileOverlap();
2707   if (OverlapAggr.detectZeroSampleProfile(OS))
2708     return;
2709 
2710   OverlapAggr.computeSampleProfileOverlap(OS);
2711 
2712   OverlapAggr.dumpProgramSummary(OS);
2713   OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2714   OverlapAggr.dumpFuncSimilarity(OS);
2715 }
2716 
2717 static int overlap_main() {
2718   std::error_code EC;
2719   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
2720   if (EC)
2721     exitWithErrorCode(EC, OutputFilename);
2722 
2723   if (ProfileKind == instr)
2724     overlapInstrProfile(BaseFilename, TestFilename,
2725                         OverlapFuncFilters{OverlapValueCutoff, FuncNameFilter},
2726                         OS, IsCS);
2727   else
2728     overlapSampleProfile(BaseFilename, TestFilename,
2729                          OverlapFuncFilters{OverlapValueCutoff, FuncNameFilter},
2730                          SimilarityCutoff, OS);
2731 
2732   return 0;
2733 }
2734 
2735 namespace {
2736 struct ValueSitesStats {
2737   ValueSitesStats() = default;
2738   uint64_t TotalNumValueSites = 0;
2739   uint64_t TotalNumValueSitesWithValueProfile = 0;
2740   uint64_t TotalNumValues = 0;
2741   std::vector<unsigned> ValueSitesHistogram;
2742 };
2743 } // namespace
2744 
2745 static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2746                                   ValueSitesStats &Stats, raw_fd_ostream &OS,
2747                                   InstrProfSymtab *Symtab) {
2748   uint32_t NS = Func.getNumValueSites(VK);
2749   Stats.TotalNumValueSites += NS;
2750   for (size_t I = 0; I < NS; ++I) {
2751     auto VD = Func.getValueArrayForSite(VK, I);
2752     uint32_t NV = VD.size();
2753     if (NV == 0)
2754       continue;
2755     Stats.TotalNumValues += NV;
2756     Stats.TotalNumValueSitesWithValueProfile++;
2757     if (NV > Stats.ValueSitesHistogram.size())
2758       Stats.ValueSitesHistogram.resize(NV, 0);
2759     Stats.ValueSitesHistogram[NV - 1]++;
2760 
2761     uint64_t SiteSum = 0;
2762     for (const auto &V : VD)
2763       SiteSum += V.Count;
2764     if (SiteSum == 0)
2765       SiteSum = 1;
2766 
2767     for (const auto &V : VD) {
2768       OS << "\t[ " << format("%2u", I) << ", ";
2769       if (Symtab == nullptr)
2770         OS << format("%4" PRIu64, V.Value);
2771       else
2772         OS << Symtab->getFuncOrVarName(V.Value);
2773       OS << ", " << format("%10" PRId64, V.Count) << " ] ("
2774          << format("%.2f%%", (V.Count * 100.0 / SiteSum)) << ")\n";
2775     }
2776   }
2777 }
2778 
2779 static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2780                                 ValueSitesStats &Stats) {
2781   OS << "  Total number of sites: " << Stats.TotalNumValueSites << "\n";
2782   OS << "  Total number of sites with values: "
2783      << Stats.TotalNumValueSitesWithValueProfile << "\n";
2784   OS << "  Total number of profiled values: " << Stats.TotalNumValues << "\n";
2785 
2786   OS << "  Value sites histogram:\n\tNumTargets, SiteCount\n";
2787   for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2788     if (Stats.ValueSitesHistogram[I] > 0)
2789       OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2790   }
2791 }
2792 
2793 static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
2794   if (SFormat == ShowFormat::Json)
2795     exitWithError("JSON output is not supported for instr profiles");
2796   if (SFormat == ShowFormat::Yaml)
2797     exitWithError("YAML output is not supported for instr profiles");
2798   auto FS = vfs::getRealFileSystem();
2799   auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
2800   std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
2801   if (ShowDetailedSummary && Cutoffs.empty()) {
2802     Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
2803   }
2804   InstrProfSummaryBuilder Builder(std::move(Cutoffs));
2805   if (Error E = ReaderOrErr.takeError())
2806     exitWithError(std::move(E), Filename);
2807 
2808   auto Reader = std::move(ReaderOrErr.get());
2809   bool IsIRInstr = Reader->isIRLevelProfile();
2810   size_t ShownFunctions = 0;
2811   size_t BelowCutoffFunctions = 0;
2812   int NumVPKind = IPVK_Last - IPVK_First + 1;
2813   std::vector<ValueSitesStats> VPStats(NumVPKind);
2814 
2815   auto MinCmp = [](const std::pair<std::string, uint64_t> &v1,
2816                    const std::pair<std::string, uint64_t> &v2) {
2817     return v1.second > v2.second;
2818   };
2819 
2820   std::priority_queue<std::pair<std::string, uint64_t>,
2821                       std::vector<std::pair<std::string, uint64_t>>,
2822                       decltype(MinCmp)>
2823       HottestFuncs(MinCmp);
2824 
2825   if (!TextFormat && OnlyListBelow) {
2826     OS << "The list of functions with the maximum counter less than "
2827        << ShowValueCutoff << ":\n";
2828   }
2829 
2830   // Add marker so that IR-level instrumentation round-trips properly.
2831   if (TextFormat && IsIRInstr)
2832     OS << ":ir\n";
2833 
2834   for (const auto &Func : *Reader) {
2835     if (Reader->isIRLevelProfile()) {
2836       bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
2837       if (FuncIsCS != ShowCS)
2838         continue;
2839     }
2840     bool Show = ShowAllFunctions ||
2841                 (!FuncNameFilter.empty() && Func.Name.contains(FuncNameFilter));
2842 
2843     bool doTextFormatDump = (Show && TextFormat);
2844 
2845     if (doTextFormatDump) {
2846       InstrProfSymtab &Symtab = Reader->getSymtab();
2847       InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab,
2848                                          OS);
2849       continue;
2850     }
2851 
2852     assert(Func.Counts.size() > 0 && "function missing entry counter");
2853     Builder.addRecord(Func);
2854 
2855     if (ShowCovered) {
2856       if (llvm::any_of(Func.Counts, [](uint64_t C) { return C; }))
2857         OS << Func.Name << "\n";
2858       continue;
2859     }
2860 
2861     uint64_t FuncMax = 0;
2862     uint64_t FuncSum = 0;
2863 
2864     auto PseudoKind = Func.getCountPseudoKind();
2865     if (PseudoKind != InstrProfRecord::NotPseudo) {
2866       if (Show) {
2867         if (!ShownFunctions)
2868           OS << "Counters:\n";
2869         ++ShownFunctions;
2870         OS << "  " << Func.Name << ":\n"
2871            << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
2872            << "    Counters: " << Func.Counts.size();
2873         if (PseudoKind == InstrProfRecord::PseudoHot)
2874           OS << "    <PseudoHot>\n";
2875         else if (PseudoKind == InstrProfRecord::PseudoWarm)
2876           OS << "    <PseudoWarm>\n";
2877         else
2878           llvm_unreachable("Unknown PseudoKind");
2879       }
2880       continue;
2881     }
2882 
2883     for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
2884       FuncMax = std::max(FuncMax, Func.Counts[I]);
2885       FuncSum += Func.Counts[I];
2886     }
2887 
2888     if (FuncMax < ShowValueCutoff) {
2889       ++BelowCutoffFunctions;
2890       if (OnlyListBelow) {
2891         OS << "  " << Func.Name << ": (Max = " << FuncMax
2892            << " Sum = " << FuncSum << ")\n";
2893       }
2894       continue;
2895     } else if (OnlyListBelow)
2896       continue;
2897 
2898     if (TopNFunctions) {
2899       if (HottestFuncs.size() == TopNFunctions) {
2900         if (HottestFuncs.top().second < FuncMax) {
2901           HottestFuncs.pop();
2902           HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
2903         }
2904       } else
2905         HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
2906     }
2907 
2908     if (Show) {
2909       if (!ShownFunctions)
2910         OS << "Counters:\n";
2911 
2912       ++ShownFunctions;
2913 
2914       OS << "  " << Func.Name << ":\n"
2915          << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
2916          << "    Counters: " << Func.Counts.size() << "\n";
2917       if (!IsIRInstr)
2918         OS << "    Function count: " << Func.Counts[0] << "\n";
2919 
2920       if (ShowIndirectCallTargets)
2921         OS << "    Indirect Call Site Count: "
2922            << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
2923 
2924       if (ShowVTables)
2925         OS << "    Number of instrumented vtables: "
2926            << Func.getNumValueSites(IPVK_VTableTarget) << "\n";
2927 
2928       uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize);
2929       if (ShowMemOPSizes && NumMemOPCalls > 0)
2930         OS << "    Number of Memory Intrinsics Calls: " << NumMemOPCalls
2931            << "\n";
2932 
2933       if (ShowCounts) {
2934         OS << "    Block counts: [";
2935         size_t Start = (IsIRInstr ? 0 : 1);
2936         for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
2937           OS << (I == Start ? "" : ", ") << Func.Counts[I];
2938         }
2939         OS << "]\n";
2940       }
2941 
2942       if (ShowIndirectCallTargets) {
2943         OS << "    Indirect Target Results:\n";
2944         traverseAllValueSites(Func, IPVK_IndirectCallTarget,
2945                               VPStats[IPVK_IndirectCallTarget], OS,
2946                               &(Reader->getSymtab()));
2947       }
2948 
2949       if (ShowVTables) {
2950         OS << "    VTable Results:\n";
2951         traverseAllValueSites(Func, IPVK_VTableTarget,
2952                               VPStats[IPVK_VTableTarget], OS,
2953                               &(Reader->getSymtab()));
2954       }
2955 
2956       if (ShowMemOPSizes && NumMemOPCalls > 0) {
2957         OS << "    Memory Intrinsic Size Results:\n";
2958         traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS,
2959                               nullptr);
2960       }
2961     }
2962   }
2963   if (Reader->hasError())
2964     exitWithError(Reader->getError(), Filename);
2965 
2966   if (TextFormat || ShowCovered)
2967     return 0;
2968   std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
2969   bool IsIR = Reader->isIRLevelProfile();
2970   OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
2971   if (IsIR)
2972     OS << "  entry_first = " << Reader->instrEntryBBEnabled();
2973   OS << "\n";
2974   if (ShowAllFunctions || !FuncNameFilter.empty())
2975     OS << "Functions shown: " << ShownFunctions << "\n";
2976   OS << "Total functions: " << PS->getNumFunctions() << "\n";
2977   if (ShowValueCutoff > 0) {
2978     OS << "Number of functions with maximum count (< " << ShowValueCutoff
2979        << "): " << BelowCutoffFunctions << "\n";
2980     OS << "Number of functions with maximum count (>= " << ShowValueCutoff
2981        << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
2982   }
2983   OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
2984   OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";
2985 
2986   if (TopNFunctions) {
2987     std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs;
2988     while (!HottestFuncs.empty()) {
2989       SortedHottestFuncs.emplace_back(HottestFuncs.top());
2990       HottestFuncs.pop();
2991     }
2992     OS << "Top " << TopNFunctions
2993        << " functions with the largest internal block counts: \n";
2994     for (auto &hotfunc : llvm::reverse(SortedHottestFuncs))
2995       OS << "  " << hotfunc.first << ", max count = " << hotfunc.second << "\n";
2996   }
2997 
2998   if (ShownFunctions && ShowIndirectCallTargets) {
2999     OS << "Statistics for indirect call sites profile:\n";
3000     showValueSitesStats(OS, IPVK_IndirectCallTarget,
3001                         VPStats[IPVK_IndirectCallTarget]);
3002   }
3003 
3004   if (ShownFunctions && ShowVTables) {
3005     OS << "Statistics for vtable profile:\n";
3006     showValueSitesStats(OS, IPVK_VTableTarget, VPStats[IPVK_VTableTarget]);
3007   }
3008 
3009   if (ShownFunctions && ShowMemOPSizes) {
3010     OS << "Statistics for memory intrinsic calls sizes profile:\n";
3011     showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]);
3012   }
3013 
3014   if (ShowDetailedSummary) {
3015     OS << "Total number of blocks: " << PS->getNumCounts() << "\n";
3016     OS << "Total count: " << PS->getTotalCount() << "\n";
3017     PS->printDetailedSummary(OS);
3018   }
3019 
3020   if (ShowBinaryIds)
3021     if (Error E = Reader->printBinaryIds(OS))
3022       exitWithError(std::move(E), Filename);
3023 
3024   if (ShowProfileVersion)
3025     OS << "Profile version: " << Reader->getVersion() << "\n";
3026 
3027   if (ShowTemporalProfTraces) {
3028     auto &Traces = Reader->getTemporalProfTraces();
3029     OS << "Temporal Profile Traces (samples=" << Traces.size()
3030        << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
3031     for (unsigned i = 0; i < Traces.size(); i++) {
3032       OS << "  Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
3033          << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
3034       for (auto &NameRef : Traces[i].FunctionNameRefs)
3035         OS << "    " << Reader->getSymtab().getFuncOrVarName(NameRef) << "\n";
3036     }
3037   }
3038 
3039   return 0;
3040 }
3041 
3042 static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
3043                             raw_fd_ostream &OS) {
3044   if (!Reader->dumpSectionInfo(OS)) {
3045     WithColor::warning() << "-show-sec-info-only is only supported for "
3046                          << "sample profile in extbinary format and is "
3047                          << "ignored for other formats.\n";
3048     return;
3049   }
3050 }
3051 
3052 namespace {
3053 struct HotFuncInfo {
3054   std::string FuncName;
3055   uint64_t TotalCount = 0;
3056   double TotalCountPercent = 0.0f;
3057   uint64_t MaxCount = 0;
3058   uint64_t EntryCount = 0;
3059 
3060   HotFuncInfo() = default;
3061 
3062   HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
3063       : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
3064         MaxCount(MS), EntryCount(ES) {}
3065 };
3066 } // namespace
3067 
3068 // Print out detailed information about hot functions in PrintValues vector.
3069 // Users specify titles and offset of every columns through ColumnTitle and
3070 // ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
3071 // and at least 4. Besides, users can optionally give a HotFuncMetric string to
3072 // print out or let it be an empty string.
3073 static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
3074                                 const std::vector<int> &ColumnOffset,
3075                                 const std::vector<HotFuncInfo> &PrintValues,
3076                                 uint64_t HotFuncCount, uint64_t TotalFuncCount,
3077                                 uint64_t HotProfCount, uint64_t TotalProfCount,
3078                                 const std::string &HotFuncMetric,
3079                                 uint32_t TopNFunctions, raw_fd_ostream &OS) {
3080   assert(ColumnOffset.size() == ColumnTitle.size() &&
3081          "ColumnOffset and ColumnTitle should have the same size");
3082   assert(ColumnTitle.size() >= 4 &&
3083          "ColumnTitle should have at least 4 elements");
3084   assert(TotalFuncCount > 0 &&
3085          "There should be at least one function in the profile");
3086   double TotalProfPercent = 0;
3087   if (TotalProfCount > 0)
3088     TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
3089 
3090   formatted_raw_ostream FOS(OS);
3091   FOS << HotFuncCount << " out of " << TotalFuncCount
3092       << " functions with profile ("
3093       << format("%.2f%%",
3094                 (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
3095       << ") are considered hot functions";
3096   if (!HotFuncMetric.empty())
3097     FOS << " (" << HotFuncMetric << ")";
3098   FOS << ".\n";
3099   FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
3100       << format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n";
3101 
3102   for (size_t I = 0; I < ColumnTitle.size(); ++I) {
3103     FOS.PadToColumn(ColumnOffset[I]);
3104     FOS << ColumnTitle[I];
3105   }
3106   FOS << "\n";
3107 
3108   uint32_t Count = 0;
3109   for (const auto &R : PrintValues) {
3110     if (TopNFunctions && (Count++ == TopNFunctions))
3111       break;
3112     FOS.PadToColumn(ColumnOffset[0]);
3113     FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")";
3114     FOS.PadToColumn(ColumnOffset[1]);
3115     FOS << R.MaxCount;
3116     FOS.PadToColumn(ColumnOffset[2]);
3117     FOS << R.EntryCount;
3118     FOS.PadToColumn(ColumnOffset[3]);
3119     FOS << R.FuncName << "\n";
3120   }
3121 }
3122 
3123 static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
3124                                ProfileSummary &PS, uint32_t TopN,
3125                                raw_fd_ostream &OS) {
3126   using namespace sampleprof;
3127 
3128   const uint32_t HotFuncCutoff = 990000;
3129   auto &SummaryVector = PS.getDetailedSummary();
3130   uint64_t MinCountThreshold = 0;
3131   for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
3132     if (SummaryEntry.Cutoff == HotFuncCutoff) {
3133       MinCountThreshold = SummaryEntry.MinCount;
3134       break;
3135     }
3136   }
3137 
3138   // Traverse all functions in the profile and keep only hot functions.
3139   // The following loop also calculates the sum of total samples of all
3140   // functions.
3141   std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
3142                 std::greater<uint64_t>>
3143       HotFunc;
3144   uint64_t ProfileTotalSample = 0;
3145   uint64_t HotFuncSample = 0;
3146   uint64_t HotFuncCount = 0;
3147 
3148   for (const auto &I : Profiles) {
3149     FuncSampleStats FuncStats;
3150     const FunctionSamples &FuncProf = I.second;
3151     ProfileTotalSample += FuncProf.getTotalSamples();
3152     getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold);
3153 
3154     if (isFunctionHot(FuncStats, MinCountThreshold)) {
3155       HotFunc.emplace(FuncProf.getTotalSamples(),
3156                       std::make_pair(&(I.second), FuncStats.MaxSample));
3157       HotFuncSample += FuncProf.getTotalSamples();
3158       ++HotFuncCount;
3159     }
3160   }
3161 
3162   std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
3163                                        "Entry sample", "Function name"};
3164   std::vector<int> ColumnOffset{0, 24, 42, 58};
3165   std::string Metric =
3166       std::string("max sample >= ") + std::to_string(MinCountThreshold);
3167   std::vector<HotFuncInfo> PrintValues;
3168   for (const auto &FuncPair : HotFunc) {
3169     const FunctionSamples &Func = *FuncPair.second.first;
3170     double TotalSamplePercent =
3171         (ProfileTotalSample > 0)
3172             ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
3173             : 0;
3174     PrintValues.emplace_back(
3175         HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
3176                     TotalSamplePercent, FuncPair.second.second,
3177                     Func.getHeadSamplesEstimate()));
3178   }
3179   dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
3180                       Profiles.size(), HotFuncSample, ProfileTotalSample,
3181                       Metric, TopN, OS);
3182 
3183   return 0;
3184 }
3185 
3186 static int showSampleProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3187   if (SFormat == ShowFormat::Yaml)
3188     exitWithError("YAML output is not supported for sample profiles");
3189   using namespace sampleprof;
3190   LLVMContext Context;
3191   auto FS = vfs::getRealFileSystem();
3192   auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS,
3193                                                  FSDiscriminatorPassOption);
3194   if (std::error_code EC = ReaderOrErr.getError())
3195     exitWithErrorCode(EC, Filename);
3196 
3197   auto Reader = std::move(ReaderOrErr.get());
3198   if (ShowSectionInfoOnly) {
3199     showSectionInfo(Reader.get(), OS);
3200     return 0;
3201   }
3202 
3203   if (std::error_code EC = Reader->read())
3204     exitWithErrorCode(EC, Filename);
3205 
3206   if (ShowAllFunctions || FuncNameFilter.empty()) {
3207     if (SFormat == ShowFormat::Json)
3208       Reader->dumpJson(OS);
3209     else
3210       Reader->dump(OS);
3211   } else {
3212     if (SFormat == ShowFormat::Json)
3213       exitWithError(
3214           "the JSON format is supported only when all functions are to "
3215           "be printed");
3216 
3217     // TODO: parse context string to support filtering by contexts.
3218     FunctionSamples *FS = Reader->getSamplesFor(StringRef(FuncNameFilter));
3219     Reader->dumpFunctionProfile(FS ? *FS : FunctionSamples(), OS);
3220   }
3221 
3222   if (ShowProfileSymbolList) {
3223     std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
3224         Reader->getProfileSymbolList();
3225     ReaderList->dump(OS);
3226   }
3227 
3228   if (ShowDetailedSummary) {
3229     auto &PS = Reader->getSummary();
3230     PS.printSummary(OS);
3231     PS.printDetailedSummary(OS);
3232   }
3233 
3234   if (ShowHotFuncList || TopNFunctions)
3235     showHotFunctionList(Reader->getProfiles(), Reader->getSummary(),
3236                         TopNFunctions, OS);
3237 
3238   return 0;
3239 }
3240 
3241 static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3242   if (SFormat == ShowFormat::Json)
3243     exitWithError("JSON output is not supported for MemProf");
3244   auto ReaderOr = llvm::memprof::RawMemProfReader::create(
3245       Filename, ProfiledBinary, /*KeepNames=*/true);
3246   if (Error E = ReaderOr.takeError())
3247     // Since the error can be related to the profile or the binary we do not
3248     // pass whence. Instead additional context is provided where necessary in
3249     // the error message.
3250     exitWithError(std::move(E), /*Whence*/ "");
3251 
3252   std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
3253       ReaderOr.get().release());
3254 
3255   Reader->printYAML(OS);
3256   return 0;
3257 }
3258 
3259 static int showDebugInfoCorrelation(const std::string &Filename,
3260                                     ShowFormat SFormat, raw_fd_ostream &OS) {
3261   if (SFormat == ShowFormat::Json)
3262     exitWithError("JSON output is not supported for debug info correlation");
3263   std::unique_ptr<InstrProfCorrelator> Correlator;
3264   if (auto Err =
3265           InstrProfCorrelator::get(Filename, InstrProfCorrelator::DEBUG_INFO)
3266               .moveInto(Correlator))
3267     exitWithError(std::move(Err), Filename);
3268   if (SFormat == ShowFormat::Yaml) {
3269     if (auto Err = Correlator->dumpYaml(MaxDbgCorrelationWarnings, OS))
3270       exitWithError(std::move(Err), Filename);
3271     return 0;
3272   }
3273 
3274   if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
3275     exitWithError(std::move(Err), Filename);
3276 
3277   InstrProfSymtab Symtab;
3278   if (auto Err = Symtab.create(
3279           StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
3280     exitWithError(std::move(Err), Filename);
3281 
3282   if (ShowProfileSymbolList)
3283     Symtab.dumpNames(OS);
3284   // TODO: Read "Profile Data Type" from debug info to compute and show how many
3285   // counters the section holds.
3286   if (ShowDetailedSummary)
3287     OS << "Counters section size: 0x"
3288        << Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n";
3289   OS << "Found " << Correlator->getDataSize() << " functions\n";
3290 
3291   return 0;
3292 }
3293 
3294 static int show_main(StringRef ProgName) {
3295   if (Filename.empty() && DebugInfoFilename.empty())
3296     exitWithError(
3297         "the positional argument '<profdata-file>' is required unless '--" +
3298         DebugInfoFilename.ArgStr + "' is provided");
3299 
3300   if (Filename == OutputFilename) {
3301     errs() << ProgName
3302            << " show: Input file name cannot be the same as the output file "
3303               "name!\n";
3304     return 1;
3305   }
3306   if (JsonFormat)
3307     SFormat = ShowFormat::Json;
3308 
3309   std::error_code EC;
3310   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3311   if (EC)
3312     exitWithErrorCode(EC, OutputFilename);
3313 
3314   if (ShowAllFunctions && !FuncNameFilter.empty())
3315     WithColor::warning() << "-function argument ignored: showing all functions\n";
3316 
3317   if (!DebugInfoFilename.empty())
3318     return showDebugInfoCorrelation(DebugInfoFilename, SFormat, OS);
3319 
3320   if (ShowProfileKind == instr)
3321     return showInstrProfile(SFormat, OS);
3322   if (ShowProfileKind == sample)
3323     return showSampleProfile(SFormat, OS);
3324   return showMemProfProfile(SFormat, OS);
3325 }
3326 
3327 static int order_main() {
3328   std::error_code EC;
3329   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3330   if (EC)
3331     exitWithErrorCode(EC, OutputFilename);
3332   auto FS = vfs::getRealFileSystem();
3333   auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
3334   if (Error E = ReaderOrErr.takeError())
3335     exitWithError(std::move(E), Filename);
3336 
3337   auto Reader = std::move(ReaderOrErr.get());
3338   for (auto &I : *Reader) {
3339     // Read all entries
3340     (void)I;
3341   }
3342   ArrayRef Traces = Reader->getTemporalProfTraces();
3343   if (NumTestTraces && NumTestTraces >= Traces.size())
3344     exitWithError(
3345         "--" + NumTestTraces.ArgStr +
3346         " must be smaller than the total number of traces: expected: < " +
3347         Twine(Traces.size()) + ", actual: " + Twine(NumTestTraces));
3348   ArrayRef TestTraces = Traces.take_back(NumTestTraces);
3349   Traces = Traces.drop_back(NumTestTraces);
3350 
3351   std::vector<BPFunctionNode> Nodes;
3352   TemporalProfTraceTy::createBPFunctionNodes(Traces, Nodes);
3353   BalancedPartitioningConfig Config;
3354   BalancedPartitioning BP(Config);
3355   BP.run(Nodes);
3356 
3357   OS << "# Ordered " << Nodes.size() << " functions\n";
3358   if (!TestTraces.empty()) {
3359     // Since we don't know the symbol sizes, we assume 32 functions per page.
3360     DenseMap<BPFunctionNode::IDT, unsigned> IdToPageNumber;
3361     for (auto &Node : Nodes)
3362       IdToPageNumber[Node.Id] = IdToPageNumber.size() / 32;
3363 
3364     SmallSet<unsigned, 0> TouchedPages;
3365     unsigned Area = 0;
3366     for (auto &Trace : TestTraces) {
3367       for (auto Id : Trace.FunctionNameRefs) {
3368         auto It = IdToPageNumber.find(Id);
3369         if (It == IdToPageNumber.end())
3370           continue;
3371         TouchedPages.insert(It->getSecond());
3372         Area += TouchedPages.size();
3373       }
3374       TouchedPages.clear();
3375     }
3376     OS << "# Total area under the page fault curve: " << (float)Area << "\n";
3377   }
3378   OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3379         "linkage and this output does not take that into account. Some "
3380         "post-processing may be required before passing to the linker via "
3381         "-order_file.\n";
3382   for (auto &N : Nodes) {
3383     auto [Filename, ParsedFuncName] =
3384         getParsedIRPGOName(Reader->getSymtab().getFuncOrVarName(N.Id));
3385     if (!Filename.empty())
3386       OS << "# " << Filename << "\n";
3387     OS << ParsedFuncName << "\n";
3388   }
3389   return 0;
3390 }
3391 
3392 int llvm_profdata_main(int argc, char **argvNonConst,
3393                        const llvm::ToolContext &) {
3394   const char **argv = const_cast<const char **>(argvNonConst);
3395 
3396   StringRef ProgName(sys::path::filename(argv[0]));
3397 
3398   if (argc < 2) {
3399     errs()
3400         << ProgName
3401         << ": No subcommand specified! Run llvm-profdata --help for usage.\n";
3402     return 1;
3403   }
3404 
3405   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data\n");
3406 
3407   if (ShowSubcommand)
3408     return show_main(ProgName);
3409 
3410   if (OrderSubcommand)
3411     return order_main();
3412 
3413   if (OverlapSubcommand)
3414     return overlap_main();
3415 
3416   if (MergeSubcommand)
3417     return merge_main(ProgName);
3418 
3419   errs() << ProgName
3420          << ": Unknown command. Run llvm-profdata --help for usage.\n";
3421   return 1;
3422 }
3423