xref: /llvm-project/bolt/lib/Profile/YAMLProfileWriter.cpp (revision 4be3083bb39836466680a15f970ecc883d2e362a)
1 //===- bolt/Profile/YAMLProfileWriter.cpp - YAML profile serializer -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/YAMLProfileWriter.h"
10 #include "bolt/Core/BinaryBasicBlock.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Profile/BoltAddressTranslation.h"
13 #include "bolt/Profile/DataAggregator.h"
14 #include "bolt/Profile/ProfileReaderBase.h"
15 #include "bolt/Rewrite/RewriteInstance.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #undef  DEBUG_TYPE
21 #define DEBUG_TYPE "bolt-prof"
22 
23 namespace opts {
24 extern llvm::cl::opt<bool> ProfileUseDFS;
25 } // namespace opts
26 
27 namespace llvm {
28 namespace bolt {
29 
30 const BinaryFunction *YAMLProfileWriter::setCSIDestination(
31     const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
32     const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
33     uint32_t Offset) {
34   CSI.DestId = 0; // designated for unknown functions
35   CSI.EntryDiscriminator = 0;
36 
37   if (Symbol) {
38     uint64_t EntryID = 0;
39     if (const BinaryFunction *Callee =
40             BC.getFunctionForSymbol(Symbol, &EntryID)) {
41       if (BAT && BAT->isBATFunction(Callee->getAddress()))
42         std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset);
43       else if (const BinaryBasicBlock *BB =
44                    Callee->getBasicBlockContainingOffset(Offset))
45         BC.getFunctionForSymbol(Callee->getSecondaryEntryPointSymbol(*BB),
46                                 &EntryID);
47       CSI.DestId = Callee->getFunctionNumber();
48       CSI.EntryDiscriminator = EntryID;
49       return Callee;
50     }
51   }
52   return nullptr;
53 }
54 
55 yaml::bolt::BinaryFunctionProfile
56 YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
57                            const BoltAddressTranslation *BAT) {
58   yaml::bolt::BinaryFunctionProfile YamlBF;
59   const BinaryContext &BC = BF.getBinaryContext();
60 
61   const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
62 
63   // Prepare function and block hashes
64   BF.computeHash(UseDFS);
65   BF.computeBlockHashes();
66 
67   YamlBF.Name = DataAggregator::getLocationName(BF, BAT);
68   YamlBF.Id = BF.getFunctionNumber();
69   YamlBF.Hash = BF.getHash();
70   YamlBF.NumBasicBlocks = BF.size();
71   YamlBF.ExecCount = BF.getKnownExecutionCount();
72 
73   BinaryFunction::BasicBlockOrderType Order;
74   llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
75              std::back_inserter(Order));
76 
77   const FunctionLayout Layout = BF.getLayout();
78   Layout.updateLayoutIndices(Order);
79 
80   for (const BinaryBasicBlock *BB : Order) {
81     yaml::bolt::BinaryBasicBlockProfile YamlBB;
82     YamlBB.Index = BB->getLayoutIndex();
83     YamlBB.NumInstructions = BB->getNumNonPseudos();
84     YamlBB.Hash = BB->getHash();
85 
86     if (!LBRProfile) {
87       YamlBB.EventCount = BB->getKnownExecutionCount();
88       if (YamlBB.EventCount)
89         YamlBF.Blocks.emplace_back(YamlBB);
90       continue;
91     }
92 
93     YamlBB.ExecCount = BB->getKnownExecutionCount();
94 
95     for (const MCInst &Instr : *BB) {
96       if (!BC.MIB->isCall(Instr) && !BC.MIB->isIndirectBranch(Instr))
97         continue;
98 
99       SmallVector<std::pair<StringRef, yaml::bolt::CallSiteInfo>> CSTargets;
100       yaml::bolt::CallSiteInfo CSI;
101       std::optional<uint32_t> Offset = BC.MIB->getOffset(Instr);
102       if (!Offset || *Offset < BB->getInputOffset())
103         continue;
104       CSI.Offset = *Offset - BB->getInputOffset();
105 
106       if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
107         const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
108             Instr, "CallProfile");
109         if (!ICSP)
110           continue;
111         for (const IndirectCallProfile &CSP : ICSP.get()) {
112           StringRef TargetName = "";
113           const BinaryFunction *Callee =
114               setCSIDestination(BC, CSI, CSP.Symbol, BAT);
115           if (Callee)
116             TargetName = Callee->getOneName();
117           CSI.Count = CSP.Count;
118           CSI.Mispreds = CSP.Mispreds;
119           CSTargets.emplace_back(TargetName, CSI);
120         }
121       } else { // direct call or a tail call
122         StringRef TargetName = "";
123         const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr);
124         const BinaryFunction *const Callee =
125             setCSIDestination(BC, CSI, CalleeSymbol, BAT);
126         if (Callee)
127           TargetName = Callee->getOneName();
128 
129         auto getAnnotationWithDefault = [&](const MCInst &Inst, StringRef Ann) {
130           return BC.MIB->getAnnotationWithDefault(Instr, Ann, 0ull);
131         };
132         if (BC.MIB->getConditionalTailCall(Instr)) {
133           CSI.Count = getAnnotationWithDefault(Instr, "CTCTakenCount");
134           CSI.Mispreds = getAnnotationWithDefault(Instr, "CTCMispredCount");
135         } else {
136           CSI.Count = getAnnotationWithDefault(Instr, "Count");
137         }
138 
139         if (CSI.Count)
140           CSTargets.emplace_back(TargetName, CSI);
141       }
142       // Sort targets in a similar way to getBranchData, see Location::operator<
143       llvm::sort(CSTargets, [](const auto &RHS, const auto &LHS) {
144         if (RHS.first != LHS.first)
145           return RHS.first < LHS.first;
146         return RHS.second.Offset < LHS.second.Offset;
147       });
148       for (auto &KV : CSTargets)
149         YamlBB.CallSites.push_back(KV.second);
150     }
151 
152     // Skip printing if there's no profile data for non-entry basic block.
153     // Include landing pads with non-zero execution count.
154     if (YamlBB.CallSites.empty() && !BB->isEntryPoint() &&
155         !(BB->isLandingPad() && BB->getKnownExecutionCount() != 0)) {
156       // Include blocks having successors or predecessors with positive counts.
157       uint64_t SuccessorExecCount = 0;
158       for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
159            BB->branch_info())
160         SuccessorExecCount += BranchInfo.Count;
161       uint64_t PredecessorExecCount = 0;
162       for (auto Pred : BB->predecessors())
163         PredecessorExecCount += Pred->getBranchInfo(*BB).Count;
164       if (!SuccessorExecCount && !PredecessorExecCount)
165         continue;
166     }
167 
168     auto BranchInfo = BB->branch_info_begin();
169     for (const BinaryBasicBlock *Successor : BB->successors()) {
170       yaml::bolt::SuccessorInfo YamlSI;
171       YamlSI.Index = Successor->getLayoutIndex();
172       YamlSI.Count = BranchInfo->Count;
173       YamlSI.Mispreds = BranchInfo->MispredictedCount;
174 
175       YamlBB.Successors.emplace_back(YamlSI);
176 
177       ++BranchInfo;
178     }
179 
180     YamlBF.Blocks.emplace_back(YamlBB);
181   }
182   return YamlBF;
183 }
184 
185 std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
186   const BinaryContext &BC = RI.getBinaryContext();
187   const auto &Functions = BC.getBinaryFunctions();
188 
189   std::error_code EC;
190   OS = std::make_unique<raw_fd_ostream>(Filename, EC, sys::fs::OF_None);
191   if (EC) {
192     errs() << "BOLT-WARNING: " << EC.message() << " : unable to open "
193            << Filename << " for output.\n";
194     return EC;
195   }
196 
197   yaml::bolt::BinaryProfile BP;
198 
199   // Fill out the header info.
200   BP.Header.Version = 1;
201   BP.Header.FileName = std::string(BC.getFilename());
202   std::optional<StringRef> BuildID = BC.getFileBuildID();
203   BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
204   BP.Header.Origin = std::string(RI.getProfileReader()->getReaderName());
205   BP.Header.IsDFSOrder = opts::ProfileUseDFS;
206   BP.Header.HashFunction = HashFunction::Default;
207 
208   StringSet<> EventNames = RI.getProfileReader()->getEventNames();
209   if (!EventNames.empty()) {
210     std::string Sep;
211     for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) {
212       BP.Header.EventNames += Sep + EventEntry.first().str();
213       Sep = ",";
214     }
215   }
216 
217   // Make sure the profile is consistent across all functions.
218   uint16_t ProfileFlags = BinaryFunction::PF_NONE;
219   for (const auto &BFI : Functions) {
220     const BinaryFunction &BF = BFI.second;
221     if (BF.hasProfile() && !BF.empty()) {
222       assert(BF.getProfileFlags() != BinaryFunction::PF_NONE);
223       if (ProfileFlags == BinaryFunction::PF_NONE)
224         ProfileFlags = BF.getProfileFlags();
225 
226       assert(BF.getProfileFlags() == ProfileFlags &&
227              "expected consistent profile flags across all functions");
228     }
229   }
230   BP.Header.Flags = ProfileFlags;
231 
232   // Add all function objects.
233   for (const auto &BFI : Functions) {
234     const BinaryFunction &BF = BFI.second;
235     if (BF.hasProfile()) {
236       if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
237         continue;
238 
239       BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS));
240     }
241   }
242 
243   // Write the profile.
244   yaml::Output Out(*OS, nullptr, 0);
245   Out << BP;
246 
247   return std::error_code();
248 }
249 
250 } // namespace bolt
251 } // namespace llvm
252