xref: /llvm-project/bolt/lib/Profile/BoltAddressTranslation.cpp (revision 8f1d94aaea5c18b83cd3b0df3be3a48ef1d3833d)
1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/Support/Errc.h"
12 #include "llvm/Support/Error.h"
13 #include "llvm/Support/LEB128.h"
14 
15 #define DEBUG_TYPE "bolt-bat"
16 
17 namespace llvm {
18 namespace bolt {
19 
20 const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
21 
22 void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
23                                                const BinaryBasicBlock &BB,
24                                                uint64_t FuncAddress) {
25   const uint64_t BBOutputOffset =
26       BB.getOutputAddressRange().first - FuncAddress;
27   const uint32_t BBInputOffset = BB.getInputOffset();
28 
29   // Every output BB must track back to an input BB for profile collection
30   // in bolted binaries. If we are missing an offset, it means this block was
31   // created by a pass. We will skip writing any entries for it, and this means
32   // any traffic happening in this block will map to the previous block in the
33   // layout. This covers the case where an input basic block is split into two,
34   // and the second one lacks any offset.
35   if (BBInputOffset == BinaryBasicBlock::INVALID_OFFSET)
36     return;
37 
38   LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
39   LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(BBOutputOffset)
40                     << " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
41   // In case of conflicts (same Key mapping to different Vals), the last
42   // update takes precedence. Of course it is not ideal to have conflicts and
43   // those happen when we have an empty BB that either contained only
44   // NOPs or a jump to the next block (successor). Either way, the successor
45   // and this deleted block will both share the same output address (the same
46   // key), and we need to map back. We choose here to privilege the successor by
47   // allowing it to overwrite the previously inserted key in the map.
48   Map[BBOutputOffset] = BBInputOffset << 1;
49 
50   const auto &IOAddressMap =
51       BB.getFunction()->getBinaryContext().getIOAddressMap();
52 
53   for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
54     const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
55     const auto OutputAddress = IOAddressMap.lookup(InputAddress);
56     assert(OutputAddress && "Unknown instruction address");
57     const auto OutputOffset = *OutputAddress - FuncAddress;
58 
59     // Is this the first instruction in the BB? No need to duplicate the entry.
60     if (OutputOffset == BBOutputOffset)
61       continue;
62 
63     LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(OutputOffset) << " Val: "
64                       << Twine::utohexstr(InputOffset) << " (branch)\n");
65     Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
66                                              (InputOffset << 1) | BRANCHENTRY));
67   }
68 }
69 
70 void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
71   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
72   for (auto &BFI : BC.getBinaryFunctions()) {
73     const BinaryFunction &Function = BFI.second;
74     // We don't need a translation table if the body of the function hasn't
75     // changed
76     if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
77       continue;
78 
79     LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
80     LLVM_DEBUG(dbgs() << " Address reference: 0x"
81                       << Twine::utohexstr(Function.getOutputAddress()) << "\n");
82 
83     MapTy Map;
84     for (const BinaryBasicBlock *const BB :
85          Function.getLayout().getMainFragment())
86       writeEntriesForBB(Map, *BB, Function.getOutputAddress());
87     Maps.emplace(Function.getOutputAddress(), std::move(Map));
88 
89     if (!Function.isSplit())
90       continue;
91 
92     // Split maps
93     LLVM_DEBUG(dbgs() << " Cold part\n");
94     for (const FunctionFragment &FF :
95          Function.getLayout().getSplitFragments()) {
96       Map.clear();
97       for (const BinaryBasicBlock *const BB : FF)
98         writeEntriesForBB(Map, *BB, FF.getAddress());
99 
100       Maps.emplace(FF.getAddress(), std::move(Map));
101       ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
102     }
103   }
104 
105   // Output addresses are delta-encoded
106   uint64_t PrevAddress = 0;
107   writeMaps</*Cold=*/false>(Maps, PrevAddress, OS);
108   writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);
109 
110   outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
111 }
112 
113 template <bool Cold>
114 void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
115                                        uint64_t &PrevAddress, raw_ostream &OS) {
116   const uint32_t NumFuncs =
117       llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) {
118         return Cold == ColdPartSource.count(Address);
119       });
120   encodeULEB128(NumFuncs, OS);
121   LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << (Cold ? " cold" : "")
122                     << " functions for BAT.\n");
123   size_t PrevIndex = 0;
124   for (auto &MapEntry : Maps) {
125     const uint64_t Address = MapEntry.first;
126     // Only process cold fragments in cold mode, and vice versa.
127     if (Cold != ColdPartSource.count(Address))
128       continue;
129     MapTy &Map = MapEntry.second;
130     const uint32_t NumEntries = Map.size();
131     LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
132                       << Twine::utohexstr(Address) << ".\n");
133     encodeULEB128(Address - PrevAddress, OS);
134     PrevAddress = Address;
135     if (Cold) {
136       size_t HotIndex =
137           std::distance(ColdPartSource.begin(), ColdPartSource.find(Address));
138       encodeULEB128(HotIndex - PrevIndex, OS);
139       PrevIndex = HotIndex;
140     }
141     encodeULEB128(NumEntries, OS);
142     uint64_t InOffset = 0;
143     // Output and Input addresses and delta-encoded
144     for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
145       const uint64_t OutputAddress = KeyVal.first + Address;
146       encodeULEB128(OutputAddress - PrevAddress, OS);
147       PrevAddress = OutputAddress;
148       encodeSLEB128(KeyVal.second - InOffset, OS);
149       InOffset = KeyVal.second;
150     }
151   }
152 }
153 
154 std::error_code BoltAddressTranslation::parse(StringRef Buf) {
155   DataExtractor DE = DataExtractor(Buf, true, 8);
156   uint64_t Offset = 0;
157   if (Buf.size() < 12)
158     return make_error_code(llvm::errc::io_error);
159 
160   const uint32_t NameSz = DE.getU32(&Offset);
161   const uint32_t DescSz = DE.getU32(&Offset);
162   const uint32_t Type = DE.getU32(&Offset);
163 
164   if (Type != BinarySection::NT_BOLT_BAT ||
165       Buf.size() + Offset < alignTo(NameSz, 4) + DescSz)
166     return make_error_code(llvm::errc::io_error);
167 
168   StringRef Name = Buf.slice(Offset, Offset + NameSz);
169   Offset = alignTo(Offset + NameSz, 4);
170   if (Name.substr(0, 4) != "BOLT")
171     return make_error_code(llvm::errc::io_error);
172 
173   Error Err(Error::success());
174   std::vector<uint64_t> HotFuncs;
175   uint64_t PrevAddress = 0;
176   parseMaps</*Cold=*/false>(HotFuncs, PrevAddress, DE, Offset, Err);
177   parseMaps</*Cold=*/true>(HotFuncs, PrevAddress, DE, Offset, Err);
178   outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
179   return errorToErrorCode(std::move(Err));
180 }
181 
182 template <bool Cold>
183 void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
184                                        uint64_t &PrevAddress, DataExtractor &DE,
185                                        uint64_t &Offset, Error &Err) {
186   const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
187   LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "")
188                     << " functions\n");
189   size_t HotIndex = 0;
190   for (uint32_t I = 0; I < NumFunctions; ++I) {
191     const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err);
192     PrevAddress = Address;
193     if (Cold) {
194       HotIndex += DE.getULEB128(&Offset, &Err);
195       ColdPartSource.emplace(Address, HotFuncs[HotIndex]);
196     } else {
197       HotFuncs.push_back(Address);
198     }
199     const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
200     MapTy Map;
201 
202     LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
203                       << Twine::utohexstr(Address) << "\n");
204     uint64_t InputOffset = 0;
205     for (uint32_t J = 0; J < NumEntries; ++J) {
206       const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
207       const uint64_t OutputAddress = PrevAddress + OutputDelta;
208       const uint64_t OutputOffset = OutputAddress - Address;
209       PrevAddress = OutputAddress;
210       const int64_t InputDelta = DE.getSLEB128(&Offset, &Err);
211       InputOffset += InputDelta;
212       Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
213       LLVM_DEBUG(
214           dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}\n",
215                             OutputOffset, InputOffset, OutputDelta,
216                             encodeULEB128(OutputDelta, nulls()), InputDelta,
217                             encodeSLEB128(InputDelta, nulls()), OutputAddress));
218     }
219     Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
220   }
221 }
222 
223 void BoltAddressTranslation::dump(raw_ostream &OS) {
224   const size_t NumTables = Maps.size();
225   OS << "BAT tables for " << NumTables << " functions:\n";
226   for (const auto &MapEntry : Maps) {
227     OS << "Function Address: 0x" << Twine::utohexstr(MapEntry.first) << "\n";
228     OS << "BB mappings:\n";
229     for (const auto &Entry : MapEntry.second) {
230       const bool IsBranch = Entry.second & BRANCHENTRY;
231       const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
232       OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
233          << "0x" << Twine::utohexstr(Val);
234       if (IsBranch)
235         OS << " (branch)";
236       OS << "\n";
237     }
238     OS << "\n";
239   }
240   const size_t NumColdParts = ColdPartSource.size();
241   if (!NumColdParts)
242     return;
243 
244   OS << NumColdParts << " cold mappings:\n";
245   for (const auto &Entry : ColdPartSource) {
246     OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
247        << Twine::utohexstr(Entry.second) << "\n";
248   }
249   OS << "\n";
250 }
251 
252 uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
253                                            uint64_t Offset,
254                                            bool IsBranchSrc) const {
255   auto Iter = Maps.find(FuncAddress);
256   if (Iter == Maps.end())
257     return Offset;
258 
259   const MapTy &Map = Iter->second;
260   auto KeyVal = Map.upper_bound(Offset);
261   if (KeyVal == Map.begin())
262     return Offset;
263 
264   --KeyVal;
265 
266   const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit
267   // Branch source addresses are translated to the first instruction of the
268   // source BB to avoid accounting for modifications BOLT may have made in the
269   // BB regarding deletion/addition of instructions.
270   if (IsBranchSrc)
271     return Val;
272   return Offset - KeyVal->first + Val;
273 }
274 
275 std::optional<BoltAddressTranslation::FallthroughListTy>
276 BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
277                                                uint64_t From,
278                                                uint64_t To) const {
279   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
280 
281   // Filter out trivial case
282   if (From >= To)
283     return Res;
284 
285   From -= FuncAddress;
286   To -= FuncAddress;
287 
288   auto Iter = Maps.find(FuncAddress);
289   if (Iter == Maps.end())
290     return std::nullopt;
291 
292   const MapTy &Map = Iter->second;
293   auto FromIter = Map.upper_bound(From);
294   if (FromIter == Map.begin())
295     return Res;
296   // Skip instruction entries, to create fallthroughs we are only interested in
297   // BB boundaries
298   do {
299     if (FromIter == Map.begin())
300       return Res;
301     --FromIter;
302   } while (FromIter->second & BRANCHENTRY);
303 
304   auto ToIter = Map.upper_bound(To);
305   if (ToIter == Map.begin())
306     return Res;
307   --ToIter;
308   if (FromIter->first >= ToIter->first)
309     return Res;
310 
311   for (auto Iter = FromIter; Iter != ToIter;) {
312     const uint32_t Src = Iter->first;
313     if (Iter->second & BRANCHENTRY) {
314       ++Iter;
315       continue;
316     }
317 
318     ++Iter;
319     while (Iter->second & BRANCHENTRY && Iter != ToIter)
320       ++Iter;
321     if (Iter->second & BRANCHENTRY)
322       break;
323     Res.emplace_back(Src, Iter->first);
324   }
325 
326   return Res;
327 }
328 
329 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
330   auto Iter = ColdPartSource.find(Address);
331   if (Iter == ColdPartSource.end())
332     return 0;
333   return Iter->second;
334 }
335 
336 bool BoltAddressTranslation::enabledFor(
337     llvm::object::ELFObjectFileBase *InputFile) const {
338   for (const SectionRef &Section : InputFile->sections()) {
339     Expected<StringRef> SectionNameOrErr = Section.getName();
340     if (Error E = SectionNameOrErr.takeError())
341       continue;
342 
343     if (SectionNameOrErr.get() == SECTION_NAME)
344       return true;
345   }
346   return false;
347 }
348 } // namespace bolt
349 } // namespace llvm
350