xref: /llvm-project/bolt/lib/Profile/BoltAddressTranslation.cpp (revision 52cf07116bf0a8cab87b0f55176d198bcaa02575)
1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/ADT/APInt.h"
12 #include "llvm/Support/Errc.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/LEB128.h"
15 
16 #define DEBUG_TYPE "bolt-bat"
17 
18 namespace llvm {
19 namespace bolt {
20 
// Name of the section holding the BAT tables. enabledFor() reports an input
// file as BAT-enabled when it contains a section with exactly this name.
const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
22 
23 void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
24                                                const BinaryBasicBlock &BB,
25                                                uint64_t FuncAddress) {
26   const uint64_t BBOutputOffset =
27       BB.getOutputAddressRange().first - FuncAddress;
28   const uint32_t BBInputOffset = BB.getInputOffset();
29 
30   // Every output BB must track back to an input BB for profile collection
31   // in bolted binaries. If we are missing an offset, it means this block was
32   // created by a pass. We will skip writing any entries for it, and this means
33   // any traffic happening in this block will map to the previous block in the
34   // layout. This covers the case where an input basic block is split into two,
35   // and the second one lacks any offset.
36   if (BBInputOffset == BinaryBasicBlock::INVALID_OFFSET)
37     return;
38 
39   LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
40   LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(BBOutputOffset)
41                     << " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
42   // In case of conflicts (same Key mapping to different Vals), the last
43   // update takes precedence. Of course it is not ideal to have conflicts and
44   // those happen when we have an empty BB that either contained only
45   // NOPs or a jump to the next block (successor). Either way, the successor
46   // and this deleted block will both share the same output address (the same
47   // key), and we need to map back. We choose here to privilege the successor by
48   // allowing it to overwrite the previously inserted key in the map.
49   Map[BBOutputOffset] = BBInputOffset << 1;
50 
51   const auto &IOAddressMap =
52       BB.getFunction()->getBinaryContext().getIOAddressMap();
53 
54   for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
55     const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
56     const auto OutputAddress = IOAddressMap.lookup(InputAddress);
57     assert(OutputAddress && "Unknown instruction address");
58     const auto OutputOffset = *OutputAddress - FuncAddress;
59 
60     // Is this the first instruction in the BB? No need to duplicate the entry.
61     if (OutputOffset == BBOutputOffset)
62       continue;
63 
64     LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(OutputOffset) << " Val: "
65                       << Twine::utohexstr(InputOffset) << " (branch)\n");
66     Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
67                                              (InputOffset << 1) | BRANCHENTRY));
68   }
69 }
70 
71 void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
72   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
73   for (auto &BFI : BC.getBinaryFunctions()) {
74     const BinaryFunction &Function = BFI.second;
75     // We don't need a translation table if the body of the function hasn't
76     // changed
77     if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
78       continue;
79 
80     LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
81     LLVM_DEBUG(dbgs() << " Address reference: 0x"
82                       << Twine::utohexstr(Function.getOutputAddress()) << "\n");
83 
84     MapTy Map;
85     for (const BinaryBasicBlock *const BB :
86          Function.getLayout().getMainFragment())
87       writeEntriesForBB(Map, *BB, Function.getOutputAddress());
88     Maps.emplace(Function.getOutputAddress(), std::move(Map));
89 
90     if (!Function.isSplit())
91       continue;
92 
93     // Split maps
94     LLVM_DEBUG(dbgs() << " Cold part\n");
95     for (const FunctionFragment &FF :
96          Function.getLayout().getSplitFragments()) {
97       Map.clear();
98       for (const BinaryBasicBlock *const BB : FF)
99         writeEntriesForBB(Map, *BB, FF.getAddress());
100 
101       Maps.emplace(FF.getAddress(), std::move(Map));
102       ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
103     }
104   }
105 
106   // Output addresses are delta-encoded
107   uint64_t PrevAddress = 0;
108   writeMaps</*Cold=*/false>(Maps, PrevAddress, OS);
109   writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);
110 
111   BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
112 }
113 
114 APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
115                                                             size_t EqualElems) {
116   APInt BitMask(alignTo(EqualElems, 8), 0);
117   size_t Index = 0;
118   for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
119     if (Index == EqualElems)
120       break;
121     const uint32_t OutputOffset = KeyVal.second;
122     if (OutputOffset & BRANCHENTRY)
123       BitMask.setBit(Index);
124     ++Index;
125   }
126   return BitMask;
127 }
128 
129 size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map) const {
130   size_t EqualOffsets = 0;
131   for (const std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
132     const uint32_t OutputOffset = KeyVal.first;
133     const uint32_t InputOffset = KeyVal.second >> 1;
134     if (OutputOffset == InputOffset)
135       ++EqualOffsets;
136     else
137       break;
138   }
139   return EqualOffsets;
140 }
141 
142 template <bool Cold>
143 void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
144                                        uint64_t &PrevAddress, raw_ostream &OS) {
145   const uint32_t NumFuncs =
146       llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) {
147         return Cold == ColdPartSource.count(Address);
148       });
149   encodeULEB128(NumFuncs, OS);
150   LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << (Cold ? " cold" : "")
151                     << " functions for BAT.\n");
152   size_t PrevIndex = 0;
153   for (auto &MapEntry : Maps) {
154     const uint64_t Address = MapEntry.first;
155     // Only process cold fragments in cold mode, and vice versa.
156     if (Cold != ColdPartSource.count(Address))
157       continue;
158     MapTy &Map = MapEntry.second;
159     const uint32_t NumEntries = Map.size();
160     LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
161                       << Twine::utohexstr(Address) << ".\n");
162     encodeULEB128(Address - PrevAddress, OS);
163     PrevAddress = Address;
164     if (Cold) {
165       size_t HotIndex =
166           std::distance(ColdPartSource.begin(), ColdPartSource.find(Address));
167       encodeULEB128(HotIndex - PrevIndex, OS);
168       PrevIndex = HotIndex;
169     }
170     encodeULEB128(NumEntries, OS);
171     // For hot fragments only: encode the number of equal offsets
172     // (output = input) in the beginning of the function. Only encode one offset
173     // in these cases.
174     const size_t EqualElems = Cold ? 0 : getNumEqualOffsets(Map);
175     if (!Cold) {
176       encodeULEB128(EqualElems, OS);
177       if (EqualElems) {
178         const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
179         APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
180         OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
181                  BranchEntriesBytes);
182         LLVM_DEBUG({
183           dbgs() << "BranchEntries: ";
184           SmallString<8> BitMaskStr;
185           BranchEntries.toString(BitMaskStr, 2, false);
186           dbgs() << BitMaskStr << '\n';
187         });
188       }
189     }
190     size_t Index = 0;
191     uint64_t InOffset = 0;
192     // Output and Input addresses and delta-encoded
193     for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
194       const uint64_t OutputAddress = KeyVal.first + Address;
195       encodeULEB128(OutputAddress - PrevAddress, OS);
196       PrevAddress = OutputAddress;
197       if (Index++ >= EqualElems)
198         encodeSLEB128(KeyVal.second - InOffset, OS);
199       InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
200     }
201   }
202 }
203 
204 std::error_code BoltAddressTranslation::parse(raw_ostream &OS, StringRef Buf) {
205   DataExtractor DE = DataExtractor(Buf, true, 8);
206   uint64_t Offset = 0;
207   if (Buf.size() < 12)
208     return make_error_code(llvm::errc::io_error);
209 
210   const uint32_t NameSz = DE.getU32(&Offset);
211   const uint32_t DescSz = DE.getU32(&Offset);
212   const uint32_t Type = DE.getU32(&Offset);
213 
214   if (Type != BinarySection::NT_BOLT_BAT ||
215       Buf.size() + Offset < alignTo(NameSz, 4) + DescSz)
216     return make_error_code(llvm::errc::io_error);
217 
218   StringRef Name = Buf.slice(Offset, Offset + NameSz);
219   Offset = alignTo(Offset + NameSz, 4);
220   if (Name.substr(0, 4) != "BOLT")
221     return make_error_code(llvm::errc::io_error);
222 
223   Error Err(Error::success());
224   std::vector<uint64_t> HotFuncs;
225   uint64_t PrevAddress = 0;
226   parseMaps</*Cold=*/false>(HotFuncs, PrevAddress, DE, Offset, Err);
227   parseMaps</*Cold=*/true>(HotFuncs, PrevAddress, DE, Offset, Err);
228   OS << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
229   return errorToErrorCode(std::move(Err));
230 }
231 
232 template <bool Cold>
233 void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
234                                        uint64_t &PrevAddress, DataExtractor &DE,
235                                        uint64_t &Offset, Error &Err) {
236   const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
237   LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "")
238                     << " functions\n");
239   size_t HotIndex = 0;
240   for (uint32_t I = 0; I < NumFunctions; ++I) {
241     const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err);
242     PrevAddress = Address;
243     if (Cold) {
244       HotIndex += DE.getULEB128(&Offset, &Err);
245       ColdPartSource.emplace(Address, HotFuncs[HotIndex]);
246     } else {
247       HotFuncs.push_back(Address);
248     }
249     const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
250     // Equal offsets, hot fragments only.
251     size_t EqualElems = 0;
252     APInt BEBitMask;
253     if (!Cold) {
254       EqualElems = DE.getULEB128(&Offset, &Err);
255       LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n",
256                                    EqualElems, getULEB128Size(EqualElems)));
257       if (EqualElems) {
258         const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
259         BEBitMask = APInt(alignTo(EqualElems, 8), 0);
260         LoadIntFromMemory(
261             BEBitMask,
262             reinterpret_cast<const uint8_t *>(
263                 DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
264             BranchEntriesBytes);
265         LLVM_DEBUG({
266           dbgs() << "BEBitMask: ";
267           SmallString<8> BitMaskStr;
268           BEBitMask.toString(BitMaskStr, 2, false);
269           dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
270         });
271       }
272     }
273     MapTy Map;
274 
275     LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
276                       << Twine::utohexstr(Address) << "\n");
277     uint64_t InputOffset = 0;
278     for (uint32_t J = 0; J < NumEntries; ++J) {
279       const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
280       const uint64_t OutputAddress = PrevAddress + OutputDelta;
281       const uint64_t OutputOffset = OutputAddress - Address;
282       PrevAddress = OutputAddress;
283       int64_t InputDelta = 0;
284       if (J < EqualElems) {
285         InputOffset = (OutputOffset << 1) | BEBitMask[J];
286       } else {
287         InputDelta = DE.getSLEB128(&Offset, &Err);
288         InputOffset += InputDelta;
289       }
290       Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
291       LLVM_DEBUG(
292           dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}\n",
293                             OutputOffset, InputOffset, OutputDelta,
294                             getULEB128Size(OutputDelta), InputDelta,
295                             (J < EqualElems) ? 0 : getSLEB128Size(InputDelta),
296                             OutputAddress));
297     }
298     Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
299   }
300 }
301 
302 void BoltAddressTranslation::dump(raw_ostream &OS) {
303   const size_t NumTables = Maps.size();
304   OS << "BAT tables for " << NumTables << " functions:\n";
305   for (const auto &MapEntry : Maps) {
306     OS << "Function Address: 0x" << Twine::utohexstr(MapEntry.first) << "\n";
307     OS << "BB mappings:\n";
308     for (const auto &Entry : MapEntry.second) {
309       const bool IsBranch = Entry.second & BRANCHENTRY;
310       const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
311       OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
312          << "0x" << Twine::utohexstr(Val);
313       if (IsBranch)
314         OS << " (branch)";
315       OS << "\n";
316     }
317     OS << "\n";
318   }
319   const size_t NumColdParts = ColdPartSource.size();
320   if (!NumColdParts)
321     return;
322 
323   OS << NumColdParts << " cold mappings:\n";
324   for (const auto &Entry : ColdPartSource) {
325     OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
326        << Twine::utohexstr(Entry.second) << "\n";
327   }
328   OS << "\n";
329 }
330 
331 uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
332                                            uint64_t Offset,
333                                            bool IsBranchSrc) const {
334   auto Iter = Maps.find(FuncAddress);
335   if (Iter == Maps.end())
336     return Offset;
337 
338   const MapTy &Map = Iter->second;
339   auto KeyVal = Map.upper_bound(Offset);
340   if (KeyVal == Map.begin())
341     return Offset;
342 
343   --KeyVal;
344 
345   const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit
346   // Branch source addresses are translated to the first instruction of the
347   // source BB to avoid accounting for modifications BOLT may have made in the
348   // BB regarding deletion/addition of instructions.
349   if (IsBranchSrc)
350     return Val;
351   return Offset - KeyVal->first + Val;
352 }
353 
354 std::optional<BoltAddressTranslation::FallthroughListTy>
355 BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
356                                                uint64_t From,
357                                                uint64_t To) const {
358   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
359 
360   // Filter out trivial case
361   if (From >= To)
362     return Res;
363 
364   From -= FuncAddress;
365   To -= FuncAddress;
366 
367   auto Iter = Maps.find(FuncAddress);
368   if (Iter == Maps.end())
369     return std::nullopt;
370 
371   const MapTy &Map = Iter->second;
372   auto FromIter = Map.upper_bound(From);
373   if (FromIter == Map.begin())
374     return Res;
375   // Skip instruction entries, to create fallthroughs we are only interested in
376   // BB boundaries
377   do {
378     if (FromIter == Map.begin())
379       return Res;
380     --FromIter;
381   } while (FromIter->second & BRANCHENTRY);
382 
383   auto ToIter = Map.upper_bound(To);
384   if (ToIter == Map.begin())
385     return Res;
386   --ToIter;
387   if (FromIter->first >= ToIter->first)
388     return Res;
389 
390   for (auto Iter = FromIter; Iter != ToIter;) {
391     const uint32_t Src = Iter->first;
392     if (Iter->second & BRANCHENTRY) {
393       ++Iter;
394       continue;
395     }
396 
397     ++Iter;
398     while (Iter->second & BRANCHENTRY && Iter != ToIter)
399       ++Iter;
400     if (Iter->second & BRANCHENTRY)
401       break;
402     Res.emplace_back(Src, Iter->first);
403   }
404 
405   return Res;
406 }
407 
408 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
409   auto Iter = ColdPartSource.find(Address);
410   if (Iter == ColdPartSource.end())
411     return 0;
412   return Iter->second;
413 }
414 
415 bool BoltAddressTranslation::enabledFor(
416     llvm::object::ELFObjectFileBase *InputFile) const {
417   for (const SectionRef &Section : InputFile->sections()) {
418     Expected<StringRef> SectionNameOrErr = Section.getName();
419     if (Error E = SectionNameOrErr.takeError())
420       continue;
421 
422     if (SectionNameOrErr.get() == SECTION_NAME)
423       return true;
424   }
425   return false;
426 }
427 } // namespace bolt
428 } // namespace llvm
429