xref: /llvm-project/bolt/lib/Profile/BoltAddressTranslation.cpp (revision 3b3de48fd84b8269d5f45ee0a9dc6b7448368424)
1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/ADT/APInt.h"
12 #include "llvm/Support/Errc.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/LEB128.h"
15 
16 #define DEBUG_TYPE "bolt-bat"
17 
18 namespace llvm {
19 namespace bolt {
20 
/// Section name that enabledFor() looks for among the sections of an input
/// file to decide whether address translation data is present.
const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
22 
23 void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
24                                                const BinaryBasicBlock &BB,
25                                                uint64_t FuncAddress) {
26   uint64_t HotFuncAddress = ColdPartSource.count(FuncAddress)
27                                 ? ColdPartSource[FuncAddress]
28                                 : FuncAddress;
29   const uint64_t BBOutputOffset =
30       BB.getOutputAddressRange().first - FuncAddress;
31   const uint32_t BBInputOffset = BB.getInputOffset();
32 
33   // Every output BB must track back to an input BB for profile collection
34   // in bolted binaries. If we are missing an offset, it means this block was
35   // created by a pass. We will skip writing any entries for it, and this means
36   // any traffic happening in this block will map to the previous block in the
37   // layout. This covers the case where an input basic block is split into two,
38   // and the second one lacks any offset.
39   if (BBInputOffset == BinaryBasicBlock::INVALID_OFFSET)
40     return;
41 
42   LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
43   LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(BBOutputOffset)
44                     << " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
45   LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
46                                getBBHash(HotFuncAddress, BBInputOffset)));
47   (void)HotFuncAddress;
48   LLVM_DEBUG(dbgs() << formatv(" Index: {0}\n",
49                                getBBIndex(HotFuncAddress, BBInputOffset)));
50   // In case of conflicts (same Key mapping to different Vals), the last
51   // update takes precedence. Of course it is not ideal to have conflicts and
52   // those happen when we have an empty BB that either contained only
53   // NOPs or a jump to the next block (successor). Either way, the successor
54   // and this deleted block will both share the same output address (the same
55   // key), and we need to map back. We choose here to privilege the successor by
56   // allowing it to overwrite the previously inserted key in the map.
57   Map[BBOutputOffset] = BBInputOffset << 1;
58 
59   const auto &IOAddressMap =
60       BB.getFunction()->getBinaryContext().getIOAddressMap();
61 
62   for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
63     const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
64     const auto OutputAddress = IOAddressMap.lookup(InputAddress);
65     assert(OutputAddress && "Unknown instruction address");
66     const auto OutputOffset = *OutputAddress - FuncAddress;
67 
68     // Is this the first instruction in the BB? No need to duplicate the entry.
69     if (OutputOffset == BBOutputOffset)
70       continue;
71 
72     LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(OutputOffset) << " Val: "
73                       << Twine::utohexstr(InputOffset) << " (branch)\n");
74     Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
75                                              (InputOffset << 1) | BRANCHENTRY));
76   }
77 }
78 
79 void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
80   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
81   for (auto &BFI : BC.getBinaryFunctions()) {
82     const BinaryFunction &Function = BFI.second;
83     const uint64_t InputAddress = Function.getAddress();
84     const uint64_t OutputAddress = Function.getOutputAddress();
85     // We don't need a translation table if the body of the function hasn't
86     // changed
87     if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
88       continue;
89 
90     // TBD: handle BAT functions w/multiple entry points.
91     if (Function.isMultiEntry())
92       continue;
93 
94     LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
95     LLVM_DEBUG(dbgs() << " Address reference: 0x"
96                       << Twine::utohexstr(Function.getOutputAddress()) << "\n");
97     LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(OutputAddress)));
98 
99     MapTy Map;
100     for (const BinaryBasicBlock *const BB :
101          Function.getLayout().getMainFragment())
102       writeEntriesForBB(Map, *BB, Function.getOutputAddress());
103     Maps.emplace(Function.getOutputAddress(), std::move(Map));
104     ReverseMap.emplace(OutputAddress, InputAddress);
105 
106     if (!Function.isSplit())
107       continue;
108 
109     // Split maps
110     LLVM_DEBUG(dbgs() << " Cold part\n");
111     for (const FunctionFragment &FF :
112          Function.getLayout().getSplitFragments()) {
113       ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
114       Map.clear();
115       for (const BinaryBasicBlock *const BB : FF)
116         writeEntriesForBB(Map, *BB, FF.getAddress());
117 
118       Maps.emplace(FF.getAddress(), std::move(Map));
119     }
120   }
121 
122   // Output addresses are delta-encoded
123   uint64_t PrevAddress = 0;
124   writeMaps</*Cold=*/false>(Maps, PrevAddress, OS);
125   writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);
126 
127   BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
128   const uint64_t NumBBHashes = std::accumulate(
129       FuncHashes.begin(), FuncHashes.end(), 0ull,
130       [](size_t Acc, const auto &B) { return Acc + B.second.second.size(); });
131   BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.size() << " function and "
132             << NumBBHashes << " basic block hashes\n";
133 }
134 
135 APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
136                                                             size_t EqualElems) {
137   APInt BitMask(alignTo(EqualElems, 8), 0);
138   size_t Index = 0;
139   for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
140     if (Index == EqualElems)
141       break;
142     const uint32_t OutputOffset = KeyVal.second;
143     if (OutputOffset & BRANCHENTRY)
144       BitMask.setBit(Index);
145     ++Index;
146   }
147   return BitMask;
148 }
149 
150 size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map) const {
151   size_t EqualOffsets = 0;
152   for (const std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
153     const uint32_t OutputOffset = KeyVal.first;
154     const uint32_t InputOffset = KeyVal.second >> 1;
155     if (OutputOffset == InputOffset)
156       ++EqualOffsets;
157     else
158       break;
159   }
160   return EqualOffsets;
161 }
162 
163 template <bool Cold>
164 void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
165                                        uint64_t &PrevAddress, raw_ostream &OS) {
166   const uint32_t NumFuncs =
167       llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) {
168         return Cold == ColdPartSource.count(Address);
169       });
170   encodeULEB128(NumFuncs, OS);
171   LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << (Cold ? " cold" : "")
172                     << " functions for BAT.\n");
173   size_t PrevIndex = 0;
174   for (auto &MapEntry : Maps) {
175     const uint64_t Address = MapEntry.first;
176     // Only process cold fragments in cold mode, and vice versa.
177     if (Cold != ColdPartSource.count(Address))
178       continue;
179     // NB: here we use the input address because hashes are saved early (in
180     // `saveMetadata`) before output addresses are assigned.
181     const uint64_t HotInputAddress =
182         ReverseMap[Cold ? ColdPartSource[Address] : Address];
183     std::pair<size_t, BBHashMap> &FuncHashPair = FuncHashes[HotInputAddress];
184     MapTy &Map = MapEntry.second;
185     const uint32_t NumEntries = Map.size();
186     LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
187                       << Twine::utohexstr(Address) << ".\n");
188     encodeULEB128(Address - PrevAddress, OS);
189     PrevAddress = Address;
190     if (Cold) {
191       size_t HotIndex =
192           std::distance(ColdPartSource.begin(), ColdPartSource.find(Address));
193       encodeULEB128(HotIndex - PrevIndex, OS);
194       PrevIndex = HotIndex;
195     } else {
196       // Function hash
197       LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", FuncHashPair.first));
198       OS.write(reinterpret_cast<char *>(&FuncHashPair.first), 8);
199     }
200     encodeULEB128(NumEntries, OS);
201     // For hot fragments only: encode the number of equal offsets
202     // (output = input) in the beginning of the function. Only encode one offset
203     // in these cases.
204     const size_t EqualElems = Cold ? 0 : getNumEqualOffsets(Map);
205     if (!Cold) {
206       encodeULEB128(EqualElems, OS);
207       if (EqualElems) {
208         const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
209         APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
210         OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
211                  BranchEntriesBytes);
212         LLVM_DEBUG({
213           dbgs() << "BranchEntries: ";
214           SmallString<8> BitMaskStr;
215           BranchEntries.toString(BitMaskStr, 2, false);
216           dbgs() << BitMaskStr << '\n';
217         });
218       }
219     }
220     size_t Index = 0;
221     uint64_t InOffset = 0;
222     size_t PrevBBIndex = 0;
223     // Output and Input addresses and delta-encoded
224     for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
225       const uint64_t OutputAddress = KeyVal.first + Address;
226       encodeULEB128(OutputAddress - PrevAddress, OS);
227       PrevAddress = OutputAddress;
228       if (Index++ >= EqualElems)
229         encodeSLEB128(KeyVal.second - InOffset, OS);
230       InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
231       if ((InOffset & BRANCHENTRY) == 0) {
232         unsigned BBIndex;
233         size_t BBHash;
234         std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1];
235         OS.write(reinterpret_cast<char *>(&BBHash), 8);
236         // Basic block index in the input binary
237         encodeULEB128(BBIndex - PrevBBIndex, OS);
238         PrevBBIndex = BBIndex;
239         LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x} {3}\n", KeyVal.first,
240                                      InOffset >> 1, BBHash, BBIndex));
241       }
242     }
243   }
244 }
245 
246 std::error_code BoltAddressTranslation::parse(raw_ostream &OS, StringRef Buf) {
247   DataExtractor DE = DataExtractor(Buf, true, 8);
248   uint64_t Offset = 0;
249   if (Buf.size() < 12)
250     return make_error_code(llvm::errc::io_error);
251 
252   const uint32_t NameSz = DE.getU32(&Offset);
253   const uint32_t DescSz = DE.getU32(&Offset);
254   const uint32_t Type = DE.getU32(&Offset);
255 
256   if (Type != BinarySection::NT_BOLT_BAT ||
257       Buf.size() + Offset < alignTo(NameSz, 4) + DescSz)
258     return make_error_code(llvm::errc::io_error);
259 
260   StringRef Name = Buf.slice(Offset, Offset + NameSz);
261   Offset = alignTo(Offset + NameSz, 4);
262   if (Name.substr(0, 4) != "BOLT")
263     return make_error_code(llvm::errc::io_error);
264 
265   Error Err(Error::success());
266   std::vector<uint64_t> HotFuncs;
267   uint64_t PrevAddress = 0;
268   parseMaps</*Cold=*/false>(HotFuncs, PrevAddress, DE, Offset, Err);
269   parseMaps</*Cold=*/true>(HotFuncs, PrevAddress, DE, Offset, Err);
270   OS << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
271   return errorToErrorCode(std::move(Err));
272 }
273 
274 template <bool Cold>
275 void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
276                                        uint64_t &PrevAddress, DataExtractor &DE,
277                                        uint64_t &Offset, Error &Err) {
278   const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
279   LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "")
280                     << " functions\n");
281   size_t HotIndex = 0;
282   for (uint32_t I = 0; I < NumFunctions; ++I) {
283     const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err);
284     uint64_t HotAddress = Cold ? 0 : Address;
285     PrevAddress = Address;
286     if (Cold) {
287       HotIndex += DE.getULEB128(&Offset, &Err);
288       HotAddress = HotFuncs[HotIndex];
289       ColdPartSource.emplace(Address, HotAddress);
290     } else {
291       HotFuncs.push_back(Address);
292       // Function hash
293       const size_t FuncHash = DE.getU64(&Offset, &Err);
294       FuncHashes[Address].first = FuncHash;
295       LLVM_DEBUG(dbgs() << formatv("{0:x}: hash {1:x}\n", Address, FuncHash));
296     }
297     const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
298     // Equal offsets, hot fragments only.
299     size_t EqualElems = 0;
300     APInt BEBitMask;
301     if (!Cold) {
302       EqualElems = DE.getULEB128(&Offset, &Err);
303       LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n",
304                                    EqualElems, getULEB128Size(EqualElems)));
305       if (EqualElems) {
306         const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
307         BEBitMask = APInt(alignTo(EqualElems, 8), 0);
308         LoadIntFromMemory(
309             BEBitMask,
310             reinterpret_cast<const uint8_t *>(
311                 DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
312             BranchEntriesBytes);
313         LLVM_DEBUG({
314           dbgs() << "BEBitMask: ";
315           SmallString<8> BitMaskStr;
316           BEBitMask.toString(BitMaskStr, 2, false);
317           dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
318         });
319       }
320     }
321     MapTy Map;
322 
323     LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
324                       << Twine::utohexstr(Address) << "\n");
325     uint64_t InputOffset = 0;
326     size_t BBIndex = 0;
327     for (uint32_t J = 0; J < NumEntries; ++J) {
328       const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
329       const uint64_t OutputAddress = PrevAddress + OutputDelta;
330       const uint64_t OutputOffset = OutputAddress - Address;
331       PrevAddress = OutputAddress;
332       int64_t InputDelta = 0;
333       if (J < EqualElems) {
334         InputOffset = (OutputOffset << 1) | BEBitMask[J];
335       } else {
336         InputDelta = DE.getSLEB128(&Offset, &Err);
337         InputOffset += InputDelta;
338       }
339       Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
340       size_t BBHash = 0;
341       size_t BBIndexDelta = 0;
342       const bool IsBranchEntry = InputOffset & BRANCHENTRY;
343       if (!IsBranchEntry) {
344         BBHash = DE.getU64(&Offset, &Err);
345         BBIndexDelta = DE.getULEB128(&Offset, &Err);
346         BBIndex += BBIndexDelta;
347         // Map basic block hash to hot fragment by input offset
348         FuncHashes[HotAddress].second.emplace(InputOffset >> 1,
349                                               std::pair(BBIndex, BBHash));
350       }
351       LLVM_DEBUG({
352         dbgs() << formatv(
353             "{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset,
354             InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta,
355             (J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress);
356         if (!IsBranchEntry) {
357           dbgs() << formatv(" {0:x} {1}/{2}b", BBHash, BBIndex,
358                             getULEB128Size(BBIndexDelta));
359         }
360         dbgs() << '\n';
361       });
362     }
363     Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
364   }
365 }
366 
367 void BoltAddressTranslation::dump(raw_ostream &OS) {
368   const size_t NumTables = Maps.size();
369   OS << "BAT tables for " << NumTables << " functions:\n";
370   for (const auto &MapEntry : Maps) {
371     const uint64_t Address = MapEntry.first;
372     const uint64_t HotAddress = fetchParentAddress(Address);
373     OS << "Function Address: 0x" << Twine::utohexstr(Address);
374     if (HotAddress == 0)
375       OS << formatv(", hash: {0:x}", getBFHash(Address));
376     OS << "\n";
377     OS << "BB mappings:\n";
378     for (const auto &Entry : MapEntry.second) {
379       const bool IsBranch = Entry.second & BRANCHENTRY;
380       const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
381       OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
382          << "0x" << Twine::utohexstr(Val);
383       if (IsBranch)
384         OS << " (branch)";
385       else
386         OS << formatv(" hash: {0:x}",
387                       getBBHash(HotAddress ? HotAddress : Address, Val));
388       OS << "\n";
389     }
390     OS << "\n";
391   }
392   const size_t NumColdParts = ColdPartSource.size();
393   if (!NumColdParts)
394     return;
395 
396   OS << NumColdParts << " cold mappings:\n";
397   for (const auto &Entry : ColdPartSource) {
398     OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
399        << Twine::utohexstr(Entry.second) << "\n";
400   }
401   OS << "\n";
402 }
403 
404 uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
405                                            uint64_t Offset,
406                                            bool IsBranchSrc) const {
407   auto Iter = Maps.find(FuncAddress);
408   if (Iter == Maps.end())
409     return Offset;
410 
411   const MapTy &Map = Iter->second;
412   auto KeyVal = Map.upper_bound(Offset);
413   if (KeyVal == Map.begin())
414     return Offset;
415 
416   --KeyVal;
417 
418   const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit
419   // Branch source addresses are translated to the first instruction of the
420   // source BB to avoid accounting for modifications BOLT may have made in the
421   // BB regarding deletion/addition of instructions.
422   if (IsBranchSrc)
423     return Val;
424   return Offset - KeyVal->first + Val;
425 }
426 
427 std::optional<BoltAddressTranslation::FallthroughListTy>
428 BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
429                                                uint64_t From,
430                                                uint64_t To) const {
431   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
432 
433   // Filter out trivial case
434   if (From >= To)
435     return Res;
436 
437   From -= FuncAddress;
438   To -= FuncAddress;
439 
440   auto Iter = Maps.find(FuncAddress);
441   if (Iter == Maps.end())
442     return std::nullopt;
443 
444   const MapTy &Map = Iter->second;
445   auto FromIter = Map.upper_bound(From);
446   if (FromIter == Map.begin())
447     return Res;
448   // Skip instruction entries, to create fallthroughs we are only interested in
449   // BB boundaries
450   do {
451     if (FromIter == Map.begin())
452       return Res;
453     --FromIter;
454   } while (FromIter->second & BRANCHENTRY);
455 
456   auto ToIter = Map.upper_bound(To);
457   if (ToIter == Map.begin())
458     return Res;
459   --ToIter;
460   if (FromIter->first >= ToIter->first)
461     return Res;
462 
463   for (auto Iter = FromIter; Iter != ToIter;) {
464     const uint32_t Src = Iter->first;
465     if (Iter->second & BRANCHENTRY) {
466       ++Iter;
467       continue;
468     }
469 
470     ++Iter;
471     while (Iter->second & BRANCHENTRY && Iter != ToIter)
472       ++Iter;
473     if (Iter->second & BRANCHENTRY)
474       break;
475     Res.emplace_back(Src, Iter->first);
476   }
477 
478   return Res;
479 }
480 
481 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
482   auto Iter = ColdPartSource.find(Address);
483   if (Iter == ColdPartSource.end())
484     return 0;
485   return Iter->second;
486 }
487 
488 bool BoltAddressTranslation::enabledFor(
489     llvm::object::ELFObjectFileBase *InputFile) const {
490   for (const SectionRef &Section : InputFile->sections()) {
491     Expected<StringRef> SectionNameOrErr = Section.getName();
492     if (Error E = SectionNameOrErr.takeError())
493       continue;
494 
495     if (SectionNameOrErr.get() == SECTION_NAME)
496       return true;
497   }
498   return false;
499 }
500 
501 void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
502   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
503     // We don't need a translation table if the body of the function hasn't
504     // changed
505     if (BF.isIgnored() || (!BC.HasRelocations && !BF.isSimple()))
506       continue;
507     // Prepare function and block hashes
508     FuncHashes[BF.getAddress()].first = BF.computeHash();
509     BF.computeBlockHashes();
510     for (const BinaryBasicBlock &BB : BF)
511       FuncHashes[BF.getAddress()].second.emplace(
512           BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash()));
513   }
514 }
515 
516 unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress,
517                                             uint32_t BBInputOffset) const {
518   return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).first;
519 }
520 
521 size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
522                                          uint32_t BBInputOffset) const {
523   return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).second;
524 }
525 
526 size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {
527   return FuncHashes.at(OutputAddress).first;
528 }
529 
530 } // namespace bolt
531 } // namespace llvm
532