xref: /llvm-project/bolt/lib/Profile/BoltAddressTranslation.cpp (revision f66d631bf8dc0fe33c6ba88c3dc7f00ac5946065)
1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/ADT/APInt.h"
12 #include "llvm/Support/Errc.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/LEB128.h"
15 
16 #define DEBUG_TYPE "bolt-bat"
17 
18 namespace llvm {
19 namespace bolt {
20 
// Name of the section that carries the serialized BAT tables; enabledFor()
// compares input section names against this string.
const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
22 
23 void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
24                                                const BinaryBasicBlock &BB,
25                                                uint64_t FuncAddress) {
26   uint64_t HotFuncAddress = ColdPartSource.count(FuncAddress)
27                                 ? ColdPartSource[FuncAddress]
28                                 : FuncAddress;
29   const uint64_t BBOutputOffset =
30       BB.getOutputAddressRange().first - FuncAddress;
31   const uint32_t BBInputOffset = BB.getInputOffset();
32 
33   // Every output BB must track back to an input BB for profile collection
34   // in bolted binaries. If we are missing an offset, it means this block was
35   // created by a pass. We will skip writing any entries for it, and this means
36   // any traffic happening in this block will map to the previous block in the
37   // layout. This covers the case where an input basic block is split into two,
38   // and the second one lacks any offset.
39   if (BBInputOffset == BinaryBasicBlock::INVALID_OFFSET)
40     return;
41 
42   LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
43   LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(BBOutputOffset)
44                     << " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
45   LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
46                                getBBHash(HotFuncAddress, BBInputOffset)));
47   (void)HotFuncAddress;
48   // In case of conflicts (same Key mapping to different Vals), the last
49   // update takes precedence. Of course it is not ideal to have conflicts and
50   // those happen when we have an empty BB that either contained only
51   // NOPs or a jump to the next block (successor). Either way, the successor
52   // and this deleted block will both share the same output address (the same
53   // key), and we need to map back. We choose here to privilege the successor by
54   // allowing it to overwrite the previously inserted key in the map.
55   Map[BBOutputOffset] = BBInputOffset << 1;
56 
57   const auto &IOAddressMap =
58       BB.getFunction()->getBinaryContext().getIOAddressMap();
59 
60   for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
61     const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
62     const auto OutputAddress = IOAddressMap.lookup(InputAddress);
63     assert(OutputAddress && "Unknown instruction address");
64     const auto OutputOffset = *OutputAddress - FuncAddress;
65 
66     // Is this the first instruction in the BB? No need to duplicate the entry.
67     if (OutputOffset == BBOutputOffset)
68       continue;
69 
70     LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(OutputOffset) << " Val: "
71                       << Twine::utohexstr(InputOffset) << " (branch)\n");
72     Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
73                                              (InputOffset << 1) | BRANCHENTRY));
74   }
75 }
76 
77 void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
78   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
79   for (auto &BFI : BC.getBinaryFunctions()) {
80     const BinaryFunction &Function = BFI.second;
81     const uint64_t InputAddress = Function.getAddress();
82     const uint64_t OutputAddress = Function.getOutputAddress();
83     // We don't need a translation table if the body of the function hasn't
84     // changed
85     if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
86       continue;
87 
88     // TBD: handle BAT functions w/multiple entry points.
89     if (Function.isMultiEntry())
90       continue;
91 
92     LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
93     LLVM_DEBUG(dbgs() << " Address reference: 0x"
94                       << Twine::utohexstr(Function.getOutputAddress()) << "\n");
95     LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(OutputAddress)));
96 
97     MapTy Map;
98     for (const BinaryBasicBlock *const BB :
99          Function.getLayout().getMainFragment())
100       writeEntriesForBB(Map, *BB, Function.getOutputAddress());
101     Maps.emplace(Function.getOutputAddress(), std::move(Map));
102     ReverseMap.emplace(OutputAddress, InputAddress);
103 
104     if (!Function.isSplit())
105       continue;
106 
107     // Split maps
108     LLVM_DEBUG(dbgs() << " Cold part\n");
109     for (const FunctionFragment &FF :
110          Function.getLayout().getSplitFragments()) {
111       ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
112       Map.clear();
113       for (const BinaryBasicBlock *const BB : FF)
114         writeEntriesForBB(Map, *BB, FF.getAddress());
115 
116       Maps.emplace(FF.getAddress(), std::move(Map));
117     }
118   }
119 
120   // Output addresses are delta-encoded
121   uint64_t PrevAddress = 0;
122   writeMaps</*Cold=*/false>(Maps, PrevAddress, OS);
123   writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);
124 
125   BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
126   const uint64_t NumBBHashes = std::accumulate(
127       FuncHashes.begin(), FuncHashes.end(), 0ull,
128       [](size_t Acc, const auto &B) { return Acc + B.second.second.size(); });
129   BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.size() << " function and "
130             << NumBBHashes << " basic block hashes\n";
131 }
132 
133 APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
134                                                             size_t EqualElems) {
135   APInt BitMask(alignTo(EqualElems, 8), 0);
136   size_t Index = 0;
137   for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
138     if (Index == EqualElems)
139       break;
140     const uint32_t OutputOffset = KeyVal.second;
141     if (OutputOffset & BRANCHENTRY)
142       BitMask.setBit(Index);
143     ++Index;
144   }
145   return BitMask;
146 }
147 
148 size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map) const {
149   size_t EqualOffsets = 0;
150   for (const std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
151     const uint32_t OutputOffset = KeyVal.first;
152     const uint32_t InputOffset = KeyVal.second >> 1;
153     if (OutputOffset == InputOffset)
154       ++EqualOffsets;
155     else
156       break;
157   }
158   return EqualOffsets;
159 }
160 
161 template <bool Cold>
162 void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
163                                        uint64_t &PrevAddress, raw_ostream &OS) {
164   const uint32_t NumFuncs =
165       llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) {
166         return Cold == ColdPartSource.count(Address);
167       });
168   encodeULEB128(NumFuncs, OS);
169   LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << (Cold ? " cold" : "")
170                     << " functions for BAT.\n");
171   size_t PrevIndex = 0;
172   for (auto &MapEntry : Maps) {
173     const uint64_t Address = MapEntry.first;
174     // Only process cold fragments in cold mode, and vice versa.
175     if (Cold != ColdPartSource.count(Address))
176       continue;
177     // NB: here we use the input address because hashes are saved early (in
178     // `saveMetadata`) before output addresses are assigned.
179     const uint64_t HotInputAddress =
180         ReverseMap[Cold ? ColdPartSource[Address] : Address];
181     std::pair<size_t, BBHashMap> &FuncHashPair = FuncHashes[HotInputAddress];
182     MapTy &Map = MapEntry.second;
183     const uint32_t NumEntries = Map.size();
184     LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
185                       << Twine::utohexstr(Address) << ".\n");
186     encodeULEB128(Address - PrevAddress, OS);
187     PrevAddress = Address;
188     if (Cold) {
189       size_t HotIndex =
190           std::distance(ColdPartSource.begin(), ColdPartSource.find(Address));
191       encodeULEB128(HotIndex - PrevIndex, OS);
192       PrevIndex = HotIndex;
193     } else {
194       // Function hash
195       LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", FuncHashPair.first));
196       OS.write(reinterpret_cast<char *>(&FuncHashPair.first), 8);
197     }
198     encodeULEB128(NumEntries, OS);
199     // For hot fragments only: encode the number of equal offsets
200     // (output = input) in the beginning of the function. Only encode one offset
201     // in these cases.
202     const size_t EqualElems = Cold ? 0 : getNumEqualOffsets(Map);
203     if (!Cold) {
204       encodeULEB128(EqualElems, OS);
205       if (EqualElems) {
206         const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
207         APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
208         OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
209                  BranchEntriesBytes);
210         LLVM_DEBUG({
211           dbgs() << "BranchEntries: ";
212           SmallString<8> BitMaskStr;
213           BranchEntries.toString(BitMaskStr, 2, false);
214           dbgs() << BitMaskStr << '\n';
215         });
216       }
217     }
218     size_t Index = 0;
219     uint64_t InOffset = 0;
220     // Output and Input addresses and delta-encoded
221     for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
222       const uint64_t OutputAddress = KeyVal.first + Address;
223       encodeULEB128(OutputAddress - PrevAddress, OS);
224       PrevAddress = OutputAddress;
225       if (Index++ >= EqualElems)
226         encodeSLEB128(KeyVal.second - InOffset, OS);
227       InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
228       if ((InOffset & BRANCHENTRY) == 0) {
229         // Basic block hash
230         size_t BBHash = FuncHashPair.second[InOffset >> 1];
231         OS.write(reinterpret_cast<char *>(&BBHash), 8);
232         LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x}\n", KeyVal.first,
233                                      InOffset >> 1, BBHash));
234       }
235     }
236   }
237 }
238 
239 std::error_code BoltAddressTranslation::parse(raw_ostream &OS, StringRef Buf) {
240   DataExtractor DE = DataExtractor(Buf, true, 8);
241   uint64_t Offset = 0;
242   if (Buf.size() < 12)
243     return make_error_code(llvm::errc::io_error);
244 
245   const uint32_t NameSz = DE.getU32(&Offset);
246   const uint32_t DescSz = DE.getU32(&Offset);
247   const uint32_t Type = DE.getU32(&Offset);
248 
249   if (Type != BinarySection::NT_BOLT_BAT ||
250       Buf.size() + Offset < alignTo(NameSz, 4) + DescSz)
251     return make_error_code(llvm::errc::io_error);
252 
253   StringRef Name = Buf.slice(Offset, Offset + NameSz);
254   Offset = alignTo(Offset + NameSz, 4);
255   if (Name.substr(0, 4) != "BOLT")
256     return make_error_code(llvm::errc::io_error);
257 
258   Error Err(Error::success());
259   std::vector<uint64_t> HotFuncs;
260   uint64_t PrevAddress = 0;
261   parseMaps</*Cold=*/false>(HotFuncs, PrevAddress, DE, Offset, Err);
262   parseMaps</*Cold=*/true>(HotFuncs, PrevAddress, DE, Offset, Err);
263   OS << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
264   return errorToErrorCode(std::move(Err));
265 }
266 
267 template <bool Cold>
268 void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
269                                        uint64_t &PrevAddress, DataExtractor &DE,
270                                        uint64_t &Offset, Error &Err) {
271   const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
272   LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "")
273                     << " functions\n");
274   size_t HotIndex = 0;
275   for (uint32_t I = 0; I < NumFunctions; ++I) {
276     const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err);
277     uint64_t HotAddress = Cold ? 0 : Address;
278     PrevAddress = Address;
279     if (Cold) {
280       HotIndex += DE.getULEB128(&Offset, &Err);
281       HotAddress = HotFuncs[HotIndex];
282       ColdPartSource.emplace(Address, HotAddress);
283     } else {
284       HotFuncs.push_back(Address);
285       // Function hash
286       const size_t FuncHash = DE.getU64(&Offset, &Err);
287       FuncHashes[Address].first = FuncHash;
288       LLVM_DEBUG(dbgs() << formatv("{0:x}: hash {1:x}\n", Address, FuncHash));
289     }
290     const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
291     // Equal offsets, hot fragments only.
292     size_t EqualElems = 0;
293     APInt BEBitMask;
294     if (!Cold) {
295       EqualElems = DE.getULEB128(&Offset, &Err);
296       LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n",
297                                    EqualElems, getULEB128Size(EqualElems)));
298       if (EqualElems) {
299         const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
300         BEBitMask = APInt(alignTo(EqualElems, 8), 0);
301         LoadIntFromMemory(
302             BEBitMask,
303             reinterpret_cast<const uint8_t *>(
304                 DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
305             BranchEntriesBytes);
306         LLVM_DEBUG({
307           dbgs() << "BEBitMask: ";
308           SmallString<8> BitMaskStr;
309           BEBitMask.toString(BitMaskStr, 2, false);
310           dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
311         });
312       }
313     }
314     MapTy Map;
315 
316     LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
317                       << Twine::utohexstr(Address) << "\n");
318     uint64_t InputOffset = 0;
319     for (uint32_t J = 0; J < NumEntries; ++J) {
320       const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
321       const uint64_t OutputAddress = PrevAddress + OutputDelta;
322       const uint64_t OutputOffset = OutputAddress - Address;
323       PrevAddress = OutputAddress;
324       int64_t InputDelta = 0;
325       if (J < EqualElems) {
326         InputOffset = (OutputOffset << 1) | BEBitMask[J];
327       } else {
328         InputDelta = DE.getSLEB128(&Offset, &Err);
329         InputOffset += InputDelta;
330       }
331       Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
332       size_t BBHash = 0;
333       const bool IsBranchEntry = InputOffset & BRANCHENTRY;
334       if (!IsBranchEntry) {
335         BBHash = DE.getU64(&Offset, &Err);
336         // Map basic block hash to hot fragment by input offset
337         FuncHashes[HotAddress].second.emplace(InputOffset >> 1, BBHash);
338       }
339       LLVM_DEBUG({
340         dbgs() << formatv(
341             "{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset,
342             InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta,
343             (J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress);
344         if (BBHash)
345           dbgs() << formatv(" {0:x}", BBHash);
346         dbgs() << '\n';
347       });
348     }
349     Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
350   }
351 }
352 
353 void BoltAddressTranslation::dump(raw_ostream &OS) {
354   const size_t NumTables = Maps.size();
355   OS << "BAT tables for " << NumTables << " functions:\n";
356   for (const auto &MapEntry : Maps) {
357     const uint64_t Address = MapEntry.first;
358     const uint64_t HotAddress = fetchParentAddress(Address);
359     OS << "Function Address: 0x" << Twine::utohexstr(Address);
360     if (HotAddress == 0)
361       OS << formatv(", hash: {0:x}", getBFHash(Address));
362     OS << "\n";
363     OS << "BB mappings:\n";
364     for (const auto &Entry : MapEntry.second) {
365       const bool IsBranch = Entry.second & BRANCHENTRY;
366       const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
367       OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
368          << "0x" << Twine::utohexstr(Val);
369       if (IsBranch)
370         OS << " (branch)";
371       else
372         OS << formatv(" hash: {0:x}",
373                       getBBHash(HotAddress ? HotAddress : Address, Val));
374       OS << "\n";
375     }
376     OS << "\n";
377   }
378   const size_t NumColdParts = ColdPartSource.size();
379   if (!NumColdParts)
380     return;
381 
382   OS << NumColdParts << " cold mappings:\n";
383   for (const auto &Entry : ColdPartSource) {
384     OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
385        << Twine::utohexstr(Entry.second) << "\n";
386   }
387   OS << "\n";
388 }
389 
390 uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
391                                            uint64_t Offset,
392                                            bool IsBranchSrc) const {
393   auto Iter = Maps.find(FuncAddress);
394   if (Iter == Maps.end())
395     return Offset;
396 
397   const MapTy &Map = Iter->second;
398   auto KeyVal = Map.upper_bound(Offset);
399   if (KeyVal == Map.begin())
400     return Offset;
401 
402   --KeyVal;
403 
404   const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit
405   // Branch source addresses are translated to the first instruction of the
406   // source BB to avoid accounting for modifications BOLT may have made in the
407   // BB regarding deletion/addition of instructions.
408   if (IsBranchSrc)
409     return Val;
410   return Offset - KeyVal->first + Val;
411 }
412 
413 std::optional<BoltAddressTranslation::FallthroughListTy>
414 BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
415                                                uint64_t From,
416                                                uint64_t To) const {
417   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
418 
419   // Filter out trivial case
420   if (From >= To)
421     return Res;
422 
423   From -= FuncAddress;
424   To -= FuncAddress;
425 
426   auto Iter = Maps.find(FuncAddress);
427   if (Iter == Maps.end())
428     return std::nullopt;
429 
430   const MapTy &Map = Iter->second;
431   auto FromIter = Map.upper_bound(From);
432   if (FromIter == Map.begin())
433     return Res;
434   // Skip instruction entries, to create fallthroughs we are only interested in
435   // BB boundaries
436   do {
437     if (FromIter == Map.begin())
438       return Res;
439     --FromIter;
440   } while (FromIter->second & BRANCHENTRY);
441 
442   auto ToIter = Map.upper_bound(To);
443   if (ToIter == Map.begin())
444     return Res;
445   --ToIter;
446   if (FromIter->first >= ToIter->first)
447     return Res;
448 
449   for (auto Iter = FromIter; Iter != ToIter;) {
450     const uint32_t Src = Iter->first;
451     if (Iter->second & BRANCHENTRY) {
452       ++Iter;
453       continue;
454     }
455 
456     ++Iter;
457     while (Iter->second & BRANCHENTRY && Iter != ToIter)
458       ++Iter;
459     if (Iter->second & BRANCHENTRY)
460       break;
461     Res.emplace_back(Src, Iter->first);
462   }
463 
464   return Res;
465 }
466 
467 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
468   auto Iter = ColdPartSource.find(Address);
469   if (Iter == ColdPartSource.end())
470     return 0;
471   return Iter->second;
472 }
473 
474 bool BoltAddressTranslation::enabledFor(
475     llvm::object::ELFObjectFileBase *InputFile) const {
476   for (const SectionRef &Section : InputFile->sections()) {
477     Expected<StringRef> SectionNameOrErr = Section.getName();
478     if (Error E = SectionNameOrErr.takeError())
479       continue;
480 
481     if (SectionNameOrErr.get() == SECTION_NAME)
482       return true;
483   }
484   return false;
485 }
486 
487 void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
488   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
489     // We don't need a translation table if the body of the function hasn't
490     // changed
491     if (BF.isIgnored() || (!BC.HasRelocations && !BF.isSimple()))
492       continue;
493     // Prepare function and block hashes
494     FuncHashes[BF.getAddress()].first = BF.computeHash();
495     BF.computeBlockHashes();
496     for (const BinaryBasicBlock &BB : BF)
497       FuncHashes[BF.getAddress()].second.emplace(BB.getInputOffset(),
498                                                  BB.getHash());
499   }
500 }
501 
502 size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
503                                          uint32_t BBInputOffset) const {
504   return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset);
505 }
506 
507 size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {
508   return FuncHashes.at(OutputAddress).first;
509 }
510 
511 } // namespace bolt
512 } // namespace llvm
513