xref: /llvm-project/bolt/lib/Profile/BoltAddressTranslation.cpp (revision ad00e7e5ed5ab050151c115b627e11a8e3960e25)
1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/ADT/APInt.h"
12 #include "llvm/Support/Errc.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/LEB128.h"
15 
16 #define DEBUG_TYPE "bolt-bat"
17 
18 namespace llvm {
19 namespace bolt {
20 
21 const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
22 
23 void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
24                                                const BinaryBasicBlock &BB,
25                                                uint64_t FuncAddress) {
26   uint64_t HotFuncAddress = ColdPartSource.count(FuncAddress)
27                                 ? ColdPartSource[FuncAddress]
28                                 : FuncAddress;
29   const uint64_t BBOutputOffset =
30       BB.getOutputAddressRange().first - FuncAddress;
31   const uint32_t BBInputOffset = BB.getInputOffset();
32 
33   // Every output BB must track back to an input BB for profile collection
34   // in bolted binaries. If we are missing an offset, it means this block was
35   // created by a pass. We will skip writing any entries for it, and this means
36   // any traffic happening in this block will map to the previous block in the
37   // layout. This covers the case where an input basic block is split into two,
38   // and the second one lacks any offset.
39   if (BBInputOffset == BinaryBasicBlock::INVALID_OFFSET)
40     return;
41 
42   LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
43   LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(BBOutputOffset)
44                     << " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
45   LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
46                                getBBHash(HotFuncAddress, BBInputOffset)));
47   // In case of conflicts (same Key mapping to different Vals), the last
48   // update takes precedence. Of course it is not ideal to have conflicts and
49   // those happen when we have an empty BB that either contained only
50   // NOPs or a jump to the next block (successor). Either way, the successor
51   // and this deleted block will both share the same output address (the same
52   // key), and we need to map back. We choose here to privilege the successor by
53   // allowing it to overwrite the previously inserted key in the map.
54   Map[BBOutputOffset] = BBInputOffset << 1;
55 
56   const auto &IOAddressMap =
57       BB.getFunction()->getBinaryContext().getIOAddressMap();
58 
59   for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
60     const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
61     const auto OutputAddress = IOAddressMap.lookup(InputAddress);
62     assert(OutputAddress && "Unknown instruction address");
63     const auto OutputOffset = *OutputAddress - FuncAddress;
64 
65     // Is this the first instruction in the BB? No need to duplicate the entry.
66     if (OutputOffset == BBOutputOffset)
67       continue;
68 
69     LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(OutputOffset) << " Val: "
70                       << Twine::utohexstr(InputOffset) << " (branch)\n");
71     Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
72                                              (InputOffset << 1) | BRANCHENTRY));
73   }
74 }
75 
76 void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
77   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
78   for (auto &BFI : BC.getBinaryFunctions()) {
79     const BinaryFunction &Function = BFI.second;
80     const uint64_t InputAddress = Function.getAddress();
81     const uint64_t OutputAddress = Function.getOutputAddress();
82     // We don't need a translation table if the body of the function hasn't
83     // changed
84     if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
85       continue;
86 
87     // TBD: handle BAT functions w/multiple entry points.
88     if (Function.isMultiEntry())
89       continue;
90 
91     LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
92     LLVM_DEBUG(dbgs() << " Address reference: 0x"
93                       << Twine::utohexstr(Function.getOutputAddress()) << "\n");
94     LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(OutputAddress)));
95 
96     MapTy Map;
97     for (const BinaryBasicBlock *const BB :
98          Function.getLayout().getMainFragment())
99       writeEntriesForBB(Map, *BB, Function.getOutputAddress());
100     Maps.emplace(Function.getOutputAddress(), std::move(Map));
101     ReverseMap.emplace(OutputAddress, InputAddress);
102 
103     if (!Function.isSplit())
104       continue;
105 
106     // Split maps
107     LLVM_DEBUG(dbgs() << " Cold part\n");
108     for (const FunctionFragment &FF :
109          Function.getLayout().getSplitFragments()) {
110       ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
111       Map.clear();
112       for (const BinaryBasicBlock *const BB : FF)
113         writeEntriesForBB(Map, *BB, FF.getAddress());
114 
115       Maps.emplace(FF.getAddress(), std::move(Map));
116     }
117   }
118 
119   // Output addresses are delta-encoded
120   uint64_t PrevAddress = 0;
121   writeMaps</*Cold=*/false>(Maps, PrevAddress, OS);
122   writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);
123 
124   BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
125   const uint64_t NumBBHashes = std::accumulate(
126       FuncHashes.begin(), FuncHashes.end(), 0ull,
127       [](size_t Acc, const auto &B) { return Acc + B.second.second.size(); });
128   BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.size() << " function and "
129             << NumBBHashes << " basic block hashes\n";
130 }
131 
132 APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
133                                                             size_t EqualElems) {
134   APInt BitMask(alignTo(EqualElems, 8), 0);
135   size_t Index = 0;
136   for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
137     if (Index == EqualElems)
138       break;
139     const uint32_t OutputOffset = KeyVal.second;
140     if (OutputOffset & BRANCHENTRY)
141       BitMask.setBit(Index);
142     ++Index;
143   }
144   return BitMask;
145 }
146 
147 size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map) const {
148   size_t EqualOffsets = 0;
149   for (const std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
150     const uint32_t OutputOffset = KeyVal.first;
151     const uint32_t InputOffset = KeyVal.second >> 1;
152     if (OutputOffset == InputOffset)
153       ++EqualOffsets;
154     else
155       break;
156   }
157   return EqualOffsets;
158 }
159 
160 template <bool Cold>
161 void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
162                                        uint64_t &PrevAddress, raw_ostream &OS) {
163   const uint32_t NumFuncs =
164       llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) {
165         return Cold == ColdPartSource.count(Address);
166       });
167   encodeULEB128(NumFuncs, OS);
168   LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << (Cold ? " cold" : "")
169                     << " functions for BAT.\n");
170   size_t PrevIndex = 0;
171   for (auto &MapEntry : Maps) {
172     const uint64_t Address = MapEntry.first;
173     // Only process cold fragments in cold mode, and vice versa.
174     if (Cold != ColdPartSource.count(Address))
175       continue;
176     // NB: here we use the input address because hashes are saved early (in
177     // `saveMetadata`) before output addresses are assigned.
178     const uint64_t HotInputAddress =
179         ReverseMap[Cold ? ColdPartSource[Address] : Address];
180     std::pair<size_t, BBHashMap> &FuncHashPair = FuncHashes[HotInputAddress];
181     MapTy &Map = MapEntry.second;
182     const uint32_t NumEntries = Map.size();
183     LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
184                       << Twine::utohexstr(Address) << ".\n");
185     encodeULEB128(Address - PrevAddress, OS);
186     PrevAddress = Address;
187     if (Cold) {
188       size_t HotIndex =
189           std::distance(ColdPartSource.begin(), ColdPartSource.find(Address));
190       encodeULEB128(HotIndex - PrevIndex, OS);
191       PrevIndex = HotIndex;
192     } else {
193       // Function hash
194       LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", FuncHashPair.first));
195       OS.write(reinterpret_cast<char *>(&FuncHashPair.first), 8);
196     }
197     encodeULEB128(NumEntries, OS);
198     // For hot fragments only: encode the number of equal offsets
199     // (output = input) in the beginning of the function. Only encode one offset
200     // in these cases.
201     const size_t EqualElems = Cold ? 0 : getNumEqualOffsets(Map);
202     if (!Cold) {
203       encodeULEB128(EqualElems, OS);
204       if (EqualElems) {
205         const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
206         APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
207         OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
208                  BranchEntriesBytes);
209         LLVM_DEBUG({
210           dbgs() << "BranchEntries: ";
211           SmallString<8> BitMaskStr;
212           BranchEntries.toString(BitMaskStr, 2, false);
213           dbgs() << BitMaskStr << '\n';
214         });
215       }
216     }
217     size_t Index = 0;
218     uint64_t InOffset = 0;
219     // Output and Input addresses and delta-encoded
220     for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
221       const uint64_t OutputAddress = KeyVal.first + Address;
222       encodeULEB128(OutputAddress - PrevAddress, OS);
223       PrevAddress = OutputAddress;
224       if (Index++ >= EqualElems)
225         encodeSLEB128(KeyVal.second - InOffset, OS);
226       InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
227       if ((InOffset & BRANCHENTRY) == 0) {
228         // Basic block hash
229         size_t BBHash = FuncHashPair.second[InOffset >> 1];
230         OS.write(reinterpret_cast<char *>(&BBHash), 8);
231         LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x}\n", KeyVal.first,
232                                      InOffset >> 1, BBHash));
233       }
234     }
235   }
236 }
237 
238 std::error_code BoltAddressTranslation::parse(raw_ostream &OS, StringRef Buf) {
239   DataExtractor DE = DataExtractor(Buf, true, 8);
240   uint64_t Offset = 0;
241   if (Buf.size() < 12)
242     return make_error_code(llvm::errc::io_error);
243 
244   const uint32_t NameSz = DE.getU32(&Offset);
245   const uint32_t DescSz = DE.getU32(&Offset);
246   const uint32_t Type = DE.getU32(&Offset);
247 
248   if (Type != BinarySection::NT_BOLT_BAT ||
249       Buf.size() + Offset < alignTo(NameSz, 4) + DescSz)
250     return make_error_code(llvm::errc::io_error);
251 
252   StringRef Name = Buf.slice(Offset, Offset + NameSz);
253   Offset = alignTo(Offset + NameSz, 4);
254   if (Name.substr(0, 4) != "BOLT")
255     return make_error_code(llvm::errc::io_error);
256 
257   Error Err(Error::success());
258   std::vector<uint64_t> HotFuncs;
259   uint64_t PrevAddress = 0;
260   parseMaps</*Cold=*/false>(HotFuncs, PrevAddress, DE, Offset, Err);
261   parseMaps</*Cold=*/true>(HotFuncs, PrevAddress, DE, Offset, Err);
262   OS << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
263   return errorToErrorCode(std::move(Err));
264 }
265 
266 template <bool Cold>
267 void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
268                                        uint64_t &PrevAddress, DataExtractor &DE,
269                                        uint64_t &Offset, Error &Err) {
270   const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
271   LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "")
272                     << " functions\n");
273   size_t HotIndex = 0;
274   for (uint32_t I = 0; I < NumFunctions; ++I) {
275     const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err);
276     uint64_t HotAddress = Cold ? 0 : Address;
277     PrevAddress = Address;
278     if (Cold) {
279       HotIndex += DE.getULEB128(&Offset, &Err);
280       HotAddress = HotFuncs[HotIndex];
281       ColdPartSource.emplace(Address, HotAddress);
282     } else {
283       HotFuncs.push_back(Address);
284       // Function hash
285       const size_t FuncHash = DE.getU64(&Offset, &Err);
286       FuncHashes[Address].first = FuncHash;
287       LLVM_DEBUG(dbgs() << formatv("{0:x}: hash {1:x}\n", Address, FuncHash));
288     }
289     const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
290     // Equal offsets, hot fragments only.
291     size_t EqualElems = 0;
292     APInt BEBitMask;
293     if (!Cold) {
294       EqualElems = DE.getULEB128(&Offset, &Err);
295       LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n",
296                                    EqualElems, getULEB128Size(EqualElems)));
297       if (EqualElems) {
298         const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
299         BEBitMask = APInt(alignTo(EqualElems, 8), 0);
300         LoadIntFromMemory(
301             BEBitMask,
302             reinterpret_cast<const uint8_t *>(
303                 DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
304             BranchEntriesBytes);
305         LLVM_DEBUG({
306           dbgs() << "BEBitMask: ";
307           SmallString<8> BitMaskStr;
308           BEBitMask.toString(BitMaskStr, 2, false);
309           dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
310         });
311       }
312     }
313     MapTy Map;
314 
315     LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
316                       << Twine::utohexstr(Address) << "\n");
317     uint64_t InputOffset = 0;
318     for (uint32_t J = 0; J < NumEntries; ++J) {
319       const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
320       const uint64_t OutputAddress = PrevAddress + OutputDelta;
321       const uint64_t OutputOffset = OutputAddress - Address;
322       PrevAddress = OutputAddress;
323       int64_t InputDelta = 0;
324       if (J < EqualElems) {
325         InputOffset = (OutputOffset << 1) | BEBitMask[J];
326       } else {
327         InputDelta = DE.getSLEB128(&Offset, &Err);
328         InputOffset += InputDelta;
329       }
330       Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
331       size_t BBHash = 0;
332       const bool IsBranchEntry = InputOffset & BRANCHENTRY;
333       if (!IsBranchEntry) {
334         BBHash = DE.getU64(&Offset, &Err);
335         // Map basic block hash to hot fragment by input offset
336         FuncHashes[HotAddress].second.emplace(InputOffset >> 1, BBHash);
337       }
338       LLVM_DEBUG({
339         dbgs() << formatv(
340             "{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset,
341             InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta,
342             (J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress);
343         if (BBHash)
344           dbgs() << formatv(" {0:x}", BBHash);
345         dbgs() << '\n';
346       });
347     }
348     Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
349   }
350 }
351 
352 void BoltAddressTranslation::dump(raw_ostream &OS) {
353   const size_t NumTables = Maps.size();
354   OS << "BAT tables for " << NumTables << " functions:\n";
355   for (const auto &MapEntry : Maps) {
356     const uint64_t Address = MapEntry.first;
357     const uint64_t HotAddress = fetchParentAddress(Address);
358     OS << "Function Address: 0x" << Twine::utohexstr(Address);
359     if (HotAddress == 0)
360       OS << formatv(", hash: {0:x}", getBFHash(Address));
361     OS << "\n";
362     OS << "BB mappings:\n";
363     for (const auto &Entry : MapEntry.second) {
364       const bool IsBranch = Entry.second & BRANCHENTRY;
365       const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
366       OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
367          << "0x" << Twine::utohexstr(Val);
368       if (IsBranch)
369         OS << " (branch)";
370       else
371         OS << formatv(" hash: {0:x}",
372                       getBBHash(HotAddress ? HotAddress : Address, Val));
373       OS << "\n";
374     }
375     OS << "\n";
376   }
377   const size_t NumColdParts = ColdPartSource.size();
378   if (!NumColdParts)
379     return;
380 
381   OS << NumColdParts << " cold mappings:\n";
382   for (const auto &Entry : ColdPartSource) {
383     OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
384        << Twine::utohexstr(Entry.second) << "\n";
385   }
386   OS << "\n";
387 }
388 
389 uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
390                                            uint64_t Offset,
391                                            bool IsBranchSrc) const {
392   auto Iter = Maps.find(FuncAddress);
393   if (Iter == Maps.end())
394     return Offset;
395 
396   const MapTy &Map = Iter->second;
397   auto KeyVal = Map.upper_bound(Offset);
398   if (KeyVal == Map.begin())
399     return Offset;
400 
401   --KeyVal;
402 
403   const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit
404   // Branch source addresses are translated to the first instruction of the
405   // source BB to avoid accounting for modifications BOLT may have made in the
406   // BB regarding deletion/addition of instructions.
407   if (IsBranchSrc)
408     return Val;
409   return Offset - KeyVal->first + Val;
410 }
411 
412 std::optional<BoltAddressTranslation::FallthroughListTy>
413 BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
414                                                uint64_t From,
415                                                uint64_t To) const {
416   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
417 
418   // Filter out trivial case
419   if (From >= To)
420     return Res;
421 
422   From -= FuncAddress;
423   To -= FuncAddress;
424 
425   auto Iter = Maps.find(FuncAddress);
426   if (Iter == Maps.end())
427     return std::nullopt;
428 
429   const MapTy &Map = Iter->second;
430   auto FromIter = Map.upper_bound(From);
431   if (FromIter == Map.begin())
432     return Res;
433   // Skip instruction entries, to create fallthroughs we are only interested in
434   // BB boundaries
435   do {
436     if (FromIter == Map.begin())
437       return Res;
438     --FromIter;
439   } while (FromIter->second & BRANCHENTRY);
440 
441   auto ToIter = Map.upper_bound(To);
442   if (ToIter == Map.begin())
443     return Res;
444   --ToIter;
445   if (FromIter->first >= ToIter->first)
446     return Res;
447 
448   for (auto Iter = FromIter; Iter != ToIter;) {
449     const uint32_t Src = Iter->first;
450     if (Iter->second & BRANCHENTRY) {
451       ++Iter;
452       continue;
453     }
454 
455     ++Iter;
456     while (Iter->second & BRANCHENTRY && Iter != ToIter)
457       ++Iter;
458     if (Iter->second & BRANCHENTRY)
459       break;
460     Res.emplace_back(Src, Iter->first);
461   }
462 
463   return Res;
464 }
465 
466 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
467   auto Iter = ColdPartSource.find(Address);
468   if (Iter == ColdPartSource.end())
469     return 0;
470   return Iter->second;
471 }
472 
473 bool BoltAddressTranslation::enabledFor(
474     llvm::object::ELFObjectFileBase *InputFile) const {
475   for (const SectionRef &Section : InputFile->sections()) {
476     Expected<StringRef> SectionNameOrErr = Section.getName();
477     if (Error E = SectionNameOrErr.takeError())
478       continue;
479 
480     if (SectionNameOrErr.get() == SECTION_NAME)
481       return true;
482   }
483   return false;
484 }
485 
486 void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
487   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
488     // We don't need a translation table if the body of the function hasn't
489     // changed
490     if (BF.isIgnored() || (!BC.HasRelocations && !BF.isSimple()))
491       continue;
492     // Prepare function and block hashes
493     FuncHashes[BF.getAddress()].first = BF.computeHash();
494     BF.computeBlockHashes();
495     for (const BinaryBasicBlock &BB : BF)
496       FuncHashes[BF.getAddress()].second.emplace(BB.getInputOffset(),
497                                                  BB.getHash());
498   }
499 }
500 
501 size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
502                                          uint32_t BBInputOffset) const {
503   return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset);
504 }
505 
506 size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {
507   return FuncHashes.at(OutputAddress).first;
508 }
509 
510 } // namespace bolt
511 } // namespace llvm
512