xref: /llvm-project/bolt/lib/Profile/BoltAddressTranslation.cpp (revision 8fb8ad66c95a51b82e5c2876ed925b5512ce6b83)
1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/Support/DataExtractor.h"
12 #include "llvm/Support/Errc.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/LEB128.h"
15 
16 #define DEBUG_TYPE "bolt-bat"
17 
18 namespace llvm {
19 namespace bolt {
20 
// Name of the section that carries the serialized BAT data; enabledFor()
// compares each section name of the input file against this string.
21 const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
22 
23 void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
24                                                const BinaryBasicBlock &BB,
25                                                uint64_t FuncAddress) {
26   const uint64_t BBOutputOffset =
27       BB.getOutputAddressRange().first - FuncAddress;
28   const uint32_t BBInputOffset = BB.getInputOffset();
29 
30   // Every output BB must track back to an input BB for profile collection
31   // in bolted binaries. If we are missing an offset, it means this block was
32   // created by a pass. We will skip writing any entries for it, and this means
33   // any traffic happening in this block will map to the previous block in the
34   // layout. This covers the case where an input basic block is split into two,
35   // and the second one lacks any offset.
36   if (BBInputOffset == BinaryBasicBlock::INVALID_OFFSET)
37     return;
38 
39   LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
40   LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(BBOutputOffset)
41                     << " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
42   // In case of conflicts (same Key mapping to different Vals), the last
43   // update takes precedence. Of course it is not ideal to have conflicts and
44   // those happen when we have an empty BB that either contained only
45   // NOPs or a jump to the next block (successor). Either way, the successor
46   // and this deleted block will both share the same output address (the same
47   // key), and we need to map back. We choose here to privilege the successor by
48   // allowing it to overwrite the previously inserted key in the map.
49   Map[BBOutputOffset] = BBInputOffset << 1;
50 
51   const auto &IOAddressMap =
52       BB.getFunction()->getBinaryContext().getIOAddressMap();
53 
54   for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
55     const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
56     const auto OutputAddress = IOAddressMap.lookup(InputAddress);
57     assert(OutputAddress && "Unknown instruction address");
58     const auto OutputOffset = *OutputAddress - FuncAddress;
59 
60     // Is this the first instruction in the BB? No need to duplicate the entry.
61     if (OutputOffset == BBOutputOffset)
62       continue;
63 
64     LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(OutputOffset) << " Val: "
65                       << Twine::utohexstr(InputOffset) << " (branch)\n");
66     Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
67                                              (InputOffset << 1) | BRANCHENTRY));
68   }
69 }
70 
71 void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
72   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
73   for (auto &BFI : BC.getBinaryFunctions()) {
74     const BinaryFunction &Function = BFI.second;
75     // We don't need a translation table if the body of the function hasn't
76     // changed
77     if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
78       continue;
79 
80     LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
81     LLVM_DEBUG(dbgs() << " Address reference: 0x"
82                       << Twine::utohexstr(Function.getOutputAddress()) << "\n");
83 
84     MapTy Map;
85     for (const BinaryBasicBlock *const BB :
86          Function.getLayout().getMainFragment())
87       writeEntriesForBB(Map, *BB, Function.getOutputAddress());
88     Maps.emplace(Function.getOutputAddress(), std::move(Map));
89 
90     if (!Function.isSplit())
91       continue;
92 
93     // Split maps
94     LLVM_DEBUG(dbgs() << " Cold part\n");
95     for (const FunctionFragment &FF :
96          Function.getLayout().getSplitFragments()) {
97       Map.clear();
98       for (const BinaryBasicBlock *const BB : FF)
99         writeEntriesForBB(Map, *BB, FF.getAddress());
100 
101       Maps.emplace(FF.getAddress(), std::move(Map));
102       ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
103     }
104   }
105 
106   const uint32_t NumFuncs = Maps.size();
107   encodeULEB128(NumFuncs, OS);
108   LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << " functions for BAT.\n");
109   uint64_t PrevAddress = 0;
110   for (auto &MapEntry : Maps) {
111     const uint64_t Address = MapEntry.first;
112     MapTy &Map = MapEntry.second;
113     const uint32_t NumEntries = Map.size();
114     LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
115                       << Twine::utohexstr(Address) << ".\n");
116     encodeULEB128(Address - PrevAddress, OS);
117     PrevAddress = Address;
118     encodeULEB128(NumEntries, OS);
119     uint64_t InOffset = 0, OutOffset = 0;
120     // Output and Input addresses and delta-encoded
121     for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
122       encodeULEB128(KeyVal.first - OutOffset, OS);
123       encodeSLEB128(KeyVal.second - InOffset, OS);
124       std::tie(OutOffset, InOffset) = KeyVal;
125     }
126   }
127   const uint32_t NumColdEntries = ColdPartSource.size();
128   LLVM_DEBUG(dbgs() << "Writing " << NumColdEntries
129                     << " cold part mappings.\n");
130   encodeULEB128(NumColdEntries, OS);
131   for (std::pair<const uint64_t, uint64_t> &ColdEntry : ColdPartSource) {
132     encodeULEB128(ColdEntry.first, OS);
133     encodeULEB128(ColdEntry.second, OS);
134     LLVM_DEBUG(dbgs() << " " << Twine::utohexstr(ColdEntry.first) << " -> "
135                       << Twine::utohexstr(ColdEntry.second) << "\n");
136   }
137 
138   outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
139   outs() << "BOLT-INFO: Wrote " << NumColdEntries
140          << " BAT cold-to-hot entries\n";
141 }
142 
143 std::error_code BoltAddressTranslation::parse(StringRef Buf) {
144   DataExtractor DE = DataExtractor(Buf, true, 8);
145   uint64_t Offset = 0;
146   if (Buf.size() < 12)
147     return make_error_code(llvm::errc::io_error);
148 
149   const uint32_t NameSz = DE.getU32(&Offset);
150   const uint32_t DescSz = DE.getU32(&Offset);
151   const uint32_t Type = DE.getU32(&Offset);
152 
153   if (Type != BinarySection::NT_BOLT_BAT ||
154       Buf.size() + Offset < alignTo(NameSz, 4) + DescSz)
155     return make_error_code(llvm::errc::io_error);
156 
157   StringRef Name = Buf.slice(Offset, Offset + NameSz);
158   Offset = alignTo(Offset + NameSz, 4);
159   if (Name.substr(0, 4) != "BOLT")
160     return make_error_code(llvm::errc::io_error);
161 
162   Error Err(Error::success());
163   const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
164   LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << " functions\n");
165   uint64_t PrevAddress = 0;
166   for (uint32_t I = 0; I < NumFunctions; ++I) {
167     const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err);
168     PrevAddress = Address;
169     const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
170     MapTy Map;
171 
172     LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
173                       << Twine::utohexstr(Address) << "\n");
174     uint64_t InputOffset = 0, OutputOffset = 0;
175     for (uint32_t J = 0; J < NumEntries; ++J) {
176       const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
177       const int64_t InputDelta = DE.getSLEB128(&Offset, &Err);
178       OutputOffset += OutputDelta;
179       InputOffset += InputDelta;
180       Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
181       LLVM_DEBUG(dbgs() << Twine::utohexstr(OutputOffset) << " -> "
182                         << Twine::utohexstr(InputOffset) << " (" << OutputDelta
183                         << ", " << InputDelta << ")\n");
184     }
185     Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
186   }
187 
188   const uint32_t NumColdEntries = DE.getULEB128(&Offset, &Err);
189   LLVM_DEBUG(dbgs() << "Parsing " << NumColdEntries << " cold part mappings\n");
190   for (uint32_t I = 0; I < NumColdEntries; ++I) {
191     const uint32_t ColdAddress = DE.getULEB128(&Offset, &Err);
192     const uint32_t HotAddress = DE.getULEB128(&Offset, &Err);
193     ColdPartSource.insert(
194         std::pair<uint64_t, uint64_t>(ColdAddress, HotAddress));
195     LLVM_DEBUG(dbgs() << Twine::utohexstr(ColdAddress) << " -> "
196                       << Twine::utohexstr(HotAddress) << "\n");
197   }
198   outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
199   outs() << "BOLT-INFO: Parsed " << NumColdEntries
200          << " BAT cold-to-hot entries\n";
201 
202   return errorToErrorCode(std::move(Err));
203 }
204 
205 void BoltAddressTranslation::dump(raw_ostream &OS) {
206   const size_t NumTables = Maps.size();
207   OS << "BAT tables for " << NumTables << " functions:\n";
208   for (const auto &MapEntry : Maps) {
209     OS << "Function Address: 0x" << Twine::utohexstr(MapEntry.first) << "\n";
210     OS << "BB mappings:\n";
211     for (const auto &Entry : MapEntry.second) {
212       const bool IsBranch = Entry.second & BRANCHENTRY;
213       const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
214       OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
215          << "0x" << Twine::utohexstr(Val);
216       if (IsBranch)
217         OS << " (branch)";
218       OS << "\n";
219     }
220     OS << "\n";
221   }
222   const size_t NumColdParts = ColdPartSource.size();
223   if (!NumColdParts)
224     return;
225 
226   OS << NumColdParts << " cold mappings:\n";
227   for (const auto &Entry : ColdPartSource) {
228     OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
229        << Twine::utohexstr(Entry.second) << "\n";
230   }
231   OS << "\n";
232 }
233 
234 uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
235                                            uint64_t Offset,
236                                            bool IsBranchSrc) const {
237   auto Iter = Maps.find(FuncAddress);
238   if (Iter == Maps.end())
239     return Offset;
240 
241   const MapTy &Map = Iter->second;
242   auto KeyVal = Map.upper_bound(Offset);
243   if (KeyVal == Map.begin())
244     return Offset;
245 
246   --KeyVal;
247 
248   const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit
249   // Branch source addresses are translated to the first instruction of the
250   // source BB to avoid accounting for modifications BOLT may have made in the
251   // BB regarding deletion/addition of instructions.
252   if (IsBranchSrc)
253     return Val;
254   return Offset - KeyVal->first + Val;
255 }
256 
257 std::optional<BoltAddressTranslation::FallthroughListTy>
258 BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
259                                                uint64_t From,
260                                                uint64_t To) const {
261   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
262 
263   // Filter out trivial case
264   if (From >= To)
265     return Res;
266 
267   From -= FuncAddress;
268   To -= FuncAddress;
269 
270   auto Iter = Maps.find(FuncAddress);
271   if (Iter == Maps.end())
272     return std::nullopt;
273 
274   const MapTy &Map = Iter->second;
275   auto FromIter = Map.upper_bound(From);
276   if (FromIter == Map.begin())
277     return Res;
278   // Skip instruction entries, to create fallthroughs we are only interested in
279   // BB boundaries
280   do {
281     if (FromIter == Map.begin())
282       return Res;
283     --FromIter;
284   } while (FromIter->second & BRANCHENTRY);
285 
286   auto ToIter = Map.upper_bound(To);
287   if (ToIter == Map.begin())
288     return Res;
289   --ToIter;
290   if (FromIter->first >= ToIter->first)
291     return Res;
292 
293   for (auto Iter = FromIter; Iter != ToIter;) {
294     const uint32_t Src = Iter->first;
295     if (Iter->second & BRANCHENTRY) {
296       ++Iter;
297       continue;
298     }
299 
300     ++Iter;
301     while (Iter->second & BRANCHENTRY && Iter != ToIter)
302       ++Iter;
303     if (Iter->second & BRANCHENTRY)
304       break;
305     Res.emplace_back(Src, Iter->first);
306   }
307 
308   return Res;
309 }
310 
311 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
312   auto Iter = ColdPartSource.find(Address);
313   if (Iter == ColdPartSource.end())
314     return 0;
315   return Iter->second;
316 }
317 
318 bool BoltAddressTranslation::enabledFor(
319     llvm::object::ELFObjectFileBase *InputFile) const {
320   for (const SectionRef &Section : InputFile->sections()) {
321     Expected<StringRef> SectionNameOrErr = Section.getName();
322     if (Error E = SectionNameOrErr.takeError())
323       continue;
324 
325     if (SectionNameOrErr.get() == SECTION_NAME)
326       return true;
327   }
328   return false;
329 }
330 } // namespace bolt
331 } // namespace llvm
332