xref: /llvm-project/bolt/lib/Profile/BoltAddressTranslation.cpp (revision 1fa870b1bd6c0041d06c31c4d3c830713d0a2a3f)
1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/Support/DataExtractor.h"
12 #include "llvm/Support/Errc.h"
13 
14 #define DEBUG_TYPE "bolt-bat"
15 
16 namespace llvm {
17 namespace bolt {
18 
/// Name of the section that carries the BOLT Address Translation data.
/// enabledFor() reports true when the input file has a section with exactly
/// this name.
const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
20 
21 void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
22                                                const BinaryBasicBlock &BB,
23                                                uint64_t FuncAddress) {
24   const uint64_t BBOutputOffset =
25       BB.getOutputAddressRange().first - FuncAddress;
26   const uint32_t BBInputOffset = BB.getInputOffset();
27 
28   // Every output BB must track back to an input BB for profile collection
29   // in bolted binaries. If we are missing an offset, it means this block was
30   // created by a pass. We will skip writing any entries for it, and this means
31   // any traffic happening in this block will map to the previous block in the
32   // layout. This covers the case where an input basic block is split into two,
33   // and the second one lacks any offset.
34   if (BBInputOffset == BinaryBasicBlock::INVALID_OFFSET)
35     return;
36 
37   LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
38   LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(BBOutputOffset)
39                     << " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
40   // In case of conflicts (same Key mapping to different Vals), the last
41   // update takes precedence. Of course it is not ideal to have conflicts and
42   // those happen when we have an empty BB that either contained only
43   // NOPs or a jump to the next block (successor). Either way, the successor
44   // and this deleted block will both share the same output address (the same
45   // key), and we need to map back. We choose here to privilege the successor by
46   // allowing it to overwrite the previously inserted key in the map.
47   Map[BBOutputOffset] = BBInputOffset;
48 
49   for (const auto &IOPair : BB.getOffsetTranslationTable()) {
50     const uint64_t OutputOffset = IOPair.first + BBOutputOffset;
51     const uint32_t InputOffset = IOPair.second;
52 
53     // Is this the first instruction in the BB? No need to duplicate the entry.
54     if (OutputOffset == BBOutputOffset)
55       continue;
56 
57     LLVM_DEBUG(dbgs() << "  Key: " << Twine::utohexstr(OutputOffset) << " Val: "
58                       << Twine::utohexstr(InputOffset) << " (branch)\n");
59     Map.insert(
60         std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset | BRANCHENTRY));
61   }
62 }
63 
64 void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
65   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
66   for (auto &BFI : BC.getBinaryFunctions()) {
67     const BinaryFunction &Function = BFI.second;
68     // We don't need a translation table if the body of the function hasn't
69     // changed
70     if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
71       continue;
72 
73     LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
74     LLVM_DEBUG(dbgs() << " Address reference: 0x"
75                       << Twine::utohexstr(Function.getOutputAddress()) << "\n");
76 
77     MapTy Map;
78     for (const BinaryBasicBlock *const BB :
79          Function.getLayout().getMainFragment())
80       writeEntriesForBB(Map, *BB, Function.getOutputAddress());
81     Maps.emplace(Function.getOutputAddress(), std::move(Map));
82 
83     if (!Function.isSplit())
84       continue;
85 
86     // Split maps
87     LLVM_DEBUG(dbgs() << " Cold part\n");
88     for (const FunctionFragment &FF :
89          Function.getLayout().getSplitFragments()) {
90       Map.clear();
91       for (const BinaryBasicBlock *const BB : FF)
92         writeEntriesForBB(Map, *BB, FF.getAddress());
93 
94       Maps.emplace(FF.getAddress(), std::move(Map));
95       ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
96     }
97   }
98 
99   const uint32_t NumFuncs = Maps.size();
100   OS.write(reinterpret_cast<const char *>(&NumFuncs), 4);
101   LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << " functions for BAT.\n");
102   for (auto &MapEntry : Maps) {
103     const uint64_t Address = MapEntry.first;
104     MapTy &Map = MapEntry.second;
105     const uint32_t NumEntries = Map.size();
106     LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
107                       << Twine::utohexstr(Address) << ".\n");
108     OS.write(reinterpret_cast<const char *>(&Address), 8);
109     OS.write(reinterpret_cast<const char *>(&NumEntries), 4);
110     for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
111       OS.write(reinterpret_cast<const char *>(&KeyVal.first), 4);
112       OS.write(reinterpret_cast<const char *>(&KeyVal.second), 4);
113     }
114   }
115   const uint32_t NumColdEntries = ColdPartSource.size();
116   LLVM_DEBUG(dbgs() << "Writing " << NumColdEntries
117                     << " cold part mappings.\n");
118   OS.write(reinterpret_cast<const char *>(&NumColdEntries), 4);
119   for (std::pair<const uint64_t, uint64_t> &ColdEntry : ColdPartSource) {
120     OS.write(reinterpret_cast<const char *>(&ColdEntry.first), 8);
121     OS.write(reinterpret_cast<const char *>(&ColdEntry.second), 8);
122     LLVM_DEBUG(dbgs() << " " << Twine::utohexstr(ColdEntry.first) << " -> "
123                       << Twine::utohexstr(ColdEntry.second) << "\n");
124   }
125 
126   outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
127   outs() << "BOLT-INFO: Wrote " << NumColdEntries
128          << " BAT cold-to-hot entries\n";
129 }
130 
131 std::error_code BoltAddressTranslation::parse(StringRef Buf) {
132   DataExtractor DE = DataExtractor(Buf, true, 8);
133   uint64_t Offset = 0;
134   if (Buf.size() < 12)
135     return make_error_code(llvm::errc::io_error);
136 
137   const uint32_t NameSz = DE.getU32(&Offset);
138   const uint32_t DescSz = DE.getU32(&Offset);
139   const uint32_t Type = DE.getU32(&Offset);
140 
141   if (Type != BinarySection::NT_BOLT_BAT ||
142       Buf.size() + Offset < alignTo(NameSz, 4) + DescSz)
143     return make_error_code(llvm::errc::io_error);
144 
145   StringRef Name = Buf.slice(Offset, Offset + NameSz);
146   Offset = alignTo(Offset + NameSz, 4);
147   if (Name.substr(0, 4) != "BOLT")
148     return make_error_code(llvm::errc::io_error);
149 
150   if (Buf.size() - Offset < 4)
151     return make_error_code(llvm::errc::io_error);
152 
153   const uint32_t NumFunctions = DE.getU32(&Offset);
154   LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << " functions\n");
155   for (uint32_t I = 0; I < NumFunctions; ++I) {
156     if (Buf.size() - Offset < 12)
157       return make_error_code(llvm::errc::io_error);
158 
159     const uint64_t Address = DE.getU64(&Offset);
160     const uint32_t NumEntries = DE.getU32(&Offset);
161     MapTy Map;
162 
163     LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
164                       << Twine::utohexstr(Address) << "\n");
165     if (Buf.size() - Offset < 8 * NumEntries)
166       return make_error_code(llvm::errc::io_error);
167     for (uint32_t J = 0; J < NumEntries; ++J) {
168       const uint32_t OutputAddr = DE.getU32(&Offset);
169       const uint32_t InputAddr = DE.getU32(&Offset);
170       Map.insert(std::pair<uint32_t, uint32_t>(OutputAddr, InputAddr));
171       LLVM_DEBUG(dbgs() << Twine::utohexstr(OutputAddr) << " -> "
172                         << Twine::utohexstr(InputAddr) << "\n");
173     }
174     Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
175   }
176 
177   if (Buf.size() - Offset < 4)
178     return make_error_code(llvm::errc::io_error);
179 
180   const uint32_t NumColdEntries = DE.getU32(&Offset);
181   LLVM_DEBUG(dbgs() << "Parsing " << NumColdEntries << " cold part mappings\n");
182   for (uint32_t I = 0; I < NumColdEntries; ++I) {
183     if (Buf.size() - Offset < 16)
184       return make_error_code(llvm::errc::io_error);
185     const uint32_t ColdAddress = DE.getU64(&Offset);
186     const uint32_t HotAddress = DE.getU64(&Offset);
187     ColdPartSource.insert(
188         std::pair<uint64_t, uint64_t>(ColdAddress, HotAddress));
189     LLVM_DEBUG(dbgs() << Twine::utohexstr(ColdAddress) << " -> "
190                       << Twine::utohexstr(HotAddress) << "\n");
191   }
192   outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
193   outs() << "BOLT-INFO: Parsed " << NumColdEntries
194          << " BAT cold-to-hot entries\n";
195 
196   return std::error_code();
197 }
198 
199 void BoltAddressTranslation::dump(raw_ostream &OS) {
200   const size_t NumTables = Maps.size();
201   OS << "BAT tables for " << NumTables << " functions:\n";
202   for (const auto &MapEntry : Maps) {
203     OS << "Function Address: 0x" << Twine::utohexstr(MapEntry.first) << "\n";
204     OS << "BB mappings:\n";
205     for (const auto &Entry : MapEntry.second) {
206       const bool IsBranch = Entry.second & BRANCHENTRY;
207       const uint32_t Val = Entry.second & ~BRANCHENTRY;
208       OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
209          << "0x" << Twine::utohexstr(Val);
210       if (IsBranch)
211         OS << " (branch)";
212       OS << "\n";
213     }
214     OS << "\n";
215   }
216   const size_t NumColdParts = ColdPartSource.size();
217   if (!NumColdParts)
218     return;
219 
220   OS << NumColdParts << " cold mappings:\n";
221   for (const auto &Entry : ColdPartSource) {
222     OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
223        << Twine::utohexstr(Entry.second) << "\n";
224   }
225   OS << "\n";
226 }
227 
228 uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
229                                            uint64_t Offset,
230                                            bool IsBranchSrc) const {
231   auto Iter = Maps.find(FuncAddress);
232   if (Iter == Maps.end())
233     return Offset;
234 
235   const MapTy &Map = Iter->second;
236   auto KeyVal = Map.upper_bound(Offset);
237   if (KeyVal == Map.begin())
238     return Offset;
239 
240   --KeyVal;
241 
242   const uint32_t Val = KeyVal->second & ~BRANCHENTRY;
243   // Branch source addresses are translated to the first instruction of the
244   // source BB to avoid accounting for modifications BOLT may have made in the
245   // BB regarding deletion/addition of instructions.
246   if (IsBranchSrc)
247     return Val;
248   return Offset - KeyVal->first + Val;
249 }
250 
251 Optional<BoltAddressTranslation::FallthroughListTy>
252 BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
253                                                uint64_t From,
254                                                uint64_t To) const {
255   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
256 
257   // Filter out trivial case
258   if (From >= To)
259     return Res;
260 
261   From -= FuncAddress;
262   To -= FuncAddress;
263 
264   auto Iter = Maps.find(FuncAddress);
265   if (Iter == Maps.end())
266     return None;
267 
268   const MapTy &Map = Iter->second;
269   auto FromIter = Map.upper_bound(From);
270   if (FromIter == Map.begin())
271     return Res;
272   // Skip instruction entries, to create fallthroughs we are only interested in
273   // BB boundaries
274   do {
275     if (FromIter == Map.begin())
276       return Res;
277     --FromIter;
278   } while (FromIter->second & BRANCHENTRY);
279 
280   auto ToIter = Map.upper_bound(To);
281   if (ToIter == Map.begin())
282     return Res;
283   --ToIter;
284   if (FromIter->first >= ToIter->first)
285     return Res;
286 
287   for (auto Iter = FromIter; Iter != ToIter;) {
288     const uint32_t Src = Iter->first;
289     if (Iter->second & BRANCHENTRY) {
290       ++Iter;
291       continue;
292     }
293 
294     ++Iter;
295     while (Iter->second & BRANCHENTRY && Iter != ToIter)
296       ++Iter;
297     if (Iter->second & BRANCHENTRY)
298       break;
299     Res.emplace_back(Src, Iter->first);
300   }
301 
302   return Res;
303 }
304 
305 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
306   auto Iter = ColdPartSource.find(Address);
307   if (Iter == ColdPartSource.end())
308     return 0;
309   return Iter->second;
310 }
311 
312 bool BoltAddressTranslation::enabledFor(
313     llvm::object::ELFObjectFileBase *InputFile) const {
314   for (const SectionRef &Section : InputFile->sections()) {
315     Expected<StringRef> SectionNameOrErr = Section.getName();
316     if (Error E = SectionNameOrErr.takeError())
317       continue;
318 
319     if (SectionNameOrErr.get() == SECTION_NAME)
320       return true;
321   }
322   return false;
323 }
324 } // namespace bolt
325 } // namespace llvm
326