xref: /llvm-project/bolt/lib/Passes/ReorderData.cpp (revision 52cf07116bf0a8cab87b0f55176d198bcaa02575)
1 //===- bolt/Passes/ReorderSection.cpp - Reordering of section data --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderData class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 // TODO:
14 // - make sure writable data isn't put on same cache line unless temporally
15 // local
16 // - estimate temporal locality by looking at CFG?
17 
18 #include "bolt/Passes/ReorderData.h"
19 #include "llvm/ADT/MapVector.h"
20 #include <algorithm>
21 
22 #undef  DEBUG_TYPE
23 #define DEBUG_TYPE "reorder-data"
24 
25 using namespace llvm;
26 using namespace bolt;
27 
28 namespace opts {
29 extern cl::OptionCategory BoltCategory;
30 extern cl::OptionCategory BoltOptCategory;
31 extern cl::opt<JumpTableSupportLevel> JumpTables;
32 
33 static cl::opt<bool>
34     PrintReorderedData("print-reordered-data",
35                        cl::desc("print section contents after reordering"),
36                        cl::Hidden, cl::cat(BoltCategory));
37 
38 cl::list<std::string>
39 ReorderData("reorder-data",
40   cl::CommaSeparated,
41   cl::desc("list of sections to reorder"),
42   cl::value_desc("section1,section2,section3,..."),
43   cl::cat(BoltOptCategory));
44 
45 enum ReorderAlgo : char {
46   REORDER_COUNT         = 0,
47   REORDER_FUNCS         = 1
48 };
49 
50 static cl::opt<ReorderAlgo>
51 ReorderAlgorithm("reorder-data-algo",
52   cl::desc("algorithm used to reorder data sections"),
53   cl::init(REORDER_COUNT),
54   cl::values(
55     clEnumValN(REORDER_COUNT,
56       "count",
57       "sort hot data by read counts"),
58     clEnumValN(REORDER_FUNCS,
59       "funcs",
60       "sort hot data by hot function usage and count")),
61   cl::ZeroOrMore,
62   cl::cat(BoltOptCategory));
63 
64 static cl::opt<unsigned>
65     ReorderDataMaxSymbols("reorder-data-max-symbols",
66                           cl::desc("maximum number of symbols to reorder"),
67                           cl::init(std::numeric_limits<unsigned>::max()),
68                           cl::cat(BoltOptCategory));
69 
70 static cl::opt<unsigned> ReorderDataMaxBytes(
71     "reorder-data-max-bytes", cl::desc("maximum number of bytes to reorder"),
72     cl::init(std::numeric_limits<unsigned>::max()), cl::cat(BoltOptCategory));
73 
74 static cl::list<std::string>
75 ReorderSymbols("reorder-symbols",
76   cl::CommaSeparated,
77   cl::desc("list of symbol names that can be reordered"),
78   cl::value_desc("symbol1,symbol2,symbol3,..."),
79   cl::Hidden,
80   cl::cat(BoltCategory));
81 
82 static cl::list<std::string>
83 SkipSymbols("reorder-skip-symbols",
84   cl::CommaSeparated,
85   cl::desc("list of symbol names that cannot be reordered"),
86   cl::value_desc("symbol1,symbol2,symbol3,..."),
87   cl::Hidden,
88   cl::cat(BoltCategory));
89 
90 static cl::opt<bool> ReorderInplace("reorder-data-inplace",
91                                     cl::desc("reorder data sections in place"),
92 
93                                     cl::cat(BoltOptCategory));
94 }
95 
96 namespace llvm {
97 namespace bolt {
98 
99 namespace {
100 
101 static constexpr uint16_t MinAlignment = 16;
102 
isSupported(const BinarySection & BS)103 bool isSupported(const BinarySection &BS) { return BS.isData() && !BS.isTLS(); }
104 
filterSymbol(const BinaryData * BD)105 bool filterSymbol(const BinaryData *BD) {
106   if (!BD->isAtomic() || BD->isJumpTable() || !BD->isMoveable())
107     return false;
108 
109   bool IsValid = true;
110 
111   if (!opts::ReorderSymbols.empty()) {
112     IsValid = llvm::any_of(opts::ReorderSymbols, [&](const std::string &Name) {
113       return BD->hasName(Name);
114     });
115   }
116 
117   if (!IsValid)
118     return false;
119 
120   if (!opts::SkipSymbols.empty()) {
121     for (const std::string &Name : opts::SkipSymbols) {
122       if (BD->hasName(Name)) {
123         IsValid = false;
124         break;
125       }
126     }
127   }
128 
129   return IsValid;
130 }
131 
132 } // namespace
133 
134 using DataOrder = ReorderData::DataOrder;
135 
printOrder(BinaryContext & BC,const BinarySection & Section,DataOrder::const_iterator Begin,DataOrder::const_iterator End) const136 void ReorderData::printOrder(BinaryContext &BC, const BinarySection &Section,
137                              DataOrder::const_iterator Begin,
138                              DataOrder::const_iterator End) const {
139   uint64_t TotalSize = 0;
140   bool PrintHeader = false;
141   while (Begin != End) {
142     const BinaryData *BD = Begin->first;
143 
144     if (!PrintHeader) {
145       BC.outs() << "BOLT-INFO: Hot global symbols for " << Section.getName()
146                 << ":\n";
147       PrintHeader = true;
148     }
149 
150     BC.outs() << "BOLT-INFO: " << *BD << ", moveable=" << BD->isMoveable()
151               << format(", weight=%.5f\n",
152                         double(Begin->second) / BD->getSize());
153 
154     TotalSize += BD->getSize();
155     ++Begin;
156   }
157   if (TotalSize)
158     BC.outs() << "BOLT-INFO: Total hot symbol size = " << TotalSize << "\n";
159 }
160 
baseOrder(BinaryContext & BC,const BinarySection & Section) const161 DataOrder ReorderData::baseOrder(BinaryContext &BC,
162                                  const BinarySection &Section) const {
163   DataOrder Order;
164   for (auto &Entry : BC.getBinaryDataForSection(Section)) {
165     BinaryData *BD = Entry.second;
166     if (!BD->isAtomic()) // skip sub-symbols
167       continue;
168     auto BDCI = BinaryDataCounts.find(BD);
169     uint64_t BDCount = BDCI == BinaryDataCounts.end() ? 0 : BDCI->second;
170     Order.emplace_back(BD, BDCount);
171   }
172   return Order;
173 }
174 
assignMemData(BinaryContext & BC)175 void ReorderData::assignMemData(BinaryContext &BC) {
176   // Map of sections (or heap/stack) to count/size.
177   MapVector<StringRef, uint64_t> Counts;
178   MapVector<StringRef, uint64_t> JumpTableCounts;
179   uint64_t TotalCount = 0;
180   for (auto &BFI : BC.getBinaryFunctions()) {
181     const BinaryFunction &BF = BFI.second;
182     if (!BF.hasMemoryProfile())
183       continue;
184 
185     for (const BinaryBasicBlock &BB : BF) {
186       for (const MCInst &Inst : BB) {
187         auto ErrorOrMemAccessProfile =
188             BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>(
189                 Inst, "MemoryAccessProfile");
190         if (!ErrorOrMemAccessProfile)
191           continue;
192 
193         const MemoryAccessProfile &MemAccessProfile =
194             ErrorOrMemAccessProfile.get();
195         for (const AddressAccess &AccessInfo :
196              MemAccessProfile.AddressAccessInfo) {
197           if (BinaryData *BD = AccessInfo.MemoryObject) {
198             BinaryDataCounts[BD->getAtomicRoot()] += AccessInfo.Count;
199             Counts[BD->getSectionName()] += AccessInfo.Count;
200             if (BD->getAtomicRoot()->isJumpTable())
201               JumpTableCounts[BD->getSectionName()] += AccessInfo.Count;
202           } else {
203             Counts["Heap/stack"] += AccessInfo.Count;
204           }
205           TotalCount += AccessInfo.Count;
206         }
207       }
208     }
209   }
210 
211   if (!Counts.empty()) {
212     BC.outs() << "BOLT-INFO: Memory stats breakdown:\n";
213     for (const auto &KV : Counts) {
214       StringRef Section = KV.first;
215       const uint64_t Count = KV.second;
216       BC.outs() << "BOLT-INFO:   " << Section << " = " << Count
217                 << format(" (%.1f%%)\n", 100.0 * Count / TotalCount);
218       if (JumpTableCounts.count(Section) != 0) {
219         const uint64_t JTCount = JumpTableCounts[Section];
220         BC.outs() << "BOLT-INFO:     jump tables = " << JTCount
221                   << format(" (%.1f%%)\n", 100.0 * JTCount / Count);
222       }
223     }
224     BC.outs() << "BOLT-INFO: Total memory events: " << TotalCount << "\n";
225   }
226 }
227 
228 /// Only consider moving data that is used by the hottest functions with
229 /// valid profiles.
230 std::pair<DataOrder, unsigned>
sortedByFunc(BinaryContext & BC,const BinarySection & Section,std::map<uint64_t,BinaryFunction> & BFs) const231 ReorderData::sortedByFunc(BinaryContext &BC, const BinarySection &Section,
232                           std::map<uint64_t, BinaryFunction> &BFs) const {
233   std::map<BinaryData *, std::set<BinaryFunction *>> BDtoFunc;
234   std::map<BinaryData *, uint64_t> BDtoFuncCount;
235 
236   auto dataUses = [&BC](const BinaryFunction &BF, bool OnlyHot) {
237     std::set<BinaryData *> Uses;
238     for (const BinaryBasicBlock &BB : BF) {
239       if (OnlyHot && BB.isCold())
240         continue;
241 
242       for (const MCInst &Inst : BB) {
243         auto ErrorOrMemAccessProfile =
244             BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>(
245                 Inst, "MemoryAccessProfile");
246         if (!ErrorOrMemAccessProfile)
247           continue;
248 
249         const MemoryAccessProfile &MemAccessProfile =
250             ErrorOrMemAccessProfile.get();
251         for (const AddressAccess &AccessInfo :
252              MemAccessProfile.AddressAccessInfo) {
253           if (AccessInfo.MemoryObject)
254             Uses.insert(AccessInfo.MemoryObject);
255         }
256       }
257     }
258     return Uses;
259   };
260 
261   for (auto &Entry : BFs) {
262     BinaryFunction &BF = Entry.second;
263     if (BF.hasValidProfile()) {
264       for (BinaryData *BD : dataUses(BF, true)) {
265         if (!BC.getFunctionForSymbol(BD->getSymbol())) {
266           BDtoFunc[BD->getAtomicRoot()].insert(&BF);
267           BDtoFuncCount[BD->getAtomicRoot()] += BF.getKnownExecutionCount();
268         }
269       }
270     }
271   }
272 
273   DataOrder Order = baseOrder(BC, Section);
274   unsigned SplitPoint = Order.size();
275 
276   llvm::sort(
277       Order,
278       [&](const DataOrder::value_type &A, const DataOrder::value_type &B) {
279         // Total execution counts of functions referencing BD.
280         const uint64_t ACount = BDtoFuncCount[A.first];
281         const uint64_t BCount = BDtoFuncCount[B.first];
282         // Weight by number of loads/data size.
283         const double AWeight = double(A.second) / A.first->getSize();
284         const double BWeight = double(B.second) / B.first->getSize();
285         return (ACount > BCount ||
286                 (ACount == BCount &&
287                  (AWeight > BWeight ||
288                   (AWeight == BWeight &&
289                    A.first->getAddress() < B.first->getAddress()))));
290       });
291 
292   for (unsigned Idx = 0; Idx < Order.size(); ++Idx) {
293     if (!BDtoFuncCount[Order[Idx].first]) {
294       SplitPoint = Idx;
295       break;
296     }
297   }
298 
299   return std::make_pair(Order, SplitPoint);
300 }
301 
302 std::pair<DataOrder, unsigned>
sortedByCount(BinaryContext & BC,const BinarySection & Section) const303 ReorderData::sortedByCount(BinaryContext &BC,
304                            const BinarySection &Section) const {
305   DataOrder Order = baseOrder(BC, Section);
306   unsigned SplitPoint = Order.size();
307 
308   llvm::sort(Order, [](const DataOrder::value_type &A,
309                        const DataOrder::value_type &B) {
310     // Weight by number of loads/data size.
311     const double AWeight = double(A.second) / A.first->getSize();
312     const double BWeight = double(B.second) / B.first->getSize();
313     return (AWeight > BWeight ||
314             (AWeight == BWeight &&
315              (A.first->getSize() < B.first->getSize() ||
316               (A.first->getSize() == B.first->getSize() &&
317                A.first->getAddress() < B.first->getAddress()))));
318   });
319 
320   for (unsigned Idx = 0; Idx < Order.size(); ++Idx) {
321     if (!Order[Idx].second) {
322       SplitPoint = Idx;
323       break;
324     }
325   }
326 
327   return std::make_pair(Order, SplitPoint);
328 }
329 
330 // TODO
331 // add option for cache-line alignment (or just use cache-line when section
332 // is writable)?
setSectionOrder(BinaryContext & BC,BinarySection & OutputSection,DataOrder::iterator Begin,DataOrder::iterator End)333 void ReorderData::setSectionOrder(BinaryContext &BC,
334                                   BinarySection &OutputSection,
335                                   DataOrder::iterator Begin,
336                                   DataOrder::iterator End) {
337   std::vector<BinaryData *> NewOrder;
338   unsigned NumReordered = 0;
339   uint64_t Offset = 0;
340   uint64_t Count = 0;
341 
342   // Get the total count just for stats
343   uint64_t TotalCount = 0;
344   for (auto Itr = Begin; Itr != End; ++Itr)
345     TotalCount += Itr->second;
346 
347   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: setSectionOrder for "
348                     << OutputSection.getName() << "\n");
349 
350   for (; Begin != End; ++Begin) {
351     BinaryData *BD = Begin->first;
352 
353     // We can't move certain symbols.
354     if (!filterSymbol(BD))
355       continue;
356 
357     ++NumReordered;
358     if (NumReordered > opts::ReorderDataMaxSymbols) {
359       if (!NewOrder.empty())
360         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on symbol "
361                           << *NewOrder.back() << "\n");
362       break;
363     }
364 
365     uint16_t Alignment = std::max(BD->getAlignment(), MinAlignment);
366     Offset = alignTo(Offset, Alignment);
367 
368     if ((Offset + BD->getSize()) > opts::ReorderDataMaxBytes) {
369       if (!NewOrder.empty())
370         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on symbol "
371                           << *NewOrder.back() << "\n");
372       break;
373     }
374 
375     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ 0x"
376                       << Twine::utohexstr(Offset) << "\n");
377 
378     BD->setOutputLocation(OutputSection, Offset);
379 
380     // reorder sub-symbols
381     for (std::pair<const uint64_t, BinaryData *> &SubBD :
382          BC.getSubBinaryData(BD)) {
383       if (!SubBD.second->isJumpTable()) {
384         uint64_t SubOffset =
385             Offset + SubBD.second->getAddress() - BD->getAddress();
386         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: SubBD " << SubBD.second->getName()
387                           << " @ " << SubOffset << "\n");
388         SubBD.second->setOutputLocation(OutputSection, SubOffset);
389       }
390     }
391 
392     Offset += BD->getSize();
393     Count += Begin->second;
394     NewOrder.push_back(BD);
395   }
396 
397   OutputSection.reorderContents(NewOrder, opts::ReorderInplace);
398 
399   BC.outs() << "BOLT-INFO: reorder-data: " << Count << "/" << TotalCount
400             << format(" (%.1f%%)", 100.0 * Count / TotalCount) << " events, "
401             << Offset << " hot bytes\n";
402 }
403 
markUnmoveableSymbols(BinaryContext & BC,BinarySection & Section) const404 bool ReorderData::markUnmoveableSymbols(BinaryContext &BC,
405                                         BinarySection &Section) const {
406   // Private symbols currently can't be moved because data can "leak" across
407   // the boundary of one symbol to the next, e.g. a string that has a common
408   // suffix might start in one private symbol and end with the common
409   // suffix in another.
410   auto isPrivate = [&](const BinaryData *BD) {
411     auto Prefix = std::string("PG") + BC.AsmInfo->getPrivateGlobalPrefix();
412     return BD->getName().starts_with(Prefix.str());
413   };
414   auto Range = BC.getBinaryDataForSection(Section);
415   bool FoundUnmoveable = false;
416   for (auto Itr = Range.begin(); Itr != Range.end(); ++Itr) {
417     BinaryData *Next =
418         std::next(Itr) != Range.end() ? std::next(Itr)->second : nullptr;
419     if (Itr->second->getName().starts_with("PG.")) {
420       BinaryData *Prev =
421           Itr != Range.begin() ? std::prev(Itr)->second : nullptr;
422       bool PrevIsPrivate = Prev && isPrivate(Prev);
423       bool NextIsPrivate = Next && isPrivate(Next);
424       if (isPrivate(Itr->second) && (PrevIsPrivate || NextIsPrivate))
425         Itr->second->setIsMoveable(false);
426     } else {
427       // check for overlapping symbols.
428       if (Next && Itr->second->getEndAddress() != Next->getAddress() &&
429           Next->containsAddress(Itr->second->getEndAddress())) {
430         Itr->second->setIsMoveable(false);
431         Next->setIsMoveable(false);
432       }
433     }
434     FoundUnmoveable |= !Itr->second->isMoveable();
435   }
436   return FoundUnmoveable;
437 }
438 
runOnFunctions(BinaryContext & BC)439 Error ReorderData::runOnFunctions(BinaryContext &BC) {
440   static const char *DefaultSections[] = {".rodata", ".data", ".bss", nullptr};
441 
442   if (!BC.HasRelocations || opts::ReorderData.empty())
443     return Error::success();
444 
445   // For now
446   if (opts::JumpTables > JTS_BASIC) {
447     BC.outs() << "BOLT-WARNING: jump table support must be basic for "
448               << "data reordering to work.\n";
449     return Error::success();
450   }
451 
452   assignMemData(BC);
453 
454   std::vector<BinarySection *> Sections;
455 
456   for (const std::string &SectionName : opts::ReorderData) {
457     if (SectionName == "default") {
458       for (unsigned I = 0; DefaultSections[I]; ++I)
459         if (ErrorOr<BinarySection &> Section =
460                 BC.getUniqueSectionByName(DefaultSections[I]))
461           Sections.push_back(&*Section);
462       continue;
463     }
464 
465     ErrorOr<BinarySection &> Section = BC.getUniqueSectionByName(SectionName);
466     if (!Section) {
467       BC.outs() << "BOLT-WARNING: Section " << SectionName
468                 << " not found, skipping.\n";
469       continue;
470     }
471 
472     if (!isSupported(*Section)) {
473       BC.errs() << "BOLT-ERROR: Section " << SectionName << " not supported.\n";
474       return createFatalBOLTError("");
475     }
476 
477     Sections.push_back(&*Section);
478   }
479 
480   for (BinarySection *Section : Sections) {
481     const bool FoundUnmoveable = markUnmoveableSymbols(BC, *Section);
482 
483     DataOrder Order;
484     unsigned SplitPointIdx;
485 
486     if (opts::ReorderAlgorithm == opts::ReorderAlgo::REORDER_COUNT) {
487       BC.outs() << "BOLT-INFO: reorder-sections: ordering data by count\n";
488       std::tie(Order, SplitPointIdx) = sortedByCount(BC, *Section);
489     } else {
490       BC.outs() << "BOLT-INFO: reorder-sections: ordering data by funcs\n";
491       std::tie(Order, SplitPointIdx) =
492           sortedByFunc(BC, *Section, BC.getBinaryFunctions());
493     }
494     auto SplitPoint = Order.begin() + SplitPointIdx;
495 
496     if (opts::PrintReorderedData)
497       printOrder(BC, *Section, Order.begin(), SplitPoint);
498 
499     if (!opts::ReorderInplace || FoundUnmoveable) {
500       if (opts::ReorderInplace && FoundUnmoveable)
501         BC.outs() << "BOLT-INFO: Found unmoveable symbols in "
502                   << Section->getName() << " falling back to splitting "
503                   << "instead of in-place reordering.\n";
504 
505       // Rename sections.
506       BinarySection &Hot =
507           BC.registerSection(Section->getName() + ".hot", *Section);
508       Hot.setOutputName(Section->getName());
509       Section->setOutputName(".bolt.org" + Section->getName());
510 
511       // Reorder contents of original section.
512       setSectionOrder(BC, Hot, Order.begin(), SplitPoint);
513 
514       // This keeps the original data from thinking it has been moved.
515       for (std::pair<const uint64_t, BinaryData *> &Entry :
516            BC.getBinaryDataForSection(*Section)) {
517         if (!Entry.second->isMoved()) {
518           Entry.second->setSection(*Section);
519           Entry.second->setOutputSection(*Section);
520         }
521       }
522     } else {
523       BC.outs()
524           << "BOLT-WARNING: Inplace section reordering not supported yet.\n";
525       setSectionOrder(BC, *Section, Order.begin(), Order.end());
526     }
527   }
528   return Error::success();
529 }
530 
531 } // namespace bolt
532 } // namespace llvm
533