xref: /llvm-project/bolt/lib/Passes/ReorderData.cpp (revision a34c753fe709a624f5b087397fb05adeac2311e4)
1 //===--- ReorderSection.cpp - Profile based reordering of section data =======//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 // TODO:
12 // - make sure writeable data isn't put on same cache line unless temporally local
13 // - estimate temporal locality by looking at CFG?
14 
15 #include "bolt/Passes/ReorderData.h"
16 #include <algorithm>
17 
18 #undef  DEBUG_TYPE
19 #define DEBUG_TYPE "reorder-data"
20 
21 using namespace llvm;
22 using namespace bolt;
23 
24 namespace opts {
25 extern cl::OptionCategory BoltCategory;
26 extern cl::OptionCategory BoltOptCategory;
27 extern cl::opt<JumpTableSupportLevel> JumpTables;
28 
29 static cl::opt<bool>
30 PrintReorderedData("print-reordered-data",
31   cl::desc("print section contents after reordering"),
32   cl::ZeroOrMore,
33   cl::Hidden,
34   cl::cat(BoltCategory));
35 
36 cl::list<std::string>
37 ReorderData("reorder-data",
38   cl::CommaSeparated,
39   cl::desc("list of sections to reorder"),
40   cl::value_desc("section1,section2,section3,..."),
41   cl::cat(BoltOptCategory));
42 
43 enum ReorderAlgo : char {
44   REORDER_COUNT         = 0,
45   REORDER_FUNCS         = 1
46 };
47 
48 static cl::opt<ReorderAlgo>
49 ReorderAlgorithm("reorder-data-algo",
50   cl::desc("algorithm used to reorder data sections"),
51   cl::init(REORDER_COUNT),
52   cl::values(
53     clEnumValN(REORDER_COUNT,
54       "count",
55       "sort hot data by read counts"),
56     clEnumValN(REORDER_FUNCS,
57       "funcs",
58       "sort hot data by hot function usage and count")),
59   cl::ZeroOrMore,
60   cl::cat(BoltOptCategory));
61 
62 static cl::opt<unsigned>
63 ReorderDataMaxSymbols("reorder-data-max-symbols",
64   cl::desc("maximum number of symbols to reorder"),
65   cl::ZeroOrMore,
66   cl::init(std::numeric_limits<unsigned>::max()),
67   cl::cat(BoltOptCategory));
68 
69 static cl::opt<unsigned>
70 ReorderDataMaxBytes("reorder-data-max-bytes",
71   cl::desc("maximum number of bytes to reorder"),
72   cl::ZeroOrMore,
73   cl::init(std::numeric_limits<unsigned>::max()),
74   cl::cat(BoltOptCategory));
75 
76 static cl::list<std::string>
77 ReorderSymbols("reorder-symbols",
78   cl::CommaSeparated,
79   cl::desc("list of symbol names that can be reordered"),
80   cl::value_desc("symbol1,symbol2,symbol3,..."),
81   cl::Hidden,
82   cl::cat(BoltCategory));
83 
84 static cl::list<std::string>
85 SkipSymbols("reorder-skip-symbols",
86   cl::CommaSeparated,
87   cl::desc("list of symbol names that cannot be reordered"),
88   cl::value_desc("symbol1,symbol2,symbol3,..."),
89   cl::Hidden,
90   cl::cat(BoltCategory));
91 
92 static cl::opt<bool>
93 ReorderInplace("reorder-data-inplace",
94   cl::desc("reorder data sections in place"),
95   cl::init(false),
96   cl::ZeroOrMore,
97   cl::cat(BoltOptCategory));
98 
99 }
100 
101 namespace llvm {
102 namespace bolt {
103 
104 namespace {
105 
106 static constexpr uint16_t MinAlignment = 16;
107 
108 bool isSupported(const BinarySection &BS) {
109   return BS.isData() && !BS.isTLS();
110 }
111 
112 bool filterSymbol(const BinaryData *BD) {
113   if (!BD->isAtomic() || BD->isJumpTable() || !BD->isMoveable())
114     return false;
115 
116   bool IsValid = true;
117 
118   if (!opts::ReorderSymbols.empty()) {
119     IsValid = false;
120     for (const std::string &Name : opts::ReorderSymbols) {
121       if (BD->hasName(Name)) {
122         IsValid = true;
123         break;
124       }
125     }
126   }
127 
128   if (!IsValid)
129     return false;
130 
131   if (!opts::SkipSymbols.empty()) {
132     for (const std::string &Name : opts::SkipSymbols) {
133       if (BD->hasName(Name)) {
134         IsValid = false;
135         break;
136       }
137     }
138   }
139 
140   return IsValid;
141 }
142 
143 }
144 
145 using DataOrder = ReorderData::DataOrder;
146 
147 void ReorderData::printOrder(const BinarySection &Section,
148                              DataOrder::const_iterator Begin,
149                              DataOrder::const_iterator End) const {
150   uint64_t TotalSize = 0;
151   bool PrintHeader = false;
152   while (Begin != End) {
153     const BinaryData *BD = Begin->first;
154 
155     if (!PrintHeader) {
156       outs() << "BOLT-INFO: Hot global symbols for "
157              << Section.getName() << ":\n";
158       PrintHeader = true;
159     }
160 
161     outs() << "BOLT-INFO: " << *BD << ", moveable=" << BD->isMoveable()
162            << format(", weight=%.5f\n", double(Begin->second)/BD->getSize());
163 
164     TotalSize += BD->getSize();
165     ++Begin;
166   }
167   if (TotalSize)
168     outs() << "BOLT-INFO: Total hot symbol size = " << TotalSize << "\n";
169 }
170 
171 DataOrder ReorderData::baseOrder(BinaryContext &BC,
172                                  const BinarySection &Section) const {
173   DataOrder Order;
174   for (auto &Entry : BC.getBinaryDataForSection(Section)) {
175     BinaryData *BD = Entry.second;
176     if (!BD->isAtomic()) // skip sub-symbols
177       continue;
178     auto BDCI = BinaryDataCounts.find(BD);
179     uint64_t BDCount = BDCI == BinaryDataCounts.end() ? 0 : BDCI->second;
180     Order.emplace_back(BD, BDCount);
181   }
182   return Order;
183 }
184 
185 void ReorderData::assignMemData(BinaryContext &BC) {
186   // Map of sections (or heap/stack) to count/size.
187   StringMap<uint64_t> Counts;
188   StringMap<uint64_t> JumpTableCounts;
189   uint64_t TotalCount = 0;
190   for (auto &BFI : BC.getBinaryFunctions()) {
191     const BinaryFunction &BF = BFI.second;
192     if (!BF.hasMemoryProfile())
193       continue;
194 
195     for (const BinaryBasicBlock &BB : BF) {
196       for (const MCInst &Inst : BB) {
197         auto ErrorOrMemAccesssProfile =
198           BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>(
199               Inst, "MemoryAccessProfile");
200         if (!ErrorOrMemAccesssProfile)
201           continue;
202 
203         const MemoryAccessProfile &MemAccessProfile =
204             ErrorOrMemAccesssProfile.get();
205         for (const AddressAccess &AccessInfo :
206              MemAccessProfile.AddressAccessInfo) {
207           if (BinaryData *BD = AccessInfo.MemoryObject) {
208             BinaryDataCounts[BD->getAtomicRoot()] += AccessInfo.Count;
209             Counts[BD->getSectionName()] += AccessInfo.Count;
210             if (BD->getAtomicRoot()->isJumpTable()) {
211               JumpTableCounts[BD->getSectionName()] += AccessInfo.Count;
212             }
213           } else {
214             Counts["Heap/stack"] += AccessInfo.Count;
215           }
216           TotalCount += AccessInfo.Count;
217         }
218       }
219     }
220   }
221 
222   if (!Counts.empty()) {
223     outs() << "BOLT-INFO: Memory stats breakdown:\n";
224     for (StringMapEntry<uint64_t> &Entry : Counts) {
225       StringRef Section = Entry.first();
226       const uint64_t Count = Entry.second;
227       outs() << "BOLT-INFO:   " << Section << " = " << Count
228              << format(" (%.1f%%)\n", 100.0*Count/TotalCount);
229       if (JumpTableCounts.count(Section) != 0) {
230         const uint64_t JTCount = JumpTableCounts[Section];
231         outs() << "BOLT-INFO:     jump tables = " << JTCount
232                << format(" (%.1f%%)\n", 100.0*JTCount/Count);
233       }
234     }
235     outs() << "BOLT-INFO: Total memory events: " << TotalCount << "\n";
236   }
237 }
238 
239 /// Only consider moving data that is used by the hottest functions with
240 /// valid profiles.
241 std::pair<DataOrder, unsigned> ReorderData::sortedByFunc(
242   BinaryContext &BC,
243   const BinarySection &Section,
244   std::map<uint64_t, BinaryFunction> &BFs
245 ) const {
246   std::map<BinaryData *, std::set<BinaryFunction *>> BDtoFunc;
247   std::map<BinaryData *, uint64_t> BDtoFuncCount;
248 
249   auto dataUses = [&BC](const BinaryFunction &BF, bool OnlyHot) {
250     std::set<BinaryData *> Uses;
251     for (const BinaryBasicBlock &BB : BF) {
252       if (OnlyHot && BB.isCold())
253         continue;
254 
255       for (const MCInst &Inst : BB) {
256         auto ErrorOrMemAccesssProfile =
257           BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>(
258               Inst, "MemoryAccessProfile");
259         if (!ErrorOrMemAccesssProfile)
260           continue;
261 
262         const MemoryAccessProfile &MemAccessProfile =
263             ErrorOrMemAccesssProfile.get();
264         for (const AddressAccess &AccessInfo :
265              MemAccessProfile.AddressAccessInfo) {
266           if (AccessInfo.MemoryObject)
267             Uses.insert(AccessInfo.MemoryObject);
268         }
269       }
270     }
271     return Uses;
272   };
273 
274   for (auto &Entry : BFs) {
275     BinaryFunction &BF = Entry.second;
276     if (BF.hasValidProfile()) {
277       for (BinaryData *BD : dataUses(BF, true)) {
278         if (!BC.getFunctionForSymbol(BD->getSymbol())) {
279           BDtoFunc[BD->getAtomicRoot()].insert(&BF);
280           BDtoFuncCount[BD->getAtomicRoot()] += BF.getKnownExecutionCount();
281         }
282       }
283     }
284   }
285 
286   DataOrder Order = baseOrder(BC, Section);
287   unsigned SplitPoint = Order.size();
288 
289   std::sort(Order.begin(), Order.end(),
290             [&](const DataOrder::value_type &A,
291                 const DataOrder::value_type &B) {
292               // Total execution counts of functions referencing BD.
293               const uint64_t ACount = BDtoFuncCount[A.first];
294               const uint64_t BCount = BDtoFuncCount[B.first];
295               // Weight by number of loads/data size.
296               const double AWeight = double(A.second) / A.first->getSize();
297               const double BWeight = double(B.second) / B.first->getSize();
298               return (ACount > BCount ||
299                       (ACount == BCount &&
300                        (AWeight > BWeight ||
301                         (AWeight == BWeight &&
302                          A.first->getAddress() < B.first->getAddress()))));
303             });
304 
305   for (unsigned Idx = 0; Idx < Order.size(); ++Idx) {
306     if (!BDtoFuncCount[Order[Idx].first]) {
307       SplitPoint = Idx;
308       break;
309     }
310   }
311 
312   return std::make_pair(Order, SplitPoint);
313 }
314 
315 std::pair<DataOrder, unsigned> ReorderData::sortedByCount(
316   BinaryContext &BC,
317   const BinarySection &Section
318 ) const {
319   DataOrder Order = baseOrder(BC, Section);
320   unsigned SplitPoint = Order.size();
321 
322   std::sort(Order.begin(), Order.end(),
323             [](const DataOrder::value_type &A,
324                const DataOrder::value_type &B) {
325               // Weight by number of loads/data size.
326               const double AWeight = double(A.second) / A.first->getSize();
327               const double BWeight = double(B.second) / B.first->getSize();
328               return (AWeight > BWeight ||
329                       (AWeight == BWeight &&
330                        (A.first->getSize() < B.first->getSize() ||
331                         (A.first->getSize() == B.first->getSize() &&
332                          A.first->getAddress() < B.first->getAddress()))));
333             });
334 
335   for (unsigned Idx = 0; Idx < Order.size(); ++Idx) {
336     if (!Order[Idx].second) {
337       SplitPoint = Idx;
338       break;
339     }
340   }
341 
342   return std::make_pair(Order, SplitPoint);
343 }
344 
345 // TODO
346 // add option for cache-line alignment (or just use cache-line when section
347 // is writeable)?
348 void ReorderData::setSectionOrder(BinaryContext &BC,
349                                   BinarySection &OutputSection,
350                                   DataOrder::iterator Begin,
351                                   DataOrder::iterator End) {
352   std::vector<BinaryData *> NewOrder;
353   unsigned NumReordered = 0;
354   uint64_t Offset = 0;
355   uint64_t Count = 0;
356 
357   // Get the total count just for stats
358   uint64_t TotalCount = 0;
359   for (auto Itr = Begin; Itr != End; ++Itr) {
360     TotalCount += Itr->second;
361   }
362 
363   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: setSectionOrder for "
364                     << OutputSection.getName() << "\n");
365 
366   for (; Begin != End; ++Begin) {
367     BinaryData *BD = Begin->first;
368 
369     // we can't move certain symbols because they are screwy, see T25076484.
370     if (!filterSymbol(BD))
371       continue;
372 
373     ++NumReordered;
374     if (NumReordered > opts::ReorderDataMaxSymbols) {
375       if (!NewOrder.empty()) {
376         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on symbol "
377                           << *NewOrder.back() << "\n");
378       }
379       break;
380     }
381 
382     uint16_t Alignment = std::max(BD->getAlignment(), MinAlignment);
383     Offset = alignTo(Offset, Alignment);
384 
385     if ((Offset + BD->getSize()) > opts::ReorderDataMaxBytes) {
386       if (!NewOrder.empty()) {
387         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on symbol "
388                           << *NewOrder.back() << "\n");
389       }
390       break;
391     }
392 
393     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ 0x"
394                       << Twine::utohexstr(Offset) << "\n");
395 
396     BD->setOutputLocation(OutputSection, Offset);
397 
398     // reorder sub-symbols
399     for (std::pair<const uint64_t, BinaryData *> &SubBD :
400          BC.getSubBinaryData(BD)) {
401       if (!SubBD.second->isJumpTable()) {
402         uint64_t SubOffset =
403             Offset + SubBD.second->getAddress() - BD->getAddress();
404         LLVM_DEBUG(dbgs() << "BOLT-DEBUG: SubBD " << SubBD.second->getName()
405                           << " @ " << SubOffset << "\n");
406         SubBD.second->setOutputLocation(OutputSection, SubOffset);
407       }
408     }
409 
410     Offset += BD->getSize();
411     Count += Begin->second;
412     NewOrder.push_back(BD);
413   }
414 
415   OutputSection.reorderContents(NewOrder, opts::ReorderInplace);
416 
417   outs() << "BOLT-INFO: reorder-data: " << Count << "/" << TotalCount
418          << format(" (%.1f%%)", 100.0*Count/TotalCount) << " events, "
419          << Offset << " hot bytes\n";
420 }
421 
422 bool ReorderData::markUnmoveableSymbols(BinaryContext &BC,
423                                         BinarySection &Section) const {
424   // Private symbols currently can't be moved because data can "leak" across
425   // the boundary of one symbol to the next, e.g. a string that has a common
426   // suffix might start in one private symbol and end with the common
427   // suffix in another.
428   auto isPrivate = [&](const BinaryData *BD) {
429     auto Prefix = std::string("PG") + BC.AsmInfo->getPrivateGlobalPrefix();
430     return BD->getName().startswith(Prefix.str());
431   };
432   auto Range = BC.getBinaryDataForSection(Section);
433   bool FoundUnmoveable = false;
434   for (auto Itr = Range.begin(); Itr != Range.end(); ++Itr) {
435     if (Itr->second->getName().startswith("PG.")) {
436       BinaryData *Prev =
437           Itr != Range.begin() ? std::prev(Itr)->second : nullptr;
438       BinaryData *Next = Itr != Range.end() ? std::next(Itr)->second : nullptr;
439       bool PrevIsPrivate = Prev && isPrivate(Prev);
440       bool NextIsPrivate = Next && isPrivate(Next);
441       if (isPrivate(Itr->second) && (PrevIsPrivate || NextIsPrivate))
442         Itr->second->setIsMoveable(false);
443     } else {
444       // check for overlapping symbols.
445       BinaryData *Next = Itr != Range.end() ? std::next(Itr)->second : nullptr;
446       if (Next &&
447           Itr->second->getEndAddress() != Next->getAddress() &&
448           Next->containsAddress(Itr->second->getEndAddress())) {
449         Itr->second->setIsMoveable(false);
450         Next->setIsMoveable(false);
451       }
452     }
453     FoundUnmoveable |= !Itr->second->isMoveable();
454   }
455   return FoundUnmoveable;
456 }
457 
458 void ReorderData::runOnFunctions(BinaryContext &BC) {
459   static const char* DefaultSections[] = {
460     ".rodata",
461     ".data",
462     ".bss",
463     nullptr
464   };
465 
466   if (!BC.HasRelocations || opts::ReorderData.empty())
467     return;
468 
469   // For now
470   if (opts::JumpTables > JTS_BASIC) {
471     outs() << "BOLT-WARNING: jump table support must be basic for "
472            << "data reordering to work.\n";
473     return;
474   }
475 
476   assignMemData(BC);
477 
478   std::vector<BinarySection *> Sections;
479 
480   for (const std::string &SectionName : opts::ReorderData) {
481     if (SectionName == "default") {
482       for (unsigned I = 0; DefaultSections[I]; ++I) {
483         if (ErrorOr<BinarySection &> Section =
484                 BC.getUniqueSectionByName(DefaultSections[I]))
485           Sections.push_back(&*Section);
486       }
487       continue;
488     }
489 
490     ErrorOr<BinarySection &> Section = BC.getUniqueSectionByName(SectionName);
491     if (!Section) {
492       outs() << "BOLT-WARNING: Section " << SectionName
493              << " not found, skipping.\n";
494       continue;
495     }
496 
497     if (!isSupported(*Section)) {
498       outs() << "BOLT-ERROR: Section " << SectionName << " not supported.\n";
499       exit(1);
500     }
501 
502     Sections.push_back(&*Section);
503   }
504 
505   for (BinarySection *Section : Sections) {
506     const bool FoundUnmoveable = markUnmoveableSymbols(BC, *Section);
507 
508     DataOrder Order;
509     unsigned SplitPointIdx;
510 
511     if (opts::ReorderAlgorithm == opts::ReorderAlgo::REORDER_COUNT) {
512       outs() << "BOLT-INFO: reorder-sections: ordering data by count\n";
513       std::tie(Order, SplitPointIdx) = sortedByCount(BC, *Section);
514     } else {
515       outs() << "BOLT-INFO: reorder-sections: ordering data by funcs\n";
516       std::tie(Order, SplitPointIdx) =
517         sortedByFunc(BC, *Section, BC.getBinaryFunctions());
518     }
519     auto SplitPoint = Order.begin() + SplitPointIdx;
520 
521     if (opts::PrintReorderedData) {
522       printOrder(*Section, Order.begin(), SplitPoint);
523     }
524 
525     if (!opts::ReorderInplace || FoundUnmoveable) {
526       if (opts::ReorderInplace && FoundUnmoveable) {
527         outs() << "BOLT-INFO: Found unmoveable symbols in "
528                << Section->getName() << " falling back to splitting "
529                << "instead of in-place reordering.\n";
530       }
531 
532       // Copy original section to <section name>.cold.
533       BinarySection &Cold = BC.registerSection(
534           std::string(Section->getName()) + ".cold", *Section);
535 
536       // Reorder contents of original section.
537       setSectionOrder(BC, *Section, Order.begin(), SplitPoint);
538 
539       // This keeps the original data from thinking it has been moved.
540       for (std::pair<const uint64_t, BinaryData *> &Entry :
541            BC.getBinaryDataForSection(*Section)) {
542         if (!Entry.second->isMoved()) {
543           Entry.second->setSection(Cold);
544           Entry.second->setOutputSection(Cold);
545         }
546       }
547     } else {
548       outs() << "BOLT-WARNING: Inplace section reordering not supported yet.\n";
549       setSectionOrder(BC, *Section, Order.begin(), Order.end());
550     }
551   }
552 }
553 
554 }
555 }
556