1 //===--- ReorderSection.cpp - Profile based reordering of section data =======// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 // TODO: 12 // - make sure writeable data isn't put on same cache line unless temporally local 13 // - estimate temporal locality by looking at CFG? 14 15 #include "bolt/Passes/ReorderData.h" 16 #include <algorithm> 17 18 #undef DEBUG_TYPE 19 #define DEBUG_TYPE "reorder-data" 20 21 using namespace llvm; 22 using namespace bolt; 23 24 namespace opts { 25 extern cl::OptionCategory BoltCategory; 26 extern cl::OptionCategory BoltOptCategory; 27 extern cl::opt<JumpTableSupportLevel> JumpTables; 28 29 static cl::opt<bool> 30 PrintReorderedData("print-reordered-data", 31 cl::desc("print section contents after reordering"), 32 cl::ZeroOrMore, 33 cl::Hidden, 34 cl::cat(BoltCategory)); 35 36 cl::list<std::string> 37 ReorderData("reorder-data", 38 cl::CommaSeparated, 39 cl::desc("list of sections to reorder"), 40 cl::value_desc("section1,section2,section3,..."), 41 cl::cat(BoltOptCategory)); 42 43 enum ReorderAlgo : char { 44 REORDER_COUNT = 0, 45 REORDER_FUNCS = 1 46 }; 47 48 static cl::opt<ReorderAlgo> 49 ReorderAlgorithm("reorder-data-algo", 50 cl::desc("algorithm used to reorder data sections"), 51 cl::init(REORDER_COUNT), 52 cl::values( 53 clEnumValN(REORDER_COUNT, 54 "count", 55 "sort hot data by read counts"), 56 clEnumValN(REORDER_FUNCS, 57 "funcs", 58 "sort hot data by hot function usage and count")), 59 cl::ZeroOrMore, 60 cl::cat(BoltOptCategory)); 61 62 static cl::opt<unsigned> 63 ReorderDataMaxSymbols("reorder-data-max-symbols", 64 cl::desc("maximum number of symbols to reorder"), 65 cl::ZeroOrMore, 66 cl::init(std::numeric_limits<unsigned>::max()), 67 cl::cat(BoltOptCategory)); 68 69 static cl::opt<unsigned> 70 ReorderDataMaxBytes("reorder-data-max-bytes", 71 cl::desc("maximum number of bytes to reorder"), 72 cl::ZeroOrMore, 73 cl::init(std::numeric_limits<unsigned>::max()), 74 cl::cat(BoltOptCategory)); 75 76 static cl::list<std::string> 77 ReorderSymbols("reorder-symbols", 78 cl::CommaSeparated, 79 cl::desc("list of symbol names that can be reordered"), 80 cl::value_desc("symbol1,symbol2,symbol3,..."), 81 cl::Hidden, 82 cl::cat(BoltCategory)); 83 84 static cl::list<std::string> 85 SkipSymbols("reorder-skip-symbols", 86 cl::CommaSeparated, 87 cl::desc("list of symbol names that cannot be reordered"), 88 cl::value_desc("symbol1,symbol2,symbol3,..."), 89 cl::Hidden, 90 cl::cat(BoltCategory)); 91 92 static cl::opt<bool> 93 ReorderInplace("reorder-data-inplace", 94 cl::desc("reorder data sections in place"), 95 cl::init(false), 96 cl::ZeroOrMore, 97 cl::cat(BoltOptCategory)); 98 99 } 100 101 namespace llvm { 102 namespace bolt { 103 104 namespace { 105 106 static constexpr uint16_t MinAlignment = 16; 107 108 bool isSupported(const BinarySection &BS) { 109 return BS.isData() && !BS.isTLS(); 110 } 111 112 bool filterSymbol(const BinaryData *BD) { 113 if (!BD->isAtomic() || BD->isJumpTable() || !BD->isMoveable()) 114 return false; 115 116 bool IsValid = true; 117 118 if (!opts::ReorderSymbols.empty()) { 119 IsValid = false; 120 for (const std::string &Name : opts::ReorderSymbols) { 121 if (BD->hasName(Name)) { 122 IsValid = true; 123 break; 124 } 125 } 126 } 127 128 if (!IsValid) 129 return false; 130 131 if (!opts::SkipSymbols.empty()) { 132 for (const std::string &Name : opts::SkipSymbols) { 133 if (BD->hasName(Name)) { 134 IsValid = false; 135 break; 136 } 137 } 138 } 139 140 return IsValid; 141 } 142 143 } 144 145 using DataOrder = ReorderData::DataOrder; 146 147 void ReorderData::printOrder(const BinarySection &Section, 148 DataOrder::const_iterator Begin, 149 DataOrder::const_iterator End) const { 150 uint64_t TotalSize = 0; 151 bool PrintHeader = false; 152 while (Begin != End) { 153 const BinaryData *BD = Begin->first; 154 155 if (!PrintHeader) { 156 outs() << "BOLT-INFO: Hot global symbols for " 157 << Section.getName() << ":\n"; 158 PrintHeader = true; 159 } 160 161 outs() << "BOLT-INFO: " << *BD << ", moveable=" << BD->isMoveable() 162 << format(", weight=%.5f\n", double(Begin->second)/BD->getSize()); 163 164 TotalSize += BD->getSize(); 165 ++Begin; 166 } 167 if (TotalSize) 168 outs() << "BOLT-INFO: Total hot symbol size = " << TotalSize << "\n"; 169 } 170 171 DataOrder ReorderData::baseOrder(BinaryContext &BC, 172 const BinarySection &Section) const { 173 DataOrder Order; 174 for (auto &Entry : BC.getBinaryDataForSection(Section)) { 175 BinaryData *BD = Entry.second; 176 if (!BD->isAtomic()) // skip sub-symbols 177 continue; 178 auto BDCI = BinaryDataCounts.find(BD); 179 uint64_t BDCount = BDCI == BinaryDataCounts.end() ? 0 : BDCI->second; 180 Order.emplace_back(BD, BDCount); 181 } 182 return Order; 183 } 184 185 void ReorderData::assignMemData(BinaryContext &BC) { 186 // Map of sections (or heap/stack) to count/size. 187 StringMap<uint64_t> Counts; 188 StringMap<uint64_t> JumpTableCounts; 189 uint64_t TotalCount = 0; 190 for (auto &BFI : BC.getBinaryFunctions()) { 191 const BinaryFunction &BF = BFI.second; 192 if (!BF.hasMemoryProfile()) 193 continue; 194 195 for (const BinaryBasicBlock &BB : BF) { 196 for (const MCInst &Inst : BB) { 197 auto ErrorOrMemAccesssProfile = 198 BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>( 199 Inst, "MemoryAccessProfile"); 200 if (!ErrorOrMemAccesssProfile) 201 continue; 202 203 const MemoryAccessProfile &MemAccessProfile = 204 ErrorOrMemAccesssProfile.get(); 205 for (const AddressAccess &AccessInfo : 206 MemAccessProfile.AddressAccessInfo) { 207 if (BinaryData *BD = AccessInfo.MemoryObject) { 208 BinaryDataCounts[BD->getAtomicRoot()] += AccessInfo.Count; 209 Counts[BD->getSectionName()] += AccessInfo.Count; 210 if (BD->getAtomicRoot()->isJumpTable()) { 211 JumpTableCounts[BD->getSectionName()] += AccessInfo.Count; 212 } 213 } else { 214 Counts["Heap/stack"] += AccessInfo.Count; 215 } 216 TotalCount += AccessInfo.Count; 217 } 218 } 219 } 220 } 221 222 if (!Counts.empty()) { 223 outs() << "BOLT-INFO: Memory stats breakdown:\n"; 224 for (StringMapEntry<uint64_t> &Entry : Counts) { 225 StringRef Section = Entry.first(); 226 const uint64_t Count = Entry.second; 227 outs() << "BOLT-INFO: " << Section << " = " << Count 228 << format(" (%.1f%%)\n", 100.0*Count/TotalCount); 229 if (JumpTableCounts.count(Section) != 0) { 230 const uint64_t JTCount = JumpTableCounts[Section]; 231 outs() << "BOLT-INFO: jump tables = " << JTCount 232 << format(" (%.1f%%)\n", 100.0*JTCount/Count); 233 } 234 } 235 outs() << "BOLT-INFO: Total memory events: " << TotalCount << "\n"; 236 } 237 } 238 239 /// Only consider moving data that is used by the hottest functions with 240 /// valid profiles. 241 std::pair<DataOrder, unsigned> ReorderData::sortedByFunc( 242 BinaryContext &BC, 243 const BinarySection &Section, 244 std::map<uint64_t, BinaryFunction> &BFs 245 ) const { 246 std::map<BinaryData *, std::set<BinaryFunction *>> BDtoFunc; 247 std::map<BinaryData *, uint64_t> BDtoFuncCount; 248 249 auto dataUses = [&BC](const BinaryFunction &BF, bool OnlyHot) { 250 std::set<BinaryData *> Uses; 251 for (const BinaryBasicBlock &BB : BF) { 252 if (OnlyHot && BB.isCold()) 253 continue; 254 255 for (const MCInst &Inst : BB) { 256 auto ErrorOrMemAccesssProfile = 257 BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>( 258 Inst, "MemoryAccessProfile"); 259 if (!ErrorOrMemAccesssProfile) 260 continue; 261 262 const MemoryAccessProfile &MemAccessProfile = 263 ErrorOrMemAccesssProfile.get(); 264 for (const AddressAccess &AccessInfo : 265 MemAccessProfile.AddressAccessInfo) { 266 if (AccessInfo.MemoryObject) 267 Uses.insert(AccessInfo.MemoryObject); 268 } 269 } 270 } 271 return Uses; 272 }; 273 274 for (auto &Entry : BFs) { 275 BinaryFunction &BF = Entry.second; 276 if (BF.hasValidProfile()) { 277 for (BinaryData *BD : dataUses(BF, true)) { 278 if (!BC.getFunctionForSymbol(BD->getSymbol())) { 279 BDtoFunc[BD->getAtomicRoot()].insert(&BF); 280 BDtoFuncCount[BD->getAtomicRoot()] += BF.getKnownExecutionCount(); 281 } 282 } 283 } 284 } 285 286 DataOrder Order = baseOrder(BC, Section); 287 unsigned SplitPoint = Order.size(); 288 289 std::sort(Order.begin(), Order.end(), 290 [&](const DataOrder::value_type &A, 291 const DataOrder::value_type &B) { 292 // Total execution counts of functions referencing BD. 293 const uint64_t ACount = BDtoFuncCount[A.first]; 294 const uint64_t BCount = BDtoFuncCount[B.first]; 295 // Weight by number of loads/data size. 296 const double AWeight = double(A.second) / A.first->getSize(); 297 const double BWeight = double(B.second) / B.first->getSize(); 298 return (ACount > BCount || 299 (ACount == BCount && 300 (AWeight > BWeight || 301 (AWeight == BWeight && 302 A.first->getAddress() < B.first->getAddress())))); 303 }); 304 305 for (unsigned Idx = 0; Idx < Order.size(); ++Idx) { 306 if (!BDtoFuncCount[Order[Idx].first]) { 307 SplitPoint = Idx; 308 break; 309 } 310 } 311 312 return std::make_pair(Order, SplitPoint); 313 } 314 315 std::pair<DataOrder, unsigned> ReorderData::sortedByCount( 316 BinaryContext &BC, 317 const BinarySection &Section 318 ) const { 319 DataOrder Order = baseOrder(BC, Section); 320 unsigned SplitPoint = Order.size(); 321 322 std::sort(Order.begin(), Order.end(), 323 [](const DataOrder::value_type &A, 324 const DataOrder::value_type &B) { 325 // Weight by number of loads/data size. 326 const double AWeight = double(A.second) / A.first->getSize(); 327 const double BWeight = double(B.second) / B.first->getSize(); 328 return (AWeight > BWeight || 329 (AWeight == BWeight && 330 (A.first->getSize() < B.first->getSize() || 331 (A.first->getSize() == B.first->getSize() && 332 A.first->getAddress() < B.first->getAddress())))); 333 }); 334 335 for (unsigned Idx = 0; Idx < Order.size(); ++Idx) { 336 if (!Order[Idx].second) { 337 SplitPoint = Idx; 338 break; 339 } 340 } 341 342 return std::make_pair(Order, SplitPoint); 343 } 344 345 // TODO 346 // add option for cache-line alignment (or just use cache-line when section 347 // is writeable)? 348 void ReorderData::setSectionOrder(BinaryContext &BC, 349 BinarySection &OutputSection, 350 DataOrder::iterator Begin, 351 DataOrder::iterator End) { 352 std::vector<BinaryData *> NewOrder; 353 unsigned NumReordered = 0; 354 uint64_t Offset = 0; 355 uint64_t Count = 0; 356 357 // Get the total count just for stats 358 uint64_t TotalCount = 0; 359 for (auto Itr = Begin; Itr != End; ++Itr) { 360 TotalCount += Itr->second; 361 } 362 363 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: setSectionOrder for " 364 << OutputSection.getName() << "\n"); 365 366 for (; Begin != End; ++Begin) { 367 BinaryData *BD = Begin->first; 368 369 // we can't move certain symbols because they are screwy, see T25076484. 370 if (!filterSymbol(BD)) 371 continue; 372 373 ++NumReordered; 374 if (NumReordered > opts::ReorderDataMaxSymbols) { 375 if (!NewOrder.empty()) { 376 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on symbol " 377 << *NewOrder.back() << "\n"); 378 } 379 break; 380 } 381 382 uint16_t Alignment = std::max(BD->getAlignment(), MinAlignment); 383 Offset = alignTo(Offset, Alignment); 384 385 if ((Offset + BD->getSize()) > opts::ReorderDataMaxBytes) { 386 if (!NewOrder.empty()) { 387 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on symbol " 388 << *NewOrder.back() << "\n"); 389 } 390 break; 391 } 392 393 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ 0x" 394 << Twine::utohexstr(Offset) << "\n"); 395 396 BD->setOutputLocation(OutputSection, Offset); 397 398 // reorder sub-symbols 399 for (std::pair<const uint64_t, BinaryData *> &SubBD : 400 BC.getSubBinaryData(BD)) { 401 if (!SubBD.second->isJumpTable()) { 402 uint64_t SubOffset = 403 Offset + SubBD.second->getAddress() - BD->getAddress(); 404 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: SubBD " << SubBD.second->getName() 405 << " @ " << SubOffset << "\n"); 406 SubBD.second->setOutputLocation(OutputSection, SubOffset); 407 } 408 } 409 410 Offset += BD->getSize(); 411 Count += Begin->second; 412 NewOrder.push_back(BD); 413 } 414 415 OutputSection.reorderContents(NewOrder, opts::ReorderInplace); 416 417 outs() << "BOLT-INFO: reorder-data: " << Count << "/" << TotalCount 418 << format(" (%.1f%%)", 100.0*Count/TotalCount) << " events, " 419 << Offset << " hot bytes\n"; 420 } 421 422 bool ReorderData::markUnmoveableSymbols(BinaryContext &BC, 423 BinarySection &Section) const { 424 // Private symbols currently can't be moved because data can "leak" across 425 // the boundary of one symbol to the next, e.g. a string that has a common 426 // suffix might start in one private symbol and end with the common 427 // suffix in another. 428 auto isPrivate = [&](const BinaryData *BD) { 429 auto Prefix = std::string("PG") + BC.AsmInfo->getPrivateGlobalPrefix(); 430 return BD->getName().startswith(Prefix.str()); 431 }; 432 auto Range = BC.getBinaryDataForSection(Section); 433 bool FoundUnmoveable = false; 434 for (auto Itr = Range.begin(); Itr != Range.end(); ++Itr) { 435 if (Itr->second->getName().startswith("PG.")) { 436 BinaryData *Prev = 437 Itr != Range.begin() ? std::prev(Itr)->second : nullptr; 438 BinaryData *Next = Itr != Range.end() ? std::next(Itr)->second : nullptr; 439 bool PrevIsPrivate = Prev && isPrivate(Prev); 440 bool NextIsPrivate = Next && isPrivate(Next); 441 if (isPrivate(Itr->second) && (PrevIsPrivate || NextIsPrivate)) 442 Itr->second->setIsMoveable(false); 443 } else { 444 // check for overlapping symbols. 445 BinaryData *Next = Itr != Range.end() ? std::next(Itr)->second : nullptr; 446 if (Next && 447 Itr->second->getEndAddress() != Next->getAddress() && 448 Next->containsAddress(Itr->second->getEndAddress())) { 449 Itr->second->setIsMoveable(false); 450 Next->setIsMoveable(false); 451 } 452 } 453 FoundUnmoveable |= !Itr->second->isMoveable(); 454 } 455 return FoundUnmoveable; 456 } 457 458 void ReorderData::runOnFunctions(BinaryContext &BC) { 459 static const char* DefaultSections[] = { 460 ".rodata", 461 ".data", 462 ".bss", 463 nullptr 464 }; 465 466 if (!BC.HasRelocations || opts::ReorderData.empty()) 467 return; 468 469 // For now 470 if (opts::JumpTables > JTS_BASIC) { 471 outs() << "BOLT-WARNING: jump table support must be basic for " 472 << "data reordering to work.\n"; 473 return; 474 } 475 476 assignMemData(BC); 477 478 std::vector<BinarySection *> Sections; 479 480 for (const std::string &SectionName : opts::ReorderData) { 481 if (SectionName == "default") { 482 for (unsigned I = 0; DefaultSections[I]; ++I) { 483 if (ErrorOr<BinarySection &> Section = 484 BC.getUniqueSectionByName(DefaultSections[I])) 485 Sections.push_back(&*Section); 486 } 487 continue; 488 } 489 490 ErrorOr<BinarySection &> Section = BC.getUniqueSectionByName(SectionName); 491 if (!Section) { 492 outs() << "BOLT-WARNING: Section " << SectionName 493 << " not found, skipping.\n"; 494 continue; 495 } 496 497 if (!isSupported(*Section)) { 498 outs() << "BOLT-ERROR: Section " << SectionName << " not supported.\n"; 499 exit(1); 500 } 501 502 Sections.push_back(&*Section); 503 } 504 505 for (BinarySection *Section : Sections) { 506 const bool FoundUnmoveable = markUnmoveableSymbols(BC, *Section); 507 508 DataOrder Order; 509 unsigned SplitPointIdx; 510 511 if (opts::ReorderAlgorithm == opts::ReorderAlgo::REORDER_COUNT) { 512 outs() << "BOLT-INFO: reorder-sections: ordering data by count\n"; 513 std::tie(Order, SplitPointIdx) = sortedByCount(BC, *Section); 514 } else { 515 outs() << "BOLT-INFO: reorder-sections: ordering data by funcs\n"; 516 std::tie(Order, SplitPointIdx) = 517 sortedByFunc(BC, *Section, BC.getBinaryFunctions()); 518 } 519 auto SplitPoint = Order.begin() + SplitPointIdx; 520 521 if (opts::PrintReorderedData) { 522 printOrder(*Section, Order.begin(), SplitPoint); 523 } 524 525 if (!opts::ReorderInplace || FoundUnmoveable) { 526 if (opts::ReorderInplace && FoundUnmoveable) { 527 outs() << "BOLT-INFO: Found unmoveable symbols in " 528 << Section->getName() << " falling back to splitting " 529 << "instead of in-place reordering.\n"; 530 } 531 532 // Copy original section to <section name>.cold. 533 BinarySection &Cold = BC.registerSection( 534 std::string(Section->getName()) + ".cold", *Section); 535 536 // Reorder contents of original section. 537 setSectionOrder(BC, *Section, Order.begin(), SplitPoint); 538 539 // This keeps the original data from thinking it has been moved. 540 for (std::pair<const uint64_t, BinaryData *> &Entry : 541 BC.getBinaryDataForSection(*Section)) { 542 if (!Entry.second->isMoved()) { 543 Entry.second->setSection(Cold); 544 Entry.second->setOutputSection(Cold); 545 } 546 } 547 } else { 548 outs() << "BOLT-WARNING: Inplace section reordering not supported yet.\n"; 549 setSectionOrder(BC, *Section, Order.begin(), Order.end()); 550 } 551 } 552 } 553 554 } 555 } 556